xref: /openbmc/linux/drivers/gpu/drm/radeon/cik.c (revision b34e08d5)
/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"

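/*
 * MODULE_FIRMWARE() below records every ucode image this driver may
 * request, so userspace tooling (depmod, initramfs generators) can
 * bundle the blobs with the module.
 */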
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

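/*
 * The thermal sensor registers read below live in the SMC indirect
 * register space, hence the RREG32_SMC() accessor.
 */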
/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

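	/* bit 9 flags an out-of-range reading; clamp it to 255 degrees */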
	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/*
 * Indirect register accessors.  PCIE_INDEX/PCIE_DATA form an
 * index/data pair; the dummy readbacks flush the posted writes
 * before the paired access.
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}

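/*
 * RLC save/restore register lists, handed to the RLC init code via
 * rdev->rlc.reg_list.  Format (inferred from the entries themselves):
 * pairs of ((instance select << 16) | (byte register offset >> 2))
 * followed by a default value, with the bare literals (0x3, 0x5)
 * marking segment boundaries between sub-lists.
 */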
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

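/*
 * "Golden" register tables are {offset, and_mask, or_mask} triples.
 * radeon_program_register_sequence() read-modify-writes each register
 * (clearing and_mask, setting or_mask), or writes or_mask outright
 * when and_mask is 0xffffffff.
 */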
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
{
	if (index < rdev->doorbell.num_doorbells) {
		return readl(rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
{
	if (index < rdev->doorbell.num_doorbells) {
		writel(v, rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

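/*
 * These helpers are normally reached through the RDOORBELL32()/
 * WDOORBELL32() wrappers in radeon.h; e.g. a compute ring kicks the
 * hardware with something like (illustrative):
 *
 *	WDOORBELL32(ring->doorbell_index, ring->wptr);
 */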
#define BONAIRE_IO_MC_REGS_SIZE 36

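/*
 * Per-ASIC MC io register fixups: address/data pairs written via
 * MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA by ci_mc_load_microcode()
 * before the MC ucode upload.
 */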
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

#define HAWAII_IO_MC_REGS_SIZE 22

static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances.  Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}
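/*
 * Illustrative call pattern (callers in this file serialize SRBM
 * access with rdev->srbm_mutex):
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, 0);
 *	... program the per-queue/per-VMID registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */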

/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running, blackout = 0;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	case CHIP_HAWAII:
		io_mc_regs = (u32 *)&hawaii_io_mc_regs;
		ucode_size = HAWAII_MC_UCODE_SIZE;
		regs_size = HAWAII_IO_MC_REGS_SIZE;
		break;
	default:
		return -EINVAL;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
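		/* note: running is known to be zero here, so this
		 * blackout save (and the restore below) never executes */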
1729 		if (running) {
1730 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1731 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1732 		}
1733 
1734 		/* reset the engine and set to writable */
1735 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1736 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1737 
1738 		/* load mc io regs */
1739 		for (i = 0; i < regs_size; i++) {
1740 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1741 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1742 		}
1743 		/* load the MC ucode */
1744 		fw_data = (const __be32 *)rdev->mc_fw->data;
1745 		for (i = 0; i < ucode_size; i++)
1746 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1747 
1748 		/* put the engine back into the active state */
1749 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1750 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1751 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1752 
1753 		/* wait for training to complete */
1754 		for (i = 0; i < rdev->usec_timeout; i++) {
1755 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1756 				break;
1757 			udelay(1);
1758 		}
1759 		for (i = 0; i < rdev->usec_timeout; i++) {
1760 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1761 				break;
1762 			udelay(1);
1763 		}
1764 
1765 		if (running)
1766 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1767 	}
1768 
1769 	return 0;
1770 }
1771 
1772 /**
1773  * cik_init_microcode - load ucode images from disk
1774  *
1775  * @rdev: radeon_device pointer
1776  *
1777  * Use the firmware interface to load the ucode images into
1778  * the driver (not loaded into hw).
1779  * Returns 0 on success, error on failure.
1780  */
1781 static int cik_init_microcode(struct radeon_device *rdev)
1782 {
1783 	const char *chip_name;
1784 	size_t pfp_req_size, me_req_size, ce_req_size,
1785 		mec_req_size, rlc_req_size, mc_req_size = 0,
1786 		sdma_req_size, smc_req_size = 0;
1787 	char fw_name[30];
1788 	int err;
1789 
1790 	DRM_DEBUG("\n");
1791 
1792 	switch (rdev->family) {
1793 	case CHIP_BONAIRE:
1794 		chip_name = "BONAIRE";
1795 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1796 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1797 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1798 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1799 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1800 		mc_req_size = CIK_MC_UCODE_SIZE * 4;
1801 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1802 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1803 		break;
1804 	case CHIP_HAWAII:
1805 		chip_name = "HAWAII";
1806 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1807 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1808 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1809 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1810 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1811 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1812 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1813 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1814 		break;
1815 	case CHIP_KAVERI:
1816 		chip_name = "KAVERI";
1817 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1818 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1819 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1820 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1821 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1822 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1823 		break;
1824 	case CHIP_KABINI:
1825 		chip_name = "KABINI";
1826 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1827 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1828 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1829 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1830 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1831 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1832 		break;
1833 	default:
		BUG();
1834 	}
1835 
1836 	DRM_INFO("Loading %s Microcode\n", chip_name);
1837 
1838 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1839 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1840 	if (err)
1841 		goto out;
1842 	if (rdev->pfp_fw->size != pfp_req_size) {
1843 		printk(KERN_ERR
1844 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1845 		       rdev->pfp_fw->size, fw_name);
1846 		err = -EINVAL;
1847 		goto out;
1848 	}
1849 
1850 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1851 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1852 	if (err)
1853 		goto out;
1854 	if (rdev->me_fw->size != me_req_size) {
1855 		printk(KERN_ERR
1856 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1857 		       rdev->me_fw->size, fw_name);
1858 		err = -EINVAL;
		goto out;
1859 	}
1860 
1861 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1862 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1863 	if (err)
1864 		goto out;
1865 	if (rdev->ce_fw->size != ce_req_size) {
1866 		printk(KERN_ERR
1867 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1868 		       rdev->ce_fw->size, fw_name);
1869 		err = -EINVAL;
		goto out;
1870 	}
1871 
1872 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1873 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1874 	if (err)
1875 		goto out;
1876 	if (rdev->mec_fw->size != mec_req_size) {
1877 		printk(KERN_ERR
1878 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1879 		       rdev->mec_fw->size, fw_name);
1880 		err = -EINVAL;
		goto out;
1881 	}
1882 
1883 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1884 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1885 	if (err)
1886 		goto out;
1887 	if (rdev->rlc_fw->size != rlc_req_size) {
1888 		printk(KERN_ERR
1889 		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1890 		       rdev->rlc_fw->size, fw_name);
1891 		err = -EINVAL;
		goto out;
1892 	}
1893 
1894 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1895 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1896 	if (err)
1897 		goto out;
1898 	if (rdev->sdma_fw->size != sdma_req_size) {
1899 		printk(KERN_ERR
1900 		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1901 		       rdev->sdma_fw->size, fw_name);
1902 		err = -EINVAL;
		goto out;
1903 	}
1904 
1905 	/* No SMC, MC ucode on APUs */
1906 	if (!(rdev->flags & RADEON_IS_IGP)) {
1907 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1908 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1909 		if (err)
1910 			goto out;
1911 		if (rdev->mc_fw->size != mc_req_size) {
1912 			printk(KERN_ERR
1913 			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1914 			       rdev->mc_fw->size, fw_name);
1915 			err = -EINVAL;
			goto out;
1916 		}
1917 
1918 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1919 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1920 		if (err) {
1921 			printk(KERN_ERR
1922 			       "smc: error loading firmware \"%s\"\n",
1923 			       fw_name);
1924 			release_firmware(rdev->smc_fw);
1925 			rdev->smc_fw = NULL;
1926 			err = 0;
1927 		} else if (rdev->smc_fw->size != smc_req_size) {
1928 			printk(KERN_ERR
1929 			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1930 			       rdev->smc_fw->size, fw_name);
1931 			err = -EINVAL;
1932 		}
1933 	}
1934 
1935 out:
1936 	if (err) {
1937 		if (err != -EINVAL)
1938 			printk(KERN_ERR
1939 			       "cik_cp: Failed to load firmware \"%s\"\n",
1940 			       fw_name);
1941 		release_firmware(rdev->pfp_fw);
1942 		rdev->pfp_fw = NULL;
1943 		release_firmware(rdev->me_fw);
1944 		rdev->me_fw = NULL;
1945 		release_firmware(rdev->ce_fw);
1946 		rdev->ce_fw = NULL;
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
1947 		release_firmware(rdev->rlc_fw);
1948 		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
1949 		release_firmware(rdev->mc_fw);
1950 		rdev->mc_fw = NULL;
1951 		release_firmware(rdev->smc_fw);
1952 		rdev->smc_fw = NULL;
1953 	}
1954 	return err;
1955 }
1956 
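/*
 * The request/size-check sequence in cik_init_microcode() repeats once
 * per ucode image; a hypothetical helper capturing that pattern (not
 * part of this file, shown only as a sketch) could look like:
 */
static int __maybe_unused cik_request_validate_fw(struct radeon_device *rdev,
						  const struct firmware **fw,
						  const char *fw_name,
						  size_t req_size,
						  const char *tag)
{
	int err = request_firmware(fw, fw_name, rdev->dev);

	if (err)
		return err;
	if ((*fw)->size != req_size) {
		printk(KERN_ERR "%s: Bogus length %zu in firmware \"%s\"\n",
		       tag, (*fw)->size, fw_name);
		return -EINVAL;
	}
	return 0;
}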
1957 /*
1958  * Core functions
1959  */
1960 /**
1961  * cik_tiling_mode_table_init - init the hw tiling table
1962  *
1963  * @rdev: radeon_device pointer
1964  *
1965  * Starting with SI, the tiling setup is done globally in a
1966  * set of 32 tiling modes.  Rather than selecting each set of
1967  * parameters per surface as on older asics, we just select
1968  * which index in the tiling table we want to use, and the
1969  * surface uses those parameters (CIK).
1970  */
1971 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1972 {
1973 	const u32 num_tile_mode_states = 32;
1974 	const u32 num_secondary_tile_mode_states = 16;
1975 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1976 	u32 num_pipe_configs;
1977 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
1978 		rdev->config.cik.max_shader_engines;
1979 
1980 	switch (rdev->config.cik.mem_row_size_in_kb) {
1981 	case 1:
1982 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1983 		break;
1984 	case 2:
1985 	default:
1986 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1987 		break;
1988 	case 4:
1989 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1990 		break;
1991 	}
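	/* mem_row_size_in_kb is derived from MC_ARB_RAMCFG and clamped to
	 * 4 KB in cik_gpu_init(), so only the 1/2/4 cases above can occur;
	 * split_equal_to_row_size then feeds the TILE_SPLIT field of the
	 * depth tile modes below
	 */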
1992 
1993 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
1994 	if (num_pipe_configs > 8)
1995 		num_pipe_configs = 16;
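	/* anything above 8 pipes is treated as the 16-pipe layout; in
	 * practice only Hawaii (max_tile_pipes = 16 in cik_gpu_init())
	 * takes this path
	 */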
1996 
1997 	if (num_pipe_configs == 16) {
1998 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1999 			switch (reg_offset) {
2000 			case 0:
2001 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2002 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2003 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2004 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2005 				break;
2006 			case 1:
2007 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2008 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2009 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2010 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2011 				break;
2012 			case 2:
2013 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2014 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2015 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2016 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2017 				break;
2018 			case 3:
2019 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2020 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2021 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2022 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2023 				break;
2024 			case 4:
2025 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2026 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2027 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2028 						 TILE_SPLIT(split_equal_to_row_size));
2029 				break;
2030 			case 5:
2031 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2032 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2033 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2034 				break;
2035 			case 6:
2036 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2037 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2038 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2039 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2040 				break;
2041 			case 7:
2042 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2043 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2044 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2045 						 TILE_SPLIT(split_equal_to_row_size));
2046 				break;
2047 			case 8:
2048 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2049 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2050 				break;
2051 			case 9:
2052 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2053 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2054 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2055 				break;
2056 			case 10:
2057 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2058 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2059 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2060 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2061 				break;
2062 			case 11:
2063 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2064 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2065 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2066 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2067 				break;
2068 			case 12:
2069 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2070 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2071 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2072 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2073 				break;
2074 			case 13:
2075 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2076 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2077 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2078 				break;
2079 			case 14:
2080 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2081 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2082 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2083 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2084 				break;
2085 			case 16:
2086 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2087 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2088 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2089 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2090 				break;
2091 			case 17:
2092 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2093 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2094 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2095 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2096 				break;
2097 			case 27:
2098 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2099 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2100 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2101 				break;
2102 			case 28:
2103 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2104 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2105 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2106 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2107 				break;
2108 			case 29:
2109 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2110 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2111 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2112 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2113 				break;
2114 			case 30:
2115 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2116 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2117 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2118 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2119 				break;
2120 			default:
2121 				gb_tile_moden = 0;
2122 				break;
2123 			}
2124 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2125 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2126 		}
2127 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2128 			switch (reg_offset) {
2129 			case 0:
2130 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2131 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2132 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2133 						 NUM_BANKS(ADDR_SURF_16_BANK));
2134 				break;
2135 			case 1:
2136 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2137 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2138 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2139 						 NUM_BANKS(ADDR_SURF_16_BANK));
2140 				break;
2141 			case 2:
2142 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2143 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2144 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2145 						 NUM_BANKS(ADDR_SURF_16_BANK));
2146 				break;
2147 			case 3:
2148 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2149 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2150 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2151 						 NUM_BANKS(ADDR_SURF_16_BANK));
2152 				break;
2153 			case 4:
2154 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2155 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2156 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2157 						 NUM_BANKS(ADDR_SURF_8_BANK));
2158 				break;
2159 			case 5:
2160 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2161 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2162 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2163 						 NUM_BANKS(ADDR_SURF_4_BANK));
2164 				break;
2165 			case 6:
2166 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2167 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2168 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2169 						 NUM_BANKS(ADDR_SURF_2_BANK));
2170 				break;
2171 			case 8:
2172 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2173 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2174 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2175 						 NUM_BANKS(ADDR_SURF_16_BANK));
2176 				break;
2177 			case 9:
2178 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2179 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2180 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2181 						 NUM_BANKS(ADDR_SURF_16_BANK));
2182 				break;
2183 			case 10:
2184 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2185 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2186 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2187 						 NUM_BANKS(ADDR_SURF_16_BANK));
2188 				break;
2189 			case 11:
2190 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2191 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2192 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2193 						 NUM_BANKS(ADDR_SURF_8_BANK));
2194 				break;
2195 			case 12:
2196 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2197 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2198 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2199 						 NUM_BANKS(ADDR_SURF_4_BANK));
2200 				break;
2201 			case 13:
2202 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2203 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2204 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2205 						 NUM_BANKS(ADDR_SURF_2_BANK));
2206 				break;
2207 			case 14:
2208 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2209 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2210 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2211 						 NUM_BANKS(ADDR_SURF_2_BANK));
2212 				break;
2213 			default:
2214 				gb_tile_moden = 0;
2215 				break;
2216 			}
			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2217 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2218 		}
2219 	} else if (num_pipe_configs == 8) {
2220 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2221 			switch (reg_offset) {
2222 			case 0:
2223 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2224 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2225 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2226 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2227 				break;
2228 			case 1:
2229 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2230 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2231 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2232 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2233 				break;
2234 			case 2:
2235 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2236 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2237 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2238 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2239 				break;
2240 			case 3:
2241 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2242 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2243 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2244 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2245 				break;
2246 			case 4:
2247 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2248 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2249 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2250 						 TILE_SPLIT(split_equal_to_row_size));
2251 				break;
2252 			case 5:
2253 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2254 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2255 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2256 				break;
2257 			case 6:
2258 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2259 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2260 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2261 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2262 				break;
2263 			case 7:
2264 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2265 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2266 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2267 						 TILE_SPLIT(split_equal_to_row_size));
2268 				break;
2269 			case 8:
2270 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2271 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2272 				break;
2273 			case 9:
2274 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2275 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2276 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2277 				break;
2278 			case 10:
2279 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2280 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2281 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2282 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2283 				break;
2284 			case 11:
2285 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2286 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2287 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2288 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2289 				break;
2290 			case 12:
2291 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2292 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2293 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2294 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2295 				break;
2296 			case 13:
2297 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2298 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2299 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2300 				break;
2301 			case 14:
2302 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2303 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2304 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2305 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2306 				break;
2307 			case 16:
2308 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2309 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2310 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2311 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2312 				break;
2313 			case 17:
2314 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2315 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2316 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2317 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2318 				break;
2319 			case 27:
2320 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2321 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2322 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2323 				break;
2324 			case 28:
2325 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2326 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2327 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2328 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2329 				break;
2330 			case 29:
2331 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2332 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2333 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2334 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2335 				break;
2336 			case 30:
2337 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2338 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2339 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2340 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2341 				break;
2342 			default:
2343 				gb_tile_moden = 0;
2344 				break;
2345 			}
2346 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2347 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2348 		}
2349 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2350 			switch (reg_offset) {
2351 			case 0:
2352 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2353 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2354 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2355 						 NUM_BANKS(ADDR_SURF_16_BANK));
2356 				break;
2357 			case 1:
2358 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2359 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2360 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2361 						 NUM_BANKS(ADDR_SURF_16_BANK));
2362 				break;
2363 			case 2:
2364 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2365 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2366 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2367 						 NUM_BANKS(ADDR_SURF_16_BANK));
2368 				break;
2369 			case 3:
2370 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2371 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2372 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2373 						 NUM_BANKS(ADDR_SURF_16_BANK));
2374 				break;
2375 			case 4:
2376 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2377 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2378 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2379 						 NUM_BANKS(ADDR_SURF_8_BANK));
2380 				break;
2381 			case 5:
2382 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2383 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2384 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2385 						 NUM_BANKS(ADDR_SURF_4_BANK));
2386 				break;
2387 			case 6:
2388 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2389 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2390 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2391 						 NUM_BANKS(ADDR_SURF_2_BANK));
2392 				break;
2393 			case 8:
2394 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2395 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2396 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2397 						 NUM_BANKS(ADDR_SURF_16_BANK));
2398 				break;
2399 			case 9:
2400 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2401 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2402 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2403 						 NUM_BANKS(ADDR_SURF_16_BANK));
2404 				break;
2405 			case 10:
2406 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2407 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2408 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2409 						 NUM_BANKS(ADDR_SURF_16_BANK));
2410 				break;
2411 			case 11:
2412 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2413 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2414 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2415 						 NUM_BANKS(ADDR_SURF_16_BANK));
2416 				break;
2417 			case 12:
2418 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2419 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2420 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2421 						 NUM_BANKS(ADDR_SURF_8_BANK));
2422 				break;
2423 			case 13:
2424 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2425 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2426 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2427 						 NUM_BANKS(ADDR_SURF_4_BANK));
2428 				break;
2429 			case 14:
2430 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2431 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2432 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2433 						 NUM_BANKS(ADDR_SURF_2_BANK));
2434 				break;
2435 			default:
2436 				gb_tile_moden = 0;
2437 				break;
2438 			}
2439 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2440 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2441 		}
2442 	} else if (num_pipe_configs == 4) {
2443 		if (num_rbs == 4) {
2444 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2445 				switch (reg_offset) {
2446 				case 0:
2447 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2448 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2449 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2450 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2451 					break;
2452 				case 1:
2453 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2454 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2455 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2456 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2457 					break;
2458 				case 2:
2459 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2460 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2461 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2462 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2463 					break;
2464 				case 3:
2465 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2466 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2467 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2468 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2469 					break;
2470 				case 4:
2471 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2472 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2473 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2474 							 TILE_SPLIT(split_equal_to_row_size));
2475 					break;
2476 				case 5:
2477 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2478 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2479 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2480 					break;
2481 				case 6:
2482 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2483 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2484 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2485 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2486 					break;
2487 				case 7:
2488 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2489 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2490 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2491 							 TILE_SPLIT(split_equal_to_row_size));
2492 					break;
2493 				case 8:
2494 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2495 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2496 					break;
2497 				case 9:
2498 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2499 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2500 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2501 					break;
2502 				case 10:
2503 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2504 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2505 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2506 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2507 					break;
2508 				case 11:
2509 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2510 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2511 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2512 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2513 					break;
2514 				case 12:
2515 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2516 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2517 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2518 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2519 					break;
2520 				case 13:
2521 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2522 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2523 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2524 					break;
2525 				case 14:
2526 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2527 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2528 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2529 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2530 					break;
2531 				case 16:
2532 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2533 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2534 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2535 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2536 					break;
2537 				case 17:
2538 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2539 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2540 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2541 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2542 					break;
2543 				case 27:
2544 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2545 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2546 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2547 					break;
2548 				case 28:
2549 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2550 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2551 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2552 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2553 					break;
2554 				case 29:
2555 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2556 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2557 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2558 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2559 					break;
2560 				case 30:
2561 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2562 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2563 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2564 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2565 					break;
2566 				default:
2567 					gb_tile_moden = 0;
2568 					break;
2569 				}
2570 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2571 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2572 			}
2573 		} else if (num_rbs < 4) {
2574 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2575 				switch (reg_offset) {
2576 				case 0:
2577 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2578 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2579 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2580 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2581 					break;
2582 				case 1:
2583 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2584 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2585 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2586 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2587 					break;
2588 				case 2:
2589 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2590 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2591 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2592 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2593 					break;
2594 				case 3:
2595 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2596 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2597 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2598 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2599 					break;
2600 				case 4:
2601 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2602 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2603 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2604 							 TILE_SPLIT(split_equal_to_row_size));
2605 					break;
2606 				case 5:
2607 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2608 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2609 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2610 					break;
2611 				case 6:
2612 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2613 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2614 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2615 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2616 					break;
2617 				case 7:
2618 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2619 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2620 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2621 							 TILE_SPLIT(split_equal_to_row_size));
2622 					break;
2623 				case 8:
2624 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2625 							 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2626 					break;
2627 				case 9:
2628 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2629 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2630 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2631 					break;
2632 				case 10:
2633 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2634 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2635 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2636 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2637 					break;
2638 				case 11:
2639 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2640 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2641 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2642 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2643 					break;
2644 				case 12:
2645 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2646 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2647 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2648 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2649 					break;
2650 				case 13:
2651 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2652 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2653 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2654 					break;
2655 				case 14:
2656 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2657 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2658 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2659 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2660 					break;
2661 				case 16:
2662 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2663 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2664 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2665 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2666 					break;
2667 				case 17:
2668 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2669 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2670 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2671 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2672 					break;
2673 				case 27:
2674 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2675 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2676 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2677 					break;
2678 				case 28:
2679 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2680 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2681 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2682 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2683 					break;
2684 				case 29:
2685 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2686 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2687 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2688 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2689 					break;
2690 				case 30:
2691 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2692 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2693 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2694 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2695 					break;
2696 				default:
2697 					gb_tile_moden = 0;
2698 					break;
2699 				}
2700 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2701 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2702 			}
2703 		}
2704 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2705 			switch (reg_offset) {
2706 			case 0:
2707 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2708 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2709 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2710 						 NUM_BANKS(ADDR_SURF_16_BANK));
2711 				break;
2712 			case 1:
2713 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2714 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2715 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2716 						 NUM_BANKS(ADDR_SURF_16_BANK));
2717 				break;
2718 			case 2:
2719 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2720 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2721 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2722 						 NUM_BANKS(ADDR_SURF_16_BANK));
2723 				break;
2724 			case 3:
2725 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2726 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2727 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2728 						 NUM_BANKS(ADDR_SURF_16_BANK));
2729 				break;
2730 			case 4:
2731 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2732 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2733 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2734 						 NUM_BANKS(ADDR_SURF_16_BANK));
2735 				break;
2736 			case 5:
2737 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2738 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2739 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2740 						 NUM_BANKS(ADDR_SURF_8_BANK));
2741 				break;
2742 			case 6:
2743 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2744 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2745 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2746 						 NUM_BANKS(ADDR_SURF_4_BANK));
2747 				break;
2748 			case 8:
2749 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2750 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2751 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2752 						 NUM_BANKS(ADDR_SURF_16_BANK));
2753 				break;
2754 			case 9:
2755 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2756 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2757 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2758 						 NUM_BANKS(ADDR_SURF_16_BANK));
2759 				break;
2760 			case 10:
2761 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2762 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2763 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2764 						 NUM_BANKS(ADDR_SURF_16_BANK));
2765 				break;
2766 			case 11:
2767 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2769 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2770 						 NUM_BANKS(ADDR_SURF_16_BANK));
2771 				break;
2772 			case 12:
2773 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2774 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2775 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2776 						 NUM_BANKS(ADDR_SURF_16_BANK));
2777 				break;
2778 			case 13:
2779 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2780 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2781 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2782 						 NUM_BANKS(ADDR_SURF_8_BANK));
2783 				break;
2784 			case 14:
2785 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2786 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2787 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2788 						 NUM_BANKS(ADDR_SURF_4_BANK));
2789 				break;
2790 			default:
2791 				gb_tile_moden = 0;
2792 				break;
2793 			}
2794 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2795 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2796 		}
2797 	} else if (num_pipe_configs == 2) {
2798 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2799 			switch (reg_offset) {
2800 			case 0:
2801 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2802 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2803 						 PIPE_CONFIG(ADDR_SURF_P2) |
2804 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2805 				break;
2806 			case 1:
2807 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2808 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2809 						 PIPE_CONFIG(ADDR_SURF_P2) |
2810 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2811 				break;
2812 			case 2:
2813 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2814 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2815 						 PIPE_CONFIG(ADDR_SURF_P2) |
2816 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2817 				break;
2818 			case 3:
2819 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2820 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2821 						 PIPE_CONFIG(ADDR_SURF_P2) |
2822 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2823 				break;
2824 			case 4:
2825 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2826 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2827 						 PIPE_CONFIG(ADDR_SURF_P2) |
2828 						 TILE_SPLIT(split_equal_to_row_size));
2829 				break;
2830 			case 5:
2831 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2832 						 PIPE_CONFIG(ADDR_SURF_P2) |
2833 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2834 				break;
2835 			case 6:
2836 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2837 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2838 						 PIPE_CONFIG(ADDR_SURF_P2) |
2839 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2840 				break;
2841 			case 7:
2842 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2843 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2844 						 PIPE_CONFIG(ADDR_SURF_P2) |
2845 						 TILE_SPLIT(split_equal_to_row_size));
2846 				break;
2847 			case 8:
2848 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2849 						PIPE_CONFIG(ADDR_SURF_P2);
2850 				break;
2851 			case 9:
2852 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2853 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2854 						 PIPE_CONFIG(ADDR_SURF_P2));
2855 				break;
2856 			case 10:
2857 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2858 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2859 						 PIPE_CONFIG(ADDR_SURF_P2) |
2860 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2861 				break;
2862 			case 11:
2863 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2864 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2865 						 PIPE_CONFIG(ADDR_SURF_P2) |
2866 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2867 				break;
2868 			case 12:
2869 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2870 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2871 						 PIPE_CONFIG(ADDR_SURF_P2) |
2872 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2873 				break;
2874 			case 13:
2875 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2876 						 PIPE_CONFIG(ADDR_SURF_P2) |
2877 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2878 				break;
2879 			case 14:
2880 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2881 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2882 						 PIPE_CONFIG(ADDR_SURF_P2) |
2883 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2884 				break;
2885 			case 16:
2886 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2887 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2888 						 PIPE_CONFIG(ADDR_SURF_P2) |
2889 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2890 				break;
2891 			case 17:
2892 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2893 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2894 						 PIPE_CONFIG(ADDR_SURF_P2) |
2895 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2896 				break;
2897 			case 27:
2898 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2899 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2900 						 PIPE_CONFIG(ADDR_SURF_P2));
2901 				break;
2902 			case 28:
2903 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2904 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2905 						 PIPE_CONFIG(ADDR_SURF_P2) |
2906 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2907 				break;
2908 			case 29:
2909 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2910 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2911 						 PIPE_CONFIG(ADDR_SURF_P2) |
2912 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2913 				break;
2914 			case 30:
2915 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2916 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2917 						 PIPE_CONFIG(ADDR_SURF_P2) |
2918 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2919 				break;
2920 			default:
2921 				gb_tile_moden = 0;
2922 				break;
2923 			}
2924 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2925 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2926 		}
2927 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2928 			switch (reg_offset) {
2929 			case 0:
2930 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2931 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2932 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2933 						 NUM_BANKS(ADDR_SURF_16_BANK));
2934 				break;
2935 			case 1:
2936 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2937 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2938 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2939 						 NUM_BANKS(ADDR_SURF_16_BANK));
2940 				break;
2941 			case 2:
2942 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2943 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2944 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2945 						 NUM_BANKS(ADDR_SURF_16_BANK));
2946 				break;
2947 			case 3:
2948 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2949 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2950 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2951 						 NUM_BANKS(ADDR_SURF_16_BANK));
2952 				break;
2953 			case 4:
2954 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2955 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2956 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2957 						 NUM_BANKS(ADDR_SURF_16_BANK));
2958 				break;
2959 			case 5:
2960 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2961 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2962 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2963 						 NUM_BANKS(ADDR_SURF_16_BANK));
2964 				break;
2965 			case 6:
2966 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2967 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2968 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2969 						 NUM_BANKS(ADDR_SURF_8_BANK));
2970 				break;
2971 			case 8:
2972 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2973 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2974 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2975 						 NUM_BANKS(ADDR_SURF_16_BANK));
2976 				break;
2977 			case 9:
2978 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2979 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2980 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2981 						 NUM_BANKS(ADDR_SURF_16_BANK));
2982 				break;
2983 			case 10:
2984 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2985 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2986 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2987 						 NUM_BANKS(ADDR_SURF_16_BANK));
2988 				break;
2989 			case 11:
2990 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2991 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2992 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2993 						 NUM_BANKS(ADDR_SURF_16_BANK));
2994 				break;
2995 			case 12:
2996 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2997 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2998 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2999 						 NUM_BANKS(ADDR_SURF_16_BANK));
3000 				break;
3001 			case 13:
3002 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3003 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3004 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3005 						 NUM_BANKS(ADDR_SURF_16_BANK));
3006 				break;
3007 			case 14:
3008 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3010 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3011 						 NUM_BANKS(ADDR_SURF_8_BANK));
3012 				break;
3013 			default:
3014 				gb_tile_moden = 0;
3015 				break;
3016 			}
3017 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3018 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3019 		}
3020 	} else
3021 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3022 }
3023 
3024 /**
3025  * cik_select_se_sh - select which SE, SH to address
3026  *
3027  * @rdev: radeon_device pointer
3028  * @se_num: shader engine to address
3029  * @sh_num: sh block to address
3030  *
3031  * Select which SE, SH combinations to address. Certain
3032  * registers are instanced per SE or SH.  0xffffffff means
3033  * broadcast to all SEs or SHs (CIK).
3034  */
3035 static void cik_select_se_sh(struct radeon_device *rdev,
3036 			     u32 se_num, u32 sh_num)
3037 {
3038 	u32 data = INSTANCE_BROADCAST_WRITES;
3039 
3040 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3041 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3042 	else if (se_num == 0xffffffff)
3043 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3044 	else if (sh_num == 0xffffffff)
3045 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3046 	else
3047 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3048 	WREG32(GRBM_GFX_INDEX, data);
3049 }
3050 
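/*
 * Typical call pattern (a sketch; see cik_setup_rb() below for a real
 * caller): select one SE/SH instance, access the instanced registers,
 * then restore broadcast so later writes reach every instance:
 *
 *	cik_select_se_sh(rdev, se, sh);
 *	...read/write per-SE/SH registers...
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 */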
3051 /**
3052  * cik_create_bitmask - create a bitmask
3053  *
3054  * @bit_width: length of the mask
3055  *
3056  * Create a variable-length bit mask (CIK).
3057  * Returns the bitmask.
3058  */
3059 static u32 cik_create_bitmask(u32 bit_width)
3060 {
3061 	u32 i, mask = 0;
3062 
3063 	for (i = 0; i < bit_width; i++) {
3064 		mask <<= 1;
3065 		mask |= 1;
3066 	}
3067 	return mask;
3068 }
3069 
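/*
 * For bit_width < 32 the loop above is equivalent to the closed form
 * (1U << bit_width) - 1, e.g. bit_width = 4 yields 0xf; the iterative
 * version also yields 0 for bit_width = 0 and 0xffffffff for 32 without
 * relying on an undefined 32-bit shift.
 */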
3070 /**
3071  * cik_get_rb_disabled - computes the mask of disabled RBs
3072  *
3073  * @rdev: radeon_device pointer
3074  * @max_rb_num_per_se: max RBs (render backends) per shader engine
3076  * @sh_per_se: number of SH blocks per SE for the asic
3077  *
3078  * Calculates the bitmask of disabled RBs (CIK).
3079  * Returns the disabled RB bitmask.
3080  */
3081 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3082 			      u32 max_rb_num_per_se,
3083 			      u32 sh_per_se)
3084 {
3085 	u32 data, mask;
3086 
3087 	data = RREG32(CC_RB_BACKEND_DISABLE);
3088 	if (data & 1)
3089 		data &= BACKEND_DISABLE_MASK;
3090 	else
3091 		data = 0;
3092 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3093 
3094 	data >>= BACKEND_DISABLE_SHIFT;
3095 
3096 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3097 
3098 	return data & mask;
3099 }
3100 
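/*
 * Worked example with the Hawaii numbers from cik_gpu_init() (4 backends
 * per SE, one SH per SE): cik_create_bitmask(4 / 1) = 0xf, so all four
 * per-SE disable bits survive the final "data & mask".
 */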
3101 /**
3102  * cik_setup_rb - setup the RBs on the asic
3103  *
3104  * @rdev: radeon_device pointer
3105  * @se_num: number of SEs (shader engines) for the asic
3106  * @sh_per_se: number of SH blocks per SE for the asic
3107  * @max_rb_num_per_se: max RBs (render backends) per shader engine
3108  *
3109  * Configures per-SE/SH RB registers (CIK).
3110  */
3111 static void cik_setup_rb(struct radeon_device *rdev,
3112 			 u32 se_num, u32 sh_per_se,
3113 			 u32 max_rb_num_per_se)
3114 {
3115 	int i, j;
3116 	u32 data, mask;
3117 	u32 disabled_rbs = 0;
3118 	u32 enabled_rbs = 0;
3119 
3120 	for (i = 0; i < se_num; i++) {
3121 		for (j = 0; j < sh_per_se; j++) {
3122 			cik_select_se_sh(rdev, i, j);
3123 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3124 			if (rdev->family == CHIP_HAWAII)
3125 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3126 			else
3127 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3128 		}
3129 	}
3130 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3131 
3132 	mask = 1;
3133 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3134 		if (!(disabled_rbs & mask))
3135 			enabled_rbs |= mask;
3136 		mask <<= 1;
3137 	}
3138 
3139 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3140 
3141 	for (i = 0; i < se_num; i++) {
3142 		cik_select_se_sh(rdev, i, 0xffffffff);
3143 		data = 0;
3144 		for (j = 0; j < sh_per_se; j++) {
3145 			switch (enabled_rbs & 3) {
3146 			case 0:
3147 				if (j == 0)
3148 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3149 				else
3150 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3151 				break;
3152 			case 1:
3153 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3154 				break;
3155 			case 2:
3156 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3157 				break;
3158 			case 3:
3159 			default:
3160 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3161 				break;
3162 			}
3163 			enabled_rbs >>= 2;
3164 		}
3165 		WREG32(PA_SC_RASTER_CONFIG, data);
3166 	}
3167 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3168 }
3169 
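/*
 * Worked example with the Bonaire numbers from cik_gpu_init() (2 SEs,
 * 1 SH per SE, 2 RBs per SE), assuming no RBs are harvested: the first
 * loop finds disabled_rbs = 0, the second leaves enabled_rbs = 0xf, and
 * each SH then consumes two bits ("enabled_rbs & 3" == 3) and programs
 * the RASTER_CONFIG_RB_MAP_2 mapping.
 */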
3170 /**
3171  * cik_gpu_init - setup the 3D engine
3172  *
3173  * @rdev: radeon_device pointer
3174  *
3175  * Configures the 3D engine and tiling configuration
3176  * registers so that the 3D engine is usable.
3177  */
3178 static void cik_gpu_init(struct radeon_device *rdev)
3179 {
3180 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3181 	u32 mc_shared_chmap, mc_arb_ramcfg;
3182 	u32 hdp_host_path_cntl;
3183 	u32 tmp;
3184 	int i, j;
3185 
3186 	switch (rdev->family) {
3187 	case CHIP_BONAIRE:
3188 		rdev->config.cik.max_shader_engines = 2;
3189 		rdev->config.cik.max_tile_pipes = 4;
3190 		rdev->config.cik.max_cu_per_sh = 7;
3191 		rdev->config.cik.max_sh_per_se = 1;
3192 		rdev->config.cik.max_backends_per_se = 2;
3193 		rdev->config.cik.max_texture_channel_caches = 4;
3194 		rdev->config.cik.max_gprs = 256;
3195 		rdev->config.cik.max_gs_threads = 32;
3196 		rdev->config.cik.max_hw_contexts = 8;
3197 
3198 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3199 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3200 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3201 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3202 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3203 		break;
3204 	case CHIP_HAWAII:
3205 		rdev->config.cik.max_shader_engines = 4;
3206 		rdev->config.cik.max_tile_pipes = 16;
3207 		rdev->config.cik.max_cu_per_sh = 11;
3208 		rdev->config.cik.max_sh_per_se = 1;
3209 		rdev->config.cik.max_backends_per_se = 4;
3210 		rdev->config.cik.max_texture_channel_caches = 16;
3211 		rdev->config.cik.max_gprs = 256;
3212 		rdev->config.cik.max_gs_threads = 32;
3213 		rdev->config.cik.max_hw_contexts = 8;
3214 
3215 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3216 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3217 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3218 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3219 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3220 		break;
3221 	case CHIP_KAVERI:
3222 		rdev->config.cik.max_shader_engines = 1;
3223 		rdev->config.cik.max_tile_pipes = 4;
3224 		if ((rdev->pdev->device == 0x1304) ||
3225 		    (rdev->pdev->device == 0x1305) ||
3226 		    (rdev->pdev->device == 0x130C) ||
3227 		    (rdev->pdev->device == 0x130F) ||
3228 		    (rdev->pdev->device == 0x1310) ||
3229 		    (rdev->pdev->device == 0x1311) ||
3230 		    (rdev->pdev->device == 0x131C)) {
3231 			rdev->config.cik.max_cu_per_sh = 8;
3232 			rdev->config.cik.max_backends_per_se = 2;
3233 		} else if ((rdev->pdev->device == 0x1309) ||
3234 			   (rdev->pdev->device == 0x130A) ||
3235 			   (rdev->pdev->device == 0x130D) ||
3236 			   (rdev->pdev->device == 0x1313) ||
3237 			   (rdev->pdev->device == 0x131D)) {
3238 			rdev->config.cik.max_cu_per_sh = 6;
3239 			rdev->config.cik.max_backends_per_se = 2;
3240 		} else if ((rdev->pdev->device == 0x1306) ||
3241 			   (rdev->pdev->device == 0x1307) ||
3242 			   (rdev->pdev->device == 0x130B) ||
3243 			   (rdev->pdev->device == 0x130E) ||
3244 			   (rdev->pdev->device == 0x1315) ||
3245 			   (rdev->pdev->device == 0x131B)) {
3246 			rdev->config.cik.max_cu_per_sh = 4;
3247 			rdev->config.cik.max_backends_per_se = 1;
3248 		} else {
3249 			rdev->config.cik.max_cu_per_sh = 3;
3250 			rdev->config.cik.max_backends_per_se = 1;
3251 		}
3252 		rdev->config.cik.max_sh_per_se = 1;
3253 		rdev->config.cik.max_texture_channel_caches = 4;
3254 		rdev->config.cik.max_gprs = 256;
3255 		rdev->config.cik.max_gs_threads = 16;
3256 		rdev->config.cik.max_hw_contexts = 8;
3257 
3258 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3259 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3260 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3261 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3262 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3263 		break;
3264 	case CHIP_KABINI:
3265 	default:
3266 		rdev->config.cik.max_shader_engines = 1;
3267 		rdev->config.cik.max_tile_pipes = 2;
3268 		rdev->config.cik.max_cu_per_sh = 2;
3269 		rdev->config.cik.max_sh_per_se = 1;
3270 		rdev->config.cik.max_backends_per_se = 1;
3271 		rdev->config.cik.max_texture_channel_caches = 2;
3272 		rdev->config.cik.max_gprs = 256;
3273 		rdev->config.cik.max_gs_threads = 16;
3274 		rdev->config.cik.max_hw_contexts = 8;
3275 
3276 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3277 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3278 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3279 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3280 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3281 		break;
3282 	}
3283 
3284 	/* Initialize HDP */
3285 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3286 		WREG32((0x2c14 + j), 0x00000000);
3287 		WREG32((0x2c18 + j), 0x00000000);
3288 		WREG32((0x2c1c + j), 0x00000000);
3289 		WREG32((0x2c20 + j), 0x00000000);
3290 		WREG32((0x2c24 + j), 0x00000000);
3291 	}
3292 
3293 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3294 
3295 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3296 
3297 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3298 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3299 
3300 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3301 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3302 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3303 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3304 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3305 		rdev->config.cik.mem_row_size_in_kb = 4;
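	/*
	 * Worked example (editor's note): NOOFCOLS = 0 gives
	 * 4 * 2^8 / 1024 = 1 KB rows, while NOOFCOLS = 2 gives 4 KB,
	 * the ceiling enforced by the clamp above.
	 */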
3306 	/* XXX use MC settings? */
3307 	rdev->config.cik.shader_engine_tile_size = 32;
3308 	rdev->config.cik.num_gpus = 1;
3309 	rdev->config.cik.multi_gpu_tile_size = 64;
3310 
3311 	/* fix up row size */
3312 	gb_addr_config &= ~ROW_SIZE_MASK;
3313 	switch (rdev->config.cik.mem_row_size_in_kb) {
3314 	case 1:
3315 	default:
3316 		gb_addr_config |= ROW_SIZE(0);
3317 		break;
3318 	case 2:
3319 		gb_addr_config |= ROW_SIZE(1);
3320 		break;
3321 	case 4:
3322 		gb_addr_config |= ROW_SIZE(2);
3323 		break;
3324 	}
3325 
3326 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3327 	 * not have bank info, so create a custom tiling dword.
3328 	 * bits 3:0   num_pipes
3329 	 * bits 7:4   num_banks
3330 	 * bits 11:8  group_size
3331 	 * bits 15:12 row_size
3332 	 */
3333 	rdev->config.cik.tile_config = 0;
3334 	switch (rdev->config.cik.num_tile_pipes) {
3335 	case 1:
3336 		rdev->config.cik.tile_config |= (0 << 0);
3337 		break;
3338 	case 2:
3339 		rdev->config.cik.tile_config |= (1 << 0);
3340 		break;
3341 	case 4:
3342 		rdev->config.cik.tile_config |= (2 << 0);
3343 		break;
3344 	case 8:
3345 	default:
3346 		/* XXX what about 12? */
3347 		rdev->config.cik.tile_config |= (3 << 0);
3348 		break;
3349 	}
3350 	rdev->config.cik.tile_config |=
3351 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3352 	rdev->config.cik.tile_config |=
3353 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3354 	rdev->config.cik.tile_config |=
3355 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3356 
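	/*
	 * Illustrative decode (editor's sketch, not driver code): a consumer
	 * of the packed tile_config dword can unpack it per the layout
	 * documented above.  The bank field is the raw NOOFBANK value.
	 *
	 *	num_pipes   = 1   << (tile_config & 0xf);          1/2/4/8
	 *	bank_field  = (tile_config >> 4) & 0xf;            raw NOOFBANK
	 *	group_size  = 256 << ((tile_config >> 8) & 0xf);   bytes, assuming
	 *	                                                   0 => 256B interleave
	 *	row_size_kb = 1   << ((tile_config >> 12) & 0xf);  1/2/4 KB
	 */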
3357 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3358 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3359 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3360 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3361 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3362 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3363 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3364 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3365 
3366 	cik_tiling_mode_table_init(rdev);
3367 
3368 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3369 		     rdev->config.cik.max_sh_per_se,
3370 		     rdev->config.cik.max_backends_per_se);
3371 
3372 	/* set HW defaults for 3D engine */
3373 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3374 
3375 	WREG32(SX_DEBUG_1, 0x20);
3376 
3377 	WREG32(TA_CNTL_AUX, 0x00010000);
3378 
3379 	tmp = RREG32(SPI_CONFIG_CNTL);
3380 	tmp |= 0x03000000;
3381 	WREG32(SPI_CONFIG_CNTL, tmp);
3382 
3383 	WREG32(SQ_CONFIG, 1);
3384 
3385 	WREG32(DB_DEBUG, 0);
3386 
3387 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3388 	tmp |= 0x00000400;
3389 	WREG32(DB_DEBUG2, tmp);
3390 
3391 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3392 	tmp |= 0x00020200;
3393 	WREG32(DB_DEBUG3, tmp);
3394 
3395 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3396 	tmp |= 0x00018208;
3397 	WREG32(CB_HW_CONTROL, tmp);
3398 
3399 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3400 
3401 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3402 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3403 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3404 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3405 
3406 	WREG32(VGT_NUM_INSTANCES, 1);
3407 
3408 	WREG32(CP_PERFMON_CNTL, 0);
3409 
3410 	WREG32(SQ_CONFIG, 0);
3411 
3412 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3413 					  FORCE_EOV_MAX_REZ_CNT(255)));
3414 
3415 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3416 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3417 
3418 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3419 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3420 
3421 	tmp = RREG32(HDP_MISC_CNTL);
3422 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3423 	WREG32(HDP_MISC_CNTL, tmp);
3424 
3425 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3426 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3427 
3428 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3429 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3430 
3431 	udelay(50);
3432 }
3433 
3434 /*
3435  * GPU scratch register helper functions.
3436  */
3437 /**
3438  * cik_scratch_init - setup driver info for CP scratch regs
3439  *
3440  * @rdev: radeon_device pointer
3441  *
3442  * Set up the number and offset of the CP scratch registers.
3443  * NOTE: use of CP scratch registers is a legacy interface and
3444  * is not used by default on newer asics (r6xx+).  On newer asics,
3445  * memory buffers are used for fences rather than scratch regs.
3446  */
3447 static void cik_scratch_init(struct radeon_device *rdev)
3448 {
3449 	int i;
3450 
3451 	rdev->scratch.num_reg = 7;
3452 	rdev->scratch.reg_base = SCRATCH_REG0;
3453 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3454 		rdev->scratch.free[i] = true;
3455 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3456 	}
3457 }
3458 
3459 /**
3460  * cik_ring_test - basic gfx ring test
3461  *
3462  * @rdev: radeon_device pointer
3463  * @ring: radeon_ring structure holding ring information
3464  *
3465  * Allocate a scratch register and write to it using the gfx ring (CIK).
3466  * Provides a basic gfx ring test to verify that the ring is working.
3467  * Used by cik_cp_gfx_resume().
3468  * Returns 0 on success, error on failure.
3469  */
3470 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3471 {
3472 	uint32_t scratch;
3473 	uint32_t tmp = 0;
3474 	unsigned i;
3475 	int r;
3476 
3477 	r = radeon_scratch_get(rdev, &scratch);
3478 	if (r) {
3479 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3480 		return r;
3481 	}
3482 	WREG32(scratch, 0xCAFEDEAD);
3483 	r = radeon_ring_lock(rdev, ring, 3);
3484 	if (r) {
3485 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3486 		radeon_scratch_free(rdev, scratch);
3487 		return r;
3488 	}
3489 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3490 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3491 	radeon_ring_write(ring, 0xDEADBEEF);
3492 	radeon_ring_unlock_commit(rdev, ring);
3493 
3494 	for (i = 0; i < rdev->usec_timeout; i++) {
3495 		tmp = RREG32(scratch);
3496 		if (tmp == 0xDEADBEEF)
3497 			break;
3498 		DRM_UDELAY(1);
3499 	}
3500 	if (i < rdev->usec_timeout) {
3501 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3502 	} else {
3503 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3504 			  ring->idx, scratch, tmp);
3505 		r = -EINVAL;
3506 	}
3507 	radeon_scratch_free(rdev, scratch);
3508 	return r;
3509 }
3510 
3511 /**
3512  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3513  *
3514  * @rdev: radeon_device pointer
3515  * @ridx: radeon ring index
3516  *
3517  * Emits an hdp flush on the cp.
3518  */
3519 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3520 				       int ridx)
3521 {
3522 	struct radeon_ring *ring = &rdev->ring[ridx];
3523 	u32 ref_and_mask;
3524 
3525 	switch (ring->idx) {
3526 	case CAYMAN_RING_TYPE_CP1_INDEX:
3527 	case CAYMAN_RING_TYPE_CP2_INDEX:
3528 	default:
3529 		switch (ring->me) {
3530 		case 0:
3531 			ref_and_mask = CP2 << ring->pipe;
3532 			break;
3533 		case 1:
3534 			ref_and_mask = CP6 << ring->pipe;
3535 			break;
3536 		default:
3537 			return;
3538 		}
3539 		break;
3540 	case RADEON_RING_TYPE_GFX_INDEX:
3541 		ref_and_mask = CP0;
3542 		break;
3543 	}
3544 
3545 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3546 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3547 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3548 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3549 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3550 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3551 	radeon_ring_write(ring, ref_and_mask);
3552 	radeon_ring_write(ring, ref_and_mask);
3553 	radeon_ring_write(ring, 0x20); /* poll interval */
3554 }
3555 
3556 /**
3557  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3558  *
3559  * @rdev: radeon_device pointer
3560  * @fence: radeon fence object
3561  *
3562  * Emits a fence sequence number on the gfx ring and flushes
3563  * GPU caches.
3564  */
3565 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3566 			     struct radeon_fence *fence)
3567 {
3568 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3569 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3570 
3571 	/* EVENT_WRITE_EOP - flush caches, send int */
3572 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3573 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3574 				 EOP_TC_ACTION_EN |
3575 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3576 				 EVENT_INDEX(5)));
3577 	radeon_ring_write(ring, addr & 0xfffffffc);
3578 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3579 	radeon_ring_write(ring, fence->seq);
3580 	radeon_ring_write(ring, 0);
3581 	/* HDP flush */
3582 	cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
3583 }
3584 
3585 /**
3586  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3587  *
3588  * @rdev: radeon_device pointer
3589  * @fence: radeon fence object
3590  *
3591  * Emits a fence sequence number on the compute ring and flushes
3592  * GPU caches.
3593  */
3594 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3595 				 struct radeon_fence *fence)
3596 {
3597 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3598 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3599 
3600 	/* RELEASE_MEM - flush caches, send int */
3601 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3602 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3603 				 EOP_TC_ACTION_EN |
3604 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3605 				 EVENT_INDEX(5)));
3606 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3607 	radeon_ring_write(ring, addr & 0xfffffffc);
3608 	radeon_ring_write(ring, upper_32_bits(addr));
3609 	radeon_ring_write(ring, fence->seq);
3610 	radeon_ring_write(ring, 0);
3611 	/* HDP flush */
3612 	cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
3613 }
3614 
3615 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3616 			     struct radeon_ring *ring,
3617 			     struct radeon_semaphore *semaphore,
3618 			     bool emit_wait)
3619 {
3620 	uint64_t addr = semaphore->gpu_addr;
3621 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3622 
3623 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3624 	radeon_ring_write(ring, addr & 0xffffffff);
3625 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3626 
3627 	return true;
3628 }
3629 
3630 /**
3631  * cik_copy_cpdma - copy pages using the CP DMA engine
3632  *
3633  * @rdev: radeon_device pointer
3634  * @src_offset: src GPU address
3635  * @dst_offset: dst GPU address
3636  * @num_gpu_pages: number of GPU pages to xfer
3637  * @fence: radeon fence object
3638  *
3639  * Copy GPU pages using the CP DMA engine (CIK+).
3640  * Used by the radeon ttm implementation to move pages if
3641  * registered as the asic copy callback.
3642  */
3643 int cik_copy_cpdma(struct radeon_device *rdev,
3644 		   uint64_t src_offset, uint64_t dst_offset,
3645 		   unsigned num_gpu_pages,
3646 		   struct radeon_fence **fence)
3647 {
3648 	struct radeon_semaphore *sem = NULL;
3649 	int ring_index = rdev->asic->copy.blit_ring_index;
3650 	struct radeon_ring *ring = &rdev->ring[ring_index];
3651 	u32 size_in_bytes, cur_size_in_bytes, control;
3652 	int i, num_loops;
3653 	int r = 0;
3654 
3655 	r = radeon_semaphore_create(rdev, &sem);
3656 	if (r) {
3657 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3658 		return r;
3659 	}
3660 
3661 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3662 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
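	/*
	 * Worked example (editor's note): a 16 MiB copy is 16777216 bytes
	 * and each DMA_DATA packet moves at most 0x1fffff (2097151) bytes,
	 * so num_loops = DIV_ROUND_UP(16777216, 2097151) = 9 packets of
	 * 7 dwords each, which the ring lock below reserves room for.
	 */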
3663 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3664 	if (r) {
3665 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3666 		radeon_semaphore_free(rdev, &sem, NULL);
3667 		return r;
3668 	}
3669 
3670 	radeon_semaphore_sync_to(sem, *fence);
3671 	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
3672 
3673 	for (i = 0; i < num_loops; i++) {
3674 		cur_size_in_bytes = size_in_bytes;
3675 		if (cur_size_in_bytes > 0x1fffff)
3676 			cur_size_in_bytes = 0x1fffff;
3677 		size_in_bytes -= cur_size_in_bytes;
3678 		control = 0;
3679 		if (size_in_bytes == 0)
3680 			control |= PACKET3_DMA_DATA_CP_SYNC;
3681 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3682 		radeon_ring_write(ring, control);
3683 		radeon_ring_write(ring, lower_32_bits(src_offset));
3684 		radeon_ring_write(ring, upper_32_bits(src_offset));
3685 		radeon_ring_write(ring, lower_32_bits(dst_offset));
3686 		radeon_ring_write(ring, upper_32_bits(dst_offset));
3687 		radeon_ring_write(ring, cur_size_in_bytes);
3688 		src_offset += cur_size_in_bytes;
3689 		dst_offset += cur_size_in_bytes;
3690 	}
3691 
3692 	r = radeon_fence_emit(rdev, fence, ring->idx);
3693 	if (r) {
3694 		radeon_ring_unlock_undo(rdev, ring);
		radeon_semaphore_free(rdev, &sem, NULL);
3695 		return r;
3696 	}
3697 
3698 	radeon_ring_unlock_commit(rdev, ring);
3699 	radeon_semaphore_free(rdev, &sem, *fence);
3700 
3701 	return r;
3702 }
3703 
3704 /*
3705  * IB stuff
3706  */
3707 /**
3708  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3709  *
3710  * @rdev: radeon_device pointer
3711  * @ib: radeon indirect buffer object
3712  *
3713  * Emits a DE (drawing engine) or CE (constant engine) IB
3714  * on the gfx ring.  IBs are usually generated by userspace
3715  * acceleration drivers and submitted to the kernel for
3716  * scheduling on the ring.  This function schedules the IB
3717  * on the gfx ring for execution by the GPU.
3718  */
3719 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3720 {
3721 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3722 	u32 header, control = INDIRECT_BUFFER_VALID;
3723 
3724 	if (ib->is_const_ib) {
3725 		/* set switch buffer packet before const IB */
3726 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3727 		radeon_ring_write(ring, 0);
3728 
3729 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3730 	} else {
3731 		u32 next_rptr;
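		/*
		 * The constants below count the dwords emitted before the
		 * IB starts: 3 for the SET_UCONFIG_REG write (or 5 for the
		 * WRITE_DATA variant) plus the 4-dword INDIRECT_BUFFER
		 * packet emitted at the end of this function.
		 */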
3732 		if (ring->rptr_save_reg) {
3733 			next_rptr = ring->wptr + 3 + 4;
3734 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3735 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3736 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3737 			radeon_ring_write(ring, next_rptr);
3738 		} else if (rdev->wb.enabled) {
3739 			next_rptr = ring->wptr + 5 + 4;
3740 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3741 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3742 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3743 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3744 			radeon_ring_write(ring, next_rptr);
3745 		}
3746 
3747 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3748 	}
3749 
3750 	control |= ib->length_dw |
3751 		(ib->vm ? (ib->vm->id << 24) : 0);
3752 
3753 	radeon_ring_write(ring, header);
3754 	radeon_ring_write(ring,
3755 #ifdef __BIG_ENDIAN
3756 			  (2 << 0) |
3757 #endif
3758 			  (ib->gpu_addr & 0xFFFFFFFC));
3759 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3760 	radeon_ring_write(ring, control);
3761 }
3762 
3763 /**
3764  * cik_ib_test - basic gfx ring IB test
3765  *
3766  * @rdev: radeon_device pointer
3767  * @ring: radeon_ring structure holding ring information
3768  *
3769  * Allocate an IB and execute it on the gfx ring (CIK).
3770  * Provides a basic gfx ring test to verify that IBs are working.
3771  * Returns 0 on success, error on failure.
3772  */
3773 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3774 {
3775 	struct radeon_ib ib;
3776 	uint32_t scratch;
3777 	uint32_t tmp = 0;
3778 	unsigned i;
3779 	int r;
3780 
3781 	r = radeon_scratch_get(rdev, &scratch);
3782 	if (r) {
3783 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3784 		return r;
3785 	}
3786 	WREG32(scratch, 0xCAFEDEAD);
3787 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3788 	if (r) {
3789 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3790 		radeon_scratch_free(rdev, scratch);
3791 		return r;
3792 	}
3793 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3794 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3795 	ib.ptr[2] = 0xDEADBEEF;
3796 	ib.length_dw = 3;
3797 	r = radeon_ib_schedule(rdev, &ib, NULL);
3798 	if (r) {
3799 		radeon_scratch_free(rdev, scratch);
3800 		radeon_ib_free(rdev, &ib);
3801 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3802 		return r;
3803 	}
3804 	r = radeon_fence_wait(ib.fence, false);
3805 	if (r) {
3806 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3807 		radeon_scratch_free(rdev, scratch);
3808 		radeon_ib_free(rdev, &ib);
3809 		return r;
3810 	}
3811 	for (i = 0; i < rdev->usec_timeout; i++) {
3812 		tmp = RREG32(scratch);
3813 		if (tmp == 0xDEADBEEF)
3814 			break;
3815 		DRM_UDELAY(1);
3816 	}
3817 	if (i < rdev->usec_timeout) {
3818 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3819 	} else {
3820 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3821 			  scratch, tmp);
3822 		r = -EINVAL;
3823 	}
3824 	radeon_scratch_free(rdev, scratch);
3825 	radeon_ib_free(rdev, &ib);
3826 	return r;
3827 }
3828 
3829 /*
3830  * CP.
3831  * On CIK, gfx and compute now have independent command processors.
3832  *
3833  * GFX
3834  * Gfx consists of a single ring and can process both gfx jobs and
3835  * compute jobs.  The gfx CP consists of three microengines (ME):
3836  * PFP - Pre-Fetch Parser
3837  * ME - Micro Engine
3838  * CE - Constant Engine
3839  * The PFP and ME make up what is considered the Drawing Engine (DE).
3840  * The CE is an asynchronous engine used for updating buffer descriptors
3841  * used by the DE so that they can be loaded into cache in parallel
3842  * while the DE is processing state update packets.
3843  *
3844  * Compute
3845  * The compute CP consists of two microengines (ME):
3846  * MEC1 - Compute MicroEngine 1
3847  * MEC2 - Compute MicroEngine 2
3848  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3849  * The queues are exposed to userspace and are programmed directly
3850  * by the compute runtime.
3851  */
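/*
 * Illustrative sketch (editor's note, not a driver API): a flat compute
 * queue id in [0, num_mec * 4 * 8) decomposes into the (me, pipe, queue)
 * triple taken by cik_srbm_select() as:
 *
 *	me    = (queue_id / (4 * 8)) + 1;	MEC1 is me 1, MEC2 is me 2
 *	pipe  = (queue_id / 8) & 3;
 *	queue =  queue_id & 7;
 */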
3852 /**
3853  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3854  *
3855  * @rdev: radeon_device pointer
3856  * @enable: enable or disable the MEs
3857  *
3858  * Halts or unhalts the gfx MEs.
3859  */
3860 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3861 {
3862 	if (enable)
3863 		WREG32(CP_ME_CNTL, 0);
3864 	else {
3865 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3866 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3867 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3868 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3869 	}
3870 	udelay(50);
3871 }
3872 
3873 /**
3874  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3875  *
3876  * @rdev: radeon_device pointer
3877  *
3878  * Loads the gfx PFP, ME, and CE ucode.
3879  * Returns 0 for success, -EINVAL if the ucode is not available.
3880  */
3881 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3882 {
3883 	const __be32 *fw_data;
3884 	int i;
3885 
3886 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3887 		return -EINVAL;
3888 
3889 	cik_cp_gfx_enable(rdev, false);
3890 
3891 	/* PFP */
3892 	fw_data = (const __be32 *)rdev->pfp_fw->data;
3893 	WREG32(CP_PFP_UCODE_ADDR, 0);
3894 	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3895 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3896 	WREG32(CP_PFP_UCODE_ADDR, 0);
3897 
3898 	/* CE */
3899 	fw_data = (const __be32 *)rdev->ce_fw->data;
3900 	WREG32(CP_CE_UCODE_ADDR, 0);
3901 	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3902 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3903 	WREG32(CP_CE_UCODE_ADDR, 0);
3904 
3905 	/* ME */
3906 	fw_data = (const __be32 *)rdev->me_fw->data;
3907 	WREG32(CP_ME_RAM_WADDR, 0);
3908 	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3909 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3910 	WREG32(CP_ME_RAM_WADDR, 0);
3911 
3912 	WREG32(CP_PFP_UCODE_ADDR, 0);
3913 	WREG32(CP_CE_UCODE_ADDR, 0);
3914 	WREG32(CP_ME_RAM_WADDR, 0);
3915 	WREG32(CP_ME_RAM_RADDR, 0);
3916 	return 0;
3917 }
3918 
3919 /**
3920  * cik_cp_gfx_start - start the gfx ring
3921  *
3922  * @rdev: radeon_device pointer
3923  *
3924  * Enables the ring and loads the clear state context and other
3925  * packets required to init the ring.
3926  * Returns 0 for success, error for failure.
3927  */
3928 static int cik_cp_gfx_start(struct radeon_device *rdev)
3929 {
3930 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3931 	int r, i;
3932 
3933 	/* init the CP */
3934 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3935 	WREG32(CP_ENDIAN_SWAP, 0);
3936 	WREG32(CP_DEVICE_ID, 1);
3937 
3938 	cik_cp_gfx_enable(rdev, true);
3939 
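	/*
	 * The 17 extra dwords reserved below cover the fixed packets
	 * emitted around the clear state: 4 (SET_BASE) + 2 (PREAMBLE
	 * begin) + 3 (CONTEXT_CONTROL) + 2 (PREAMBLE end) +
	 * 2 (CLEAR_STATE) + 4 (SET_CONTEXT_REG).
	 */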
3940 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3941 	if (r) {
3942 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3943 		return r;
3944 	}
3945 
3946 	/* init the CE partitions.  CE only used for gfx on CIK */
3947 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3948 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3949 	radeon_ring_write(ring, 0xc000);
3950 	radeon_ring_write(ring, 0xc000);
3951 
3952 	/* setup clear context state */
3953 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3954 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3955 
3956 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3957 	radeon_ring_write(ring, 0x80000000);
3958 	radeon_ring_write(ring, 0x80000000);
3959 
3960 	for (i = 0; i < cik_default_size; i++)
3961 		radeon_ring_write(ring, cik_default_state[i]);
3962 
3963 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3964 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3965 
3966 	/* set clear context state */
3967 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3968 	radeon_ring_write(ring, 0);
3969 
3970 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3971 	radeon_ring_write(ring, 0x00000316);
3972 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3973 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3974 
3975 	radeon_ring_unlock_commit(rdev, ring);
3976 
3977 	return 0;
3978 }
3979 
3980 /**
3981  * cik_cp_gfx_fini - stop the gfx ring
3982  *
3983  * @rdev: radeon_device pointer
3984  *
3985  * Stop the gfx ring and tear down the driver ring
3986  * info.
3987  */
3988 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3989 {
3990 	cik_cp_gfx_enable(rdev, false);
3991 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3992 }
3993 
3994 /**
3995  * cik_cp_gfx_resume - setup the gfx ring buffer registers
3996  *
3997  * @rdev: radeon_device pointer
3998  *
3999  * Program the location and size of the gfx ring buffer
4000  * and test it to make sure it's working.
4001  * Returns 0 for success, error for failure.
4002  */
4003 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4004 {
4005 	struct radeon_ring *ring;
4006 	u32 tmp;
4007 	u32 rb_bufsz;
4008 	u64 rb_addr;
4009 	int r;
4010 
4011 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4012 	if (rdev->family != CHIP_HAWAII)
4013 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4014 
4015 	/* Set the write pointer delay */
4016 	WREG32(CP_RB_WPTR_DELAY, 0);
4017 
4018 	/* set the RB to use vmid 0 */
4019 	WREG32(CP_RB_VMID, 0);
4020 
4021 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4022 
4023 	/* ring 0 - compute and gfx */
4024 	/* Set ring buffer size */
4025 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4026 	rb_bufsz = order_base_2(ring->ring_size / 8);
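	/*
	 * Worked example (editor's note): a 1 MiB ring gives
	 * ring_size / 8 = 131072 qwords, so rb_bufsz =
	 * order_base_2(131072) = 17; the RB size field is expressed
	 * as log2 of the size in quadwords.
	 */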
4027 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4028 #ifdef __BIG_ENDIAN
4029 	tmp |= BUF_SWAP_32BIT;
4030 #endif
4031 	WREG32(CP_RB0_CNTL, tmp);
4032 
4033 	/* Initialize the ring buffer's read and write pointers */
4034 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4035 	ring->wptr = 0;
4036 	WREG32(CP_RB0_WPTR, ring->wptr);
4037 
4038 	/* set the wb address whether it's enabled or not */
4039 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4040 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4041 
4042 	/* scratch register shadowing is no longer supported */
4043 	WREG32(SCRATCH_UMSK, 0);
4044 
4045 	if (!rdev->wb.enabled)
4046 		tmp |= RB_NO_UPDATE;
4047 
4048 	mdelay(1);
4049 	WREG32(CP_RB0_CNTL, tmp);
4050 
4051 	rb_addr = ring->gpu_addr >> 8;
4052 	WREG32(CP_RB0_BASE, rb_addr);
4053 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4054 
4055 	/* start the ring */
4056 	cik_cp_gfx_start(rdev);
4057 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4058 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4059 	if (r) {
4060 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4061 		return r;
4062 	}
4063 
4064 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4065 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4066 
4067 	return 0;
4068 }
4069 
4070 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4071 		     struct radeon_ring *ring)
4072 {
4073 	u32 rptr;
4074 
4075 	if (rdev->wb.enabled)
4076 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4077 	else
4078 		rptr = RREG32(CP_RB0_RPTR);
4079 
4080 	return rptr;
4081 }
4082 
4083 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4084 		     struct radeon_ring *ring)
4085 {
4086 	u32 wptr;
4087 
4088 	wptr = RREG32(CP_RB0_WPTR);
4089 
4090 	return wptr;
4091 }
4092 
4093 void cik_gfx_set_wptr(struct radeon_device *rdev,
4094 		      struct radeon_ring *ring)
4095 {
4096 	WREG32(CP_RB0_WPTR, ring->wptr);
4097 	(void)RREG32(CP_RB0_WPTR);
4098 }
4099 
4100 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4101 			 struct radeon_ring *ring)
4102 {
4103 	u32 rptr;
4104 
4105 	if (rdev->wb.enabled) {
4106 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4107 	} else {
4108 		mutex_lock(&rdev->srbm_mutex);
4109 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4110 		rptr = RREG32(CP_HQD_PQ_RPTR);
4111 		cik_srbm_select(rdev, 0, 0, 0, 0);
4112 		mutex_unlock(&rdev->srbm_mutex);
4113 	}
4114 
4115 	return rptr;
4116 }
4117 
4118 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4119 			 struct radeon_ring *ring)
4120 {
4121 	u32 wptr;
4122 
4123 	if (rdev->wb.enabled) {
4124 		/* XXX check if swapping is necessary on BE */
4125 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4126 	} else {
4127 		mutex_lock(&rdev->srbm_mutex);
4128 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4129 		wptr = RREG32(CP_HQD_PQ_WPTR);
4130 		cik_srbm_select(rdev, 0, 0, 0, 0);
4131 		mutex_unlock(&rdev->srbm_mutex);
4132 	}
4133 
4134 	return wptr;
4135 }
4136 
4137 void cik_compute_set_wptr(struct radeon_device *rdev,
4138 			  struct radeon_ring *ring)
4139 {
4140 	/* XXX check if swapping is necessary on BE */
4141 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4142 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4143 }
4144 
4145 /**
4146  * cik_cp_compute_enable - enable/disable the compute CP MEs
4147  *
4148  * @rdev: radeon_device pointer
4149  * @enable: enable or disable the MEs
4150  *
4151  * Halts or unhalts the compute MEs.
4152  */
4153 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4154 {
4155 	if (enable)
4156 		WREG32(CP_MEC_CNTL, 0);
4157 	else {
4158 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4159 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4160 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4161 	}
4162 	udelay(50);
4163 }
4164 
4165 /**
4166  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4167  *
4168  * @rdev: radeon_device pointer
4169  *
4170  * Loads the compute MEC1&2 ucode.
4171  * Returns 0 for success, -EINVAL if the ucode is not available.
4172  */
4173 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4174 {
4175 	const __be32 *fw_data;
4176 	int i;
4177 
4178 	if (!rdev->mec_fw)
4179 		return -EINVAL;
4180 
4181 	cik_cp_compute_enable(rdev, false);
4182 
4183 	/* MEC1 */
4184 	fw_data = (const __be32 *)rdev->mec_fw->data;
4185 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4186 	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4187 		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4188 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4189 
4190 	if (rdev->family == CHIP_KAVERI) {
4191 		/* MEC2 */
4192 		fw_data = (const __be32 *)rdev->mec_fw->data;
4193 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4194 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4195 			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4196 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4197 	}
4198 
4199 	return 0;
4200 }
4201 
4202 /**
4203  * cik_cp_compute_start - start the compute queues
4204  *
4205  * @rdev: radeon_device pointer
4206  *
4207  * Enable the compute queues.
4208  * Returns 0 for success, error for failure.
4209  */
4210 static int cik_cp_compute_start(struct radeon_device *rdev)
4211 {
4212 	cik_cp_compute_enable(rdev, true);
4213 
4214 	return 0;
4215 }
4216 
4217 /**
4218  * cik_cp_compute_fini - stop the compute queues
4219  *
4220  * @rdev: radeon_device pointer
4221  *
4222  * Stop the compute queues and tear down the driver queue
4223  * info.
4224  */
4225 static void cik_cp_compute_fini(struct radeon_device *rdev)
4226 {
4227 	int i, idx, r;
4228 
4229 	cik_cp_compute_enable(rdev, false);
4230 
4231 	for (i = 0; i < 2; i++) {
4232 		if (i == 0)
4233 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4234 		else
4235 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4236 
4237 		if (rdev->ring[idx].mqd_obj) {
4238 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4239 			if (unlikely(r != 0))
4240 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4241 
4242 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4243 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4244 
4245 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4246 			rdev->ring[idx].mqd_obj = NULL;
4247 		}
4248 	}
4249 }
4250 
4251 static void cik_mec_fini(struct radeon_device *rdev)
4252 {
4253 	int r;
4254 
4255 	if (rdev->mec.hpd_eop_obj) {
4256 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4257 		if (unlikely(r != 0))
4258 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4259 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4260 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4261 
4262 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4263 		rdev->mec.hpd_eop_obj = NULL;
4264 	}
4265 }
4266 
4267 #define MEC_HPD_SIZE 2048
4268 
4269 static int cik_mec_init(struct radeon_device *rdev)
4270 {
4271 	int r;
4272 	u32 *hpd;
4273 
4274 	/*
4275 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4276 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4277 	 */
4278 	if (rdev->family == CHIP_KAVERI)
4279 		rdev->mec.num_mec = 2;
4280 	else
4281 		rdev->mec.num_mec = 1;
4282 	rdev->mec.num_pipe = 4;
4283 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4284 
4285 	if (rdev->mec.hpd_eop_obj == NULL) {
4286 		r = radeon_bo_create(rdev,
4287 				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4288 				     PAGE_SIZE, true,
4289 				     RADEON_GEM_DOMAIN_GTT, NULL,
4290 				     &rdev->mec.hpd_eop_obj);
4291 		if (r) {
4292 			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4293 			return r;
4294 		}
4295 	}
4296 
4297 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4298 	if (unlikely(r != 0)) {
4299 		cik_mec_fini(rdev);
4300 		return r;
4301 	}
4302 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4303 			  &rdev->mec.hpd_eop_gpu_addr);
4304 	if (r) {
4305 		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4306 		cik_mec_fini(rdev);
4307 		return r;
4308 	}
4309 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4310 	if (r) {
4311 		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4312 		cik_mec_fini(rdev);
4313 		return r;
4314 	}
4315 
4316 	/* clear memory.  Not sure if this is required or not */
4317 	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4318 
4319 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4320 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4321 
4322 	return 0;
4323 }
4324 
4325 struct hqd_registers {
4327 	u32 cp_mqd_base_addr;
4328 	u32 cp_mqd_base_addr_hi;
4329 	u32 cp_hqd_active;
4330 	u32 cp_hqd_vmid;
4331 	u32 cp_hqd_persistent_state;
4332 	u32 cp_hqd_pipe_priority;
4333 	u32 cp_hqd_queue_priority;
4334 	u32 cp_hqd_quantum;
4335 	u32 cp_hqd_pq_base;
4336 	u32 cp_hqd_pq_base_hi;
4337 	u32 cp_hqd_pq_rptr;
4338 	u32 cp_hqd_pq_rptr_report_addr;
4339 	u32 cp_hqd_pq_rptr_report_addr_hi;
4340 	u32 cp_hqd_pq_wptr_poll_addr;
4341 	u32 cp_hqd_pq_wptr_poll_addr_hi;
4342 	u32 cp_hqd_pq_doorbell_control;
4343 	u32 cp_hqd_pq_wptr;
4344 	u32 cp_hqd_pq_control;
4345 	u32 cp_hqd_ib_base_addr;
4346 	u32 cp_hqd_ib_base_addr_hi;
4347 	u32 cp_hqd_ib_rptr;
4348 	u32 cp_hqd_ib_control;
4349 	u32 cp_hqd_iq_timer;
4350 	u32 cp_hqd_iq_rptr;
4351 	u32 cp_hqd_dequeue_request;
4352 	u32 cp_hqd_dma_offload;
4353 	u32 cp_hqd_sema_cmd;
4354 	u32 cp_hqd_msg_type;
4355 	u32 cp_hqd_atomic0_preop_lo;
4356 	u32 cp_hqd_atomic0_preop_hi;
4357 	u32 cp_hqd_atomic1_preop_lo;
4358 	u32 cp_hqd_atomic1_preop_hi;
4359 	u32 cp_hqd_hq_scheduler0;
4360 	u32 cp_hqd_hq_scheduler1;
4361 	u32 cp_mqd_control;
4362 };
4363 
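/*
 * The MQD (memory queue descriptor) is the in-memory image of a compute
 * queue; the embedded hqd_registers mirror the CP_HQD_* / CP_MQD_*
 * register state programmed in cik_cp_compute_resume() below.
 */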
4364 struct bonaire_mqd {
4366 	u32 header;
4367 	u32 dispatch_initiator;
4368 	u32 dimensions[3];
4369 	u32 start_idx[3];
4370 	u32 num_threads[3];
4371 	u32 pipeline_stat_enable;
4372 	u32 perf_counter_enable;
4373 	u32 pgm[2];
4374 	u32 tba[2];
4375 	u32 tma[2];
4376 	u32 pgm_rsrc[2];
4377 	u32 vmid;
4378 	u32 resource_limits;
4379 	u32 static_thread_mgmt01[2];
4380 	u32 tmp_ring_size;
4381 	u32 static_thread_mgmt23[2];
4382 	u32 restart[3];
4383 	u32 thread_trace_enable;
4384 	u32 reserved1;
4385 	u32 user_data[16];
4386 	u32 vgtcs_invoke_count[2];
4387 	struct hqd_registers queue_state;
4388 	u32 dequeue_cntr;
4389 	u32 interrupt_queue[64];
4390 };
4391 
4392 /**
4393  * cik_cp_compute_resume - setup the compute queue registers
4394  *
4395  * @rdev: radeon_device pointer
4396  *
4397  * Program the compute queues and test them to make sure they
4398  * are working.
4399  * Returns 0 for success, error for failure.
4400  */
4401 static int cik_cp_compute_resume(struct radeon_device *rdev)
4402 {
4403 	int r, i, j, idx;
4404 	u32 tmp;
4405 	bool use_doorbell = true;
4406 	u64 hqd_gpu_addr;
4407 	u64 mqd_gpu_addr;
4408 	u64 eop_gpu_addr;
4409 	u64 wb_gpu_addr;
4410 	u32 *buf;
4411 	struct bonaire_mqd *mqd;
4412 
4413 	r = cik_cp_compute_start(rdev);
4414 	if (r)
4415 		return r;
4416 
4417 	/* fix up chicken bits */
4418 	tmp = RREG32(CP_CPF_DEBUG);
4419 	tmp |= (1 << 23);
4420 	WREG32(CP_CPF_DEBUG, tmp);
4421 
4422 	/* init the pipes */
4423 	mutex_lock(&rdev->srbm_mutex);
4424 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4425 		int me = (i < 4) ? 1 : 2;
4426 		int pipe = (i < 4) ? i : (i - 4);
4427 
4428 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4429 
4430 		cik_srbm_select(rdev, me, pipe, 0, 0);
4431 
4432 		/* write the EOP addr */
4433 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4434 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4435 
4436 		/* set the VMID assigned */
4437 		WREG32(CP_HPD_EOP_VMID, 0);
4438 
4439 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4440 		tmp = RREG32(CP_HPD_EOP_CONTROL);
4441 		tmp &= ~EOP_SIZE_MASK;
4442 		tmp |= order_base_2(MEC_HPD_SIZE / 8);
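		/*
		 * With MEC_HPD_SIZE = 2048 bytes this is order_base_2(256)
		 * = 8; per the note above the hardware reads that as
		 * 2^(8+1) = 512 dwords = 2048 bytes.
		 */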
4443 		WREG32(CP_HPD_EOP_CONTROL, tmp);
4444 	}
4445 	cik_srbm_select(rdev, 0, 0, 0, 0);
4446 	mutex_unlock(&rdev->srbm_mutex);
4447 
4448 	/* init the queues.  Just two for now. */
4449 	for (i = 0; i < 2; i++) {
4450 		if (i == 0)
4451 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4452 		else
4453 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4454 
4455 		if (rdev->ring[idx].mqd_obj == NULL) {
4456 			r = radeon_bo_create(rdev,
4457 					     sizeof(struct bonaire_mqd),
4458 					     PAGE_SIZE, true,
4459 					     RADEON_GEM_DOMAIN_GTT, NULL,
4460 					     &rdev->ring[idx].mqd_obj);
4461 			if (r) {
4462 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4463 				return r;
4464 			}
4465 		}
4466 
4467 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4468 		if (unlikely(r != 0)) {
4469 			cik_cp_compute_fini(rdev);
4470 			return r;
4471 		}
4472 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4473 				  &mqd_gpu_addr);
4474 		if (r) {
4475 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4476 			cik_cp_compute_fini(rdev);
4477 			return r;
4478 		}
4479 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4480 		if (r) {
4481 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4482 			cik_cp_compute_fini(rdev);
4483 			return r;
4484 		}
4485 
4486 		/* init the mqd struct */
4487 		memset(buf, 0, sizeof(struct bonaire_mqd));
4488 
4489 		mqd = (struct bonaire_mqd *)buf;
4490 		mqd->header = 0xC0310800;
4491 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4492 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4493 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4494 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4495 
4496 		mutex_lock(&rdev->srbm_mutex);
4497 		cik_srbm_select(rdev, rdev->ring[idx].me,
4498 				rdev->ring[idx].pipe,
4499 				rdev->ring[idx].queue, 0);
4500 
4501 		/* disable wptr polling */
4502 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4503 		tmp &= ~WPTR_POLL_EN;
4504 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4505 
4506 		/* enable doorbell? */
4507 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4508 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4509 		if (use_doorbell)
4510 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4511 		else
4512 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4513 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4514 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4515 
4516 		/* disable the queue if it's active */
4517 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4518 		mqd->queue_state.cp_hqd_pq_rptr = 0;
4519 		mqd->queue_state.cp_hqd_pq_wptr = 0;
4520 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4521 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4522 			for (j = 0; j < rdev->usec_timeout; j++) {
4523 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4524 					break;
4525 				udelay(1);
4526 			}
4527 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4528 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4529 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4530 		}
4531 
4532 		/* set the pointer to the MQD */
4533 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4534 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4535 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4536 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4537 		/* set MQD vmid to 0 */
4538 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4539 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4540 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4541 
4542 		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4543 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4544 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4545 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4546 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4547 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4548 
4549 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4550 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4551 		mqd->queue_state.cp_hqd_pq_control &=
4552 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4553 
4554 		mqd->queue_state.cp_hqd_pq_control |=
4555 			order_base_2(rdev->ring[idx].ring_size / 8);
4556 		mqd->queue_state.cp_hqd_pq_control |=
4557 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4558 #ifdef __BIG_ENDIAN
4559 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4560 #endif
4561 		mqd->queue_state.cp_hqd_pq_control &=
4562 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4563 		mqd->queue_state.cp_hqd_pq_control |=
4564 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4565 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4566 
4567 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4568 		if (i == 0)
4569 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4570 		else
4571 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4572 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4573 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4574 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4575 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4576 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4577 
4578 		/* set the wb address whether it's enabled or not */
4579 		if (i == 0)
4580 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4581 		else
4582 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4583 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4584 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4585 			upper_32_bits(wb_gpu_addr) & 0xffff;
4586 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4587 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4588 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4589 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4590 
4591 		/* enable the doorbell if requested */
4592 		if (use_doorbell) {
4593 			mqd->queue_state.cp_hqd_pq_doorbell_control =
4594 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4595 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4596 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4597 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4598 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4599 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4600 				~(DOORBELL_SOURCE | DOORBELL_HIT);
4601 
4602 		} else {
4603 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4604 		}
4605 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4606 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4607 
4608 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4609 		rdev->ring[idx].wptr = 0;
4610 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4611 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4612 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4613 
4614 		/* set the vmid for the queue */
4615 		mqd->queue_state.cp_hqd_vmid = 0;
4616 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4617 
4618 		/* activate the queue */
4619 		mqd->queue_state.cp_hqd_active = 1;
4620 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4621 
4622 		cik_srbm_select(rdev, 0, 0, 0, 0);
4623 		mutex_unlock(&rdev->srbm_mutex);
4624 
4625 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4626 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4627 
4628 		rdev->ring[idx].ready = true;
4629 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4630 		if (r)
4631 			rdev->ring[idx].ready = false;
4632 	}
4633 
4634 	return 0;
4635 }
4636 
4637 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4638 {
4639 	cik_cp_gfx_enable(rdev, enable);
4640 	cik_cp_compute_enable(rdev, enable);
4641 }
4642 
4643 static int cik_cp_load_microcode(struct radeon_device *rdev)
4644 {
4645 	int r;
4646 
4647 	r = cik_cp_gfx_load_microcode(rdev);
4648 	if (r)
4649 		return r;
4650 	r = cik_cp_compute_load_microcode(rdev);
4651 	if (r)
4652 		return r;
4653 
4654 	return 0;
4655 }
4656 
4657 static void cik_cp_fini(struct radeon_device *rdev)
4658 {
4659 	cik_cp_gfx_fini(rdev);
4660 	cik_cp_compute_fini(rdev);
4661 }
4662 
4663 static int cik_cp_resume(struct radeon_device *rdev)
4664 {
4665 	int r;
4666 
4667 	cik_enable_gui_idle_interrupt(rdev, false);
4668 
4669 	r = cik_cp_load_microcode(rdev);
4670 	if (r)
4671 		return r;
4672 
4673 	r = cik_cp_gfx_resume(rdev);
4674 	if (r)
4675 		return r;
4676 	r = cik_cp_compute_resume(rdev);
4677 	if (r)
4678 		return r;
4679 
4680 	cik_enable_gui_idle_interrupt(rdev, true);
4681 
4682 	return 0;
4683 }
4684 
4685 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4686 {
4687 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4688 		RREG32(GRBM_STATUS));
4689 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4690 		RREG32(GRBM_STATUS2));
4691 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4692 		RREG32(GRBM_STATUS_SE0));
4693 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4694 		RREG32(GRBM_STATUS_SE1));
4695 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4696 		RREG32(GRBM_STATUS_SE2));
4697 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4698 		RREG32(GRBM_STATUS_SE3));
4699 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4700 		RREG32(SRBM_STATUS));
4701 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4702 		RREG32(SRBM_STATUS2));
4703 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4704 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4705 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4706 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4707 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4708 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4709 		 RREG32(CP_STALLED_STAT1));
4710 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4711 		 RREG32(CP_STALLED_STAT2));
4712 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4713 		 RREG32(CP_STALLED_STAT3));
4714 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4715 		 RREG32(CP_CPF_BUSY_STAT));
4716 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4717 		 RREG32(CP_CPF_STALLED_STAT1));
4718 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4719 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4720 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4721 		 RREG32(CP_CPC_STALLED_STAT1));
4722 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4723 }
4724 
4725 /**
4726  * cik_gpu_check_soft_reset - check which blocks are busy
4727  *
4728  * @rdev: radeon_device pointer
4729  *
4730  * Check which blocks are busy and return the relevant reset
4731  * mask to be used by cik_gpu_soft_reset().
4732  * Returns a mask of the blocks to be reset.
4733  */
4734 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4735 {
4736 	u32 reset_mask = 0;
4737 	u32 tmp;
4738 
4739 	/* GRBM_STATUS */
4740 	tmp = RREG32(GRBM_STATUS);
4741 	if (tmp & (PA_BUSY | SC_BUSY |
4742 		   BCI_BUSY | SX_BUSY |
4743 		   TA_BUSY | VGT_BUSY |
4744 		   DB_BUSY | CB_BUSY |
4745 		   GDS_BUSY | SPI_BUSY |
4746 		   IA_BUSY | IA_BUSY_NO_DMA))
4747 		reset_mask |= RADEON_RESET_GFX;
4748 
4749 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4750 		reset_mask |= RADEON_RESET_CP;
4751 
4752 	/* GRBM_STATUS2 */
4753 	tmp = RREG32(GRBM_STATUS2);
4754 	if (tmp & RLC_BUSY)
4755 		reset_mask |= RADEON_RESET_RLC;
4756 
4757 	/* SDMA0_STATUS_REG */
4758 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4759 	if (!(tmp & SDMA_IDLE))
4760 		reset_mask |= RADEON_RESET_DMA;
4761 
4762 	/* SDMA1_STATUS_REG */
4763 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4764 	if (!(tmp & SDMA_IDLE))
4765 		reset_mask |= RADEON_RESET_DMA1;
4766 
4767 	/* SRBM_STATUS2 */
4768 	tmp = RREG32(SRBM_STATUS2);
4769 	if (tmp & SDMA_BUSY)
4770 		reset_mask |= RADEON_RESET_DMA;
4771 
4772 	if (tmp & SDMA1_BUSY)
4773 		reset_mask |= RADEON_RESET_DMA1;
4774 
4775 	/* SRBM_STATUS */
4776 	tmp = RREG32(SRBM_STATUS);
4777 
4778 	if (tmp & IH_BUSY)
4779 		reset_mask |= RADEON_RESET_IH;
4780 
4781 	if (tmp & SEM_BUSY)
4782 		reset_mask |= RADEON_RESET_SEM;
4783 
4784 	if (tmp & GRBM_RQ_PENDING)
4785 		reset_mask |= RADEON_RESET_GRBM;
4786 
4787 	if (tmp & VMC_BUSY)
4788 		reset_mask |= RADEON_RESET_VMC;
4789 
4790 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4791 		   MCC_BUSY | MCD_BUSY))
4792 		reset_mask |= RADEON_RESET_MC;
4793 
4794 	if (evergreen_is_display_hung(rdev))
4795 		reset_mask |= RADEON_RESET_DISPLAY;
4796 
4797 	/* Skip MC reset as it's most likely not hung, just busy */
4798 	if (reset_mask & RADEON_RESET_MC) {
4799 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4800 		reset_mask &= ~RADEON_RESET_MC;
4801 	}
4802 
4803 	return reset_mask;
4804 }
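
/*
 * Typical use (editor's sketch, mirroring cik_asic_reset() below): read
 * the mask, soft-reset only the hung blocks, then re-check.
 *
 *	u32 mask = cik_gpu_check_soft_reset(rdev);
 *	if (mask)
 *		cik_gpu_soft_reset(rdev, mask);
 *	mask = cik_gpu_check_soft_reset(rdev);
 */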
4805 
4806 /**
4807  * cik_gpu_soft_reset - soft reset GPU
4808  *
4809  * @rdev: radeon_device pointer
4810  * @reset_mask: mask of which blocks to reset
4811  *
4812  * Soft reset the blocks specified in @reset_mask.
4813  */
4814 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4815 {
4816 	struct evergreen_mc_save save;
4817 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4818 	u32 tmp;
4819 
4820 	if (reset_mask == 0)
4821 		return;
4822 
4823 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4824 
4825 	cik_print_gpu_status_regs(rdev);
4826 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4827 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4828 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4829 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4830 
4831 	/* disable CG/PG */
4832 	cik_fini_pg(rdev);
4833 	cik_fini_cg(rdev);
4834 
4835 	/* stop the rlc */
4836 	cik_rlc_stop(rdev);
4837 
4838 	/* Disable GFX parsing/prefetching */
4839 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4840 
4841 	/* Disable MEC parsing/prefetching */
4842 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4843 
4844 	if (reset_mask & RADEON_RESET_DMA) {
4845 		/* sdma0 */
4846 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4847 		tmp |= SDMA_HALT;
4848 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4849 	}
4850 	if (reset_mask & RADEON_RESET_DMA1) {
4851 		/* sdma1 */
4852 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4853 		tmp |= SDMA_HALT;
4854 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4855 	}
4856 
4857 	evergreen_mc_stop(rdev, &save);
4858 	if (evergreen_mc_wait_for_idle(rdev)) {
4859 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4860 	}
4861 
4862 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4863 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4864 
4865 	if (reset_mask & RADEON_RESET_CP) {
4866 		grbm_soft_reset |= SOFT_RESET_CP;
4867 
4868 		srbm_soft_reset |= SOFT_RESET_GRBM;
4869 	}
4870 
4871 	if (reset_mask & RADEON_RESET_DMA)
4872 		srbm_soft_reset |= SOFT_RESET_SDMA;
4873 
4874 	if (reset_mask & RADEON_RESET_DMA1)
4875 		srbm_soft_reset |= SOFT_RESET_SDMA1;
4876 
4877 	if (reset_mask & RADEON_RESET_DISPLAY)
4878 		srbm_soft_reset |= SOFT_RESET_DC;
4879 
4880 	if (reset_mask & RADEON_RESET_RLC)
4881 		grbm_soft_reset |= SOFT_RESET_RLC;
4882 
4883 	if (reset_mask & RADEON_RESET_SEM)
4884 		srbm_soft_reset |= SOFT_RESET_SEM;
4885 
4886 	if (reset_mask & RADEON_RESET_IH)
4887 		srbm_soft_reset |= SOFT_RESET_IH;
4888 
4889 	if (reset_mask & RADEON_RESET_GRBM)
4890 		srbm_soft_reset |= SOFT_RESET_GRBM;
4891 
4892 	if (reset_mask & RADEON_RESET_VMC)
4893 		srbm_soft_reset |= SOFT_RESET_VMC;
4894 
4895 	if (!(rdev->flags & RADEON_IS_IGP)) {
4896 		if (reset_mask & RADEON_RESET_MC)
4897 			srbm_soft_reset |= SOFT_RESET_MC;
4898 	}
4899 
4900 	if (grbm_soft_reset) {
4901 		tmp = RREG32(GRBM_SOFT_RESET);
4902 		tmp |= grbm_soft_reset;
4903 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4904 		WREG32(GRBM_SOFT_RESET, tmp);
4905 		tmp = RREG32(GRBM_SOFT_RESET);
4906 
4907 		udelay(50);
4908 
4909 		tmp &= ~grbm_soft_reset;
4910 		WREG32(GRBM_SOFT_RESET, tmp);
4911 		tmp = RREG32(GRBM_SOFT_RESET);
4912 	}
4913 
4914 	if (srbm_soft_reset) {
4915 		tmp = RREG32(SRBM_SOFT_RESET);
4916 		tmp |= srbm_soft_reset;
4917 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4918 		WREG32(SRBM_SOFT_RESET, tmp);
4919 		tmp = RREG32(SRBM_SOFT_RESET);
4920 
4921 		udelay(50);
4922 
4923 		tmp &= ~srbm_soft_reset;
4924 		WREG32(SRBM_SOFT_RESET, tmp);
4925 		tmp = RREG32(SRBM_SOFT_RESET);
4926 	}
4927 
4928 	/* Wait a little for things to settle down */
4929 	udelay(50);
4930 
4931 	evergreen_mc_resume(rdev, &save);
4932 	udelay(50);
4933 
4934 	cik_print_gpu_status_regs(rdev);
4935 }
4936 
4937 struct kv_reset_save_regs {
4938 	u32 gmcon_reng_execute;
4939 	u32 gmcon_misc;
4940 	u32 gmcon_misc3;
4941 };
4942 
4943 static void kv_save_regs_for_reset(struct radeon_device *rdev,
4944 				   struct kv_reset_save_regs *save)
4945 {
4946 	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
4947 	save->gmcon_misc = RREG32(GMCON_MISC);
4948 	save->gmcon_misc3 = RREG32(GMCON_MISC3);
4949 
4950 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
4951 	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
4952 						STCTRL_STUTTER_EN));
4953 }
4954 
4955 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
4956 				      struct kv_reset_save_regs *save)
4957 {
4958 	int i;
4959 
4960 	WREG32(GMCON_PGFSM_WRITE, 0);
4961 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
4962 
4963 	for (i = 0; i < 5; i++)
4964 		WREG32(GMCON_PGFSM_WRITE, 0);
4965 
4966 	WREG32(GMCON_PGFSM_WRITE, 0);
4967 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
4968 
4969 	for (i = 0; i < 5; i++)
4970 		WREG32(GMCON_PGFSM_WRITE, 0);
4971 
4972 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
4973 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
4974 
4975 	for (i = 0; i < 5; i++)
4976 		WREG32(GMCON_PGFSM_WRITE, 0);
4977 
4978 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
4979 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
4980 
4981 	for (i = 0; i < 5; i++)
4982 		WREG32(GMCON_PGFSM_WRITE, 0);
4983 
4984 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
4985 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
4986 
4987 	for (i = 0; i < 5; i++)
4988 		WREG32(GMCON_PGFSM_WRITE, 0);
4989 
4990 	WREG32(GMCON_PGFSM_WRITE, 0);
4991 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
4992 
4993 	for (i = 0; i < 5; i++)
4994 		WREG32(GMCON_PGFSM_WRITE, 0);
4995 
4996 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
4997 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
4998 
4999 	for (i = 0; i < 5; i++)
5000 		WREG32(GMCON_PGFSM_WRITE, 0);
5001 
5002 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5003 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5004 
5005 	for (i = 0; i < 5; i++)
5006 		WREG32(GMCON_PGFSM_WRITE, 0);
5007 
5008 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5009 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5010 
5011 	for (i = 0; i < 5; i++)
5012 		WREG32(GMCON_PGFSM_WRITE, 0);
5013 
5014 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5015 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5016 
5017 	for (i = 0; i < 5; i++)
5018 		WREG32(GMCON_PGFSM_WRITE, 0);
5019 
5020 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5021 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5022 
5023 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5024 	WREG32(GMCON_MISC, save->gmcon_misc);
5025 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5026 }
5027 
5028 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5029 {
5030 	struct evergreen_mc_save save;
5031 	struct kv_reset_save_regs kv_save = { 0 };
5032 	u32 tmp, i;
5033 
5034 	dev_info(rdev->dev, "GPU pci config reset\n");
5035 
5036 	/* disable dpm? */
5037 
5038 	/* disable cg/pg */
5039 	cik_fini_pg(rdev);
5040 	cik_fini_cg(rdev);
5041 
5042 	/* Disable GFX parsing/prefetching */
5043 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5044 
5045 	/* Disable MEC parsing/prefetching */
5046 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5047 
5048 	/* sdma0 */
5049 	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5050 	tmp |= SDMA_HALT;
5051 	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5052 	/* sdma1 */
5053 	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5054 	tmp |= SDMA_HALT;
5055 	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5056 	/* XXX other engines? */
5057 
5058 	/* halt the rlc, disable cp internal ints */
5059 	cik_rlc_stop(rdev);
5060 
5061 	udelay(50);
5062 
5063 	/* disable mem access */
5064 	evergreen_mc_stop(rdev, &save);
5065 	if (evergreen_mc_wait_for_idle(rdev)) {
5066 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5067 	}
5068 
5069 	if (rdev->flags & RADEON_IS_IGP)
5070 		kv_save_regs_for_reset(rdev, &kv_save);
5071 
5072 	/* disable BM */
5073 	pci_clear_master(rdev->pdev);
5074 	/* reset */
5075 	radeon_pci_config_reset(rdev);
5076 
5077 	udelay(100);
5078 
5079 	/* wait for asic to come out of reset */
5080 	for (i = 0; i < rdev->usec_timeout; i++) {
5081 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5082 			break;
5083 		udelay(1);
5084 	}
5085 
5086 	/* does asic init need to be run first??? */
5087 	if (rdev->flags & RADEON_IS_IGP)
5088 		kv_restore_regs_for_reset(rdev, &kv_save);
5089 }
5090 
5091 /**
5092  * cik_asic_reset - soft reset GPU
5093  *
5094  * @rdev: radeon_device pointer
5095  *
5096  * Look up which blocks are hung and attempt
5097  * to reset them.
5098  * Returns 0 for success.
5099  */
5100 int cik_asic_reset(struct radeon_device *rdev)
5101 {
5102 	u32 reset_mask;
5103 
5104 	reset_mask = cik_gpu_check_soft_reset(rdev);
5105 
5106 	if (reset_mask)
5107 		r600_set_bios_scratch_engine_hung(rdev, true);
5108 
5109 	/* try soft reset */
5110 	cik_gpu_soft_reset(rdev, reset_mask);
5111 
5112 	reset_mask = cik_gpu_check_soft_reset(rdev);
5113 
5114 	/* try pci config reset */
5115 	if (reset_mask && radeon_hard_reset)
5116 		cik_gpu_pci_config_reset(rdev);
5117 
5118 	reset_mask = cik_gpu_check_soft_reset(rdev);
5119 
5120 	if (!reset_mask)
5121 		r600_set_bios_scratch_engine_hung(rdev, false);
5122 
5123 	return 0;
5124 }
5125 
5126 /**
5127  * cik_gfx_is_lockup - check if the 3D engine is locked up
5128  *
5129  * @rdev: radeon_device pointer
5130  * @ring: radeon_ring structure holding ring information
5131  *
5132  * Check if the 3D engine is locked up (CIK).
5133  * Returns true if the engine is locked, false if not.
5134  */
5135 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5136 {
5137 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5138 
5139 	if (!(reset_mask & (RADEON_RESET_GFX |
5140 			    RADEON_RESET_COMPUTE |
5141 			    RADEON_RESET_CP))) {
5142 		radeon_ring_lockup_update(rdev, ring);
5143 		return false;
5144 	}
5145 	return radeon_ring_test_lockup(rdev, ring);
5146 }
5147 
5148 /* MC */
5149 /**
5150  * cik_mc_program - program the GPU memory controller
5151  *
5152  * @rdev: radeon_device pointer
5153  *
5154  * Set the location of vram, gart, and AGP in the GPU's
5155  * physical address space (CIK).
5156  */
5157 static void cik_mc_program(struct radeon_device *rdev)
5158 {
5159 	struct evergreen_mc_save save;
5160 	u32 tmp;
5161 	int i, j;
5162 
5163 	/* Initialize HDP */
5164 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5165 		WREG32((0x2c14 + j), 0x00000000);
5166 		WREG32((0x2c18 + j), 0x00000000);
5167 		WREG32((0x2c1c + j), 0x00000000);
5168 		WREG32((0x2c20 + j), 0x00000000);
5169 		WREG32((0x2c24 + j), 0x00000000);
5170 	}
5171 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5172 
5173 	evergreen_mc_stop(rdev, &save);
5174 	if (radeon_mc_wait_for_idle(rdev)) {
5175 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5176 	}
5177 	/* Lockout access through VGA aperture*/
5178 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5179 	/* Update configuration */
5180 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5181 	       rdev->mc.vram_start >> 12);
5182 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5183 	       rdev->mc.vram_end >> 12);
5184 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5185 	       rdev->vram_scratch.gpu_addr >> 12);
5186 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5187 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5188 	WREG32(MC_VM_FB_LOCATION, tmp);
5189 	/* XXX double check these! */
5190 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5191 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5192 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5193 	WREG32(MC_VM_AGP_BASE, 0);
5194 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5195 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5196 	if (radeon_mc_wait_for_idle(rdev)) {
5197 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5198 	}
5199 	evergreen_mc_resume(rdev, &save);
5200 	/* we need to own VRAM, so turn off the VGA renderer here
5201 	 * to stop it from overwriting our objects */
5202 	rv515_vga_render_disable(rdev);
5203 }
5204 
5205 /**
5206  * cik_mc_init - initialize the memory controller driver params
5207  *
5208  * @rdev: radeon_device pointer
5209  *
5210  * Look up the amount of vram, vram width, and decide how to place
5211  * vram and gart within the GPU's physical address space (CIK).
5212  * Returns 0 for success.
5213  */
5214 static int cik_mc_init(struct radeon_device *rdev)
5215 {
5216 	u32 tmp;
5217 	int chansize, numchan;
5218 
5219 	/* Get VRAM information */
5220 	rdev->mc.vram_is_ddr = true;
5221 	tmp = RREG32(MC_ARB_RAMCFG);
5222 	if (tmp & CHANSIZE_MASK) {
5223 		chansize = 64;
5224 	} else {
5225 		chansize = 32;
5226 	}
5227 	tmp = RREG32(MC_SHARED_CHMAP);
5228 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5229 	case 0:
5230 	default:
5231 		numchan = 1;
5232 		break;
5233 	case 1:
5234 		numchan = 2;
5235 		break;
5236 	case 2:
5237 		numchan = 4;
5238 		break;
5239 	case 3:
5240 		numchan = 8;
5241 		break;
5242 	case 4:
5243 		numchan = 3;
5244 		break;
5245 	case 5:
5246 		numchan = 6;
5247 		break;
5248 	case 6:
5249 		numchan = 10;
5250 		break;
5251 	case 7:
5252 		numchan = 12;
5253 		break;
5254 	case 8:
5255 		numchan = 16;
5256 		break;
5257 	}
5258 	rdev->mc.vram_width = numchan * chansize;
5259 	/* Could aper size report 0? */
5260 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5261 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5262 	/* size in MB on cik */
5263 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5264 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5265 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5266 	si_vram_gtt_location(rdev, &rdev->mc);
5267 	radeon_update_bandwidth_info(rdev);
5268 
5269 	return 0;
5270 }
5271 
5272 /*
5273  * GART
5274  * VMID 0 is the physical GPU addresses as used by the kernel.
5275  * VMIDs 1-15 are used for userspace clients and are handled
5276  * by the radeon vm/hsa code.
5277  */
5278 /**
5279  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5280  *
5281  * @rdev: radeon_device pointer
5282  *
5283  * Flush the TLB for the VMID 0 page table (CIK).
5284  */
5285 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5286 {
5287 	/* flush hdp cache */
5288 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5289 
5290 	/* bits 0-15 are the VM contexts0-15 */
5291 	/* bits 0-15 are the VM contexts 0-15 */
5292 }
5293 
5294 /**
5295  * cik_pcie_gart_enable - gart enable
5296  *
5297  * @rdev: radeon_device pointer
5298  *
5299  * This sets up the TLBs, programs the page tables for VMID0,
5300  * sets up the hw for VMIDs 1-15 which are allocated on
5301  * demand, and sets up the global locations for the LDS, GDS,
5302  * and GPUVM for FSA64 clients (CIK).
5303  * Returns 0 for success, errors for failure.
5304  */
5305 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5306 {
5307 	int r, i;
5308 
5309 	if (rdev->gart.robj == NULL) {
5310 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5311 		return -EINVAL;
5312 	}
5313 	r = radeon_gart_table_vram_pin(rdev);
5314 	if (r)
5315 		return r;
5316 	radeon_gart_restore(rdev);
5317 	/* Setup TLB control */
5318 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5319 	       (0xA << 7) |
5320 	       ENABLE_L1_TLB |
5321 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5322 	       ENABLE_ADVANCED_DRIVER_MODEL |
5323 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5324 	/* Setup L2 cache */
5325 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5326 	       ENABLE_L2_FRAGMENT_PROCESSING |
5327 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5328 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5329 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5330 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5331 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5332 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5333 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5334 	/* setup context0 */
5335 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5336 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5337 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5338 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5339 			(u32)(rdev->dummy_page.addr >> 12));
5340 	WREG32(VM_CONTEXT0_CNTL2, 0);
5341 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5342 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5343 
5344 	WREG32(0x15D4, 0);
5345 	WREG32(0x15D8, 0);
5346 	WREG32(0x15DC, 0);
5347 
5348 	/* empty contexts 1-15 */
5349 	/* FIXME: start with 4GB; once we use a two-level page table,
5350 	 * switch to the full vm size space
5351 	 */
5352 	/* set vm size, must be a multiple of 4 */
5353 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5354 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5355 	for (i = 1; i < 16; i++) {
5356 		if (i < 8)
5357 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5358 			       rdev->gart.table_addr >> 12);
5359 		else
5360 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5361 			       rdev->gart.table_addr >> 12);
5362 	}
5363 
5364 	/* enable context1-15 */
5365 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5366 	       (u32)(rdev->dummy_page.addr >> 12));
5367 	WREG32(VM_CONTEXT1_CNTL2, 4);
5368 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5369 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5370 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5371 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5372 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5373 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5374 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5375 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5376 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5377 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5378 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5379 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5380 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5381 
5382 	if (rdev->family == CHIP_KAVERI) {
5383 		u32 tmp = RREG32(CHUB_CONTROL);
5384 		tmp &= ~BYPASS_VM;
5385 		WREG32(CHUB_CONTROL, tmp);
5386 	}
5387 
5388 	/* XXX SH_MEM regs */
5389 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5390 	mutex_lock(&rdev->srbm_mutex);
5391 	for (i = 0; i < 16; i++) {
5392 		cik_srbm_select(rdev, 0, 0, 0, i);
5393 		/* CP and shaders */
5394 		WREG32(SH_MEM_CONFIG, 0);
5395 		WREG32(SH_MEM_APE1_BASE, 1);
5396 		WREG32(SH_MEM_APE1_LIMIT, 0);
5397 		WREG32(SH_MEM_BASES, 0);
5398 		/* SDMA GFX */
5399 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5400 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5401 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5402 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5403 		/* XXX SDMA RLC - todo */
5404 	}
5405 	cik_srbm_select(rdev, 0, 0, 0, 0);
5406 	mutex_unlock(&rdev->srbm_mutex);
5407 
5408 	cik_pcie_gart_tlb_flush(rdev);
5409 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5410 		 (unsigned)(rdev->mc.gtt_size >> 20),
5411 		 (unsigned long long)rdev->gart.table_addr);
5412 	rdev->gart.ready = true;
5413 	return 0;
5414 }
5415 
5416 /**
5417  * cik_pcie_gart_disable - gart disable
5418  *
5419  * @rdev: radeon_device pointer
5420  *
5421  * This disables all VM page tables (CIK).
5422  */
5423 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5424 {
5425 	/* Disable all tables */
5426 	WREG32(VM_CONTEXT0_CNTL, 0);
5427 	WREG32(VM_CONTEXT1_CNTL, 0);
5428 	/* Setup TLB control */
5429 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5430 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5431 	/* Setup L2 cache */
5432 	WREG32(VM_L2_CNTL,
5433 	       ENABLE_L2_FRAGMENT_PROCESSING |
5434 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5435 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5436 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5437 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5438 	WREG32(VM_L2_CNTL2, 0);
5439 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5440 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5441 	radeon_gart_table_vram_unpin(rdev);
5442 }
5443 
5444 /**
5445  * cik_pcie_gart_fini - vm fini callback
5446  *
5447  * @rdev: radeon_device pointer
5448  *
5449  * Tears down the driver GART/VM setup (CIK).
5450  */
5451 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5452 {
5453 	cik_pcie_gart_disable(rdev);
5454 	radeon_gart_table_vram_free(rdev);
5455 	radeon_gart_fini(rdev);
5456 }
5457 
5458 /* vm parser */
5459 /**
5460  * cik_ib_parse - vm ib_parse callback
5461  *
5462  * @rdev: radeon_device pointer
5463  * @ib: indirect buffer pointer
5464  *
5465  * CIK uses hw IB checking so this is a nop (CIK).
5466  */
5467 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5468 {
5469 	return 0;
5470 }
5471 
5472 /*
5473  * vm
5474  * VMID 0 is the physical GPU addresses as used by the kernel.
5475  * VMIDs 1-15 are used for userspace clients and are handled
5476  * by the radeon vm/hsa code.
5477  */
5478 /**
5479  * cik_vm_init - cik vm init callback
5480  *
5481  * @rdev: radeon_device pointer
5482  *
5483  * Inits cik specific vm parameters (number of VMs, base of vram for
5484  * VMIDs 1-15) (CIK).
5485  * Returns 0 for success.
5486  */
5487 int cik_vm_init(struct radeon_device *rdev)
5488 {
5489 	/* number of VMs */
5490 	rdev->vm_manager.nvm = 16;
5491 	/* base offset of vram pages */
5492 	if (rdev->flags & RADEON_IS_IGP) {
5493 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5494 		tmp <<= 22;
5495 		rdev->vm_manager.vram_base_offset = tmp;
5496 	} else {
5497 		rdev->vm_manager.vram_base_offset = 0;
5498 	}
5498 
5499 	return 0;
5500 }
5501 
5502 /**
5503  * cik_vm_fini - cik vm fini callback
5504  *
5505  * @rdev: radeon_device pointer
5506  *
5507  * Tear down any asic specific VM setup (CIK).
5508  */
5509 void cik_vm_fini(struct radeon_device *rdev)
5510 {
5511 }
5512 
5513 /**
5514  * cik_vm_decode_fault - print human readable fault info
5515  *
5516  * @rdev: radeon_device pointer
5517  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5518  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5519  *
5520  * Print human readable fault information (CIK).
5521  */
5522 static void cik_vm_decode_fault(struct radeon_device *rdev,
5523 				u32 status, u32 addr, u32 mc_client)
5524 {
5525 	u32 mc_id;
5526 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5527 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5528 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5529 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5530 
5531 	if (rdev->family == CHIP_HAWAII)
5532 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5533 	else
5534 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5535 
5536 	printk(KERN_ERR "VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5537 	       protections, vmid, addr,
5538 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5539 	       block, mc_client, mc_id);
5540 }
5541 
5542 /**
5543  * cik_vm_flush - cik vm flush using the CP
5544  *
5545  * @rdev: radeon_device pointer
5546  *
5547  * Update the page table base and flush the VM TLB
5548  * using the CP (CIK).
5549  */
5550 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5551 {
5552 	struct radeon_ring *ring = &rdev->ring[ridx];
5553 
5554 	if (vm == NULL)
5555 		return;
5556 
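	/* each WRITE_DATA packet below is: header, a control word with
	 * the engine/destination select, the destination register dword
	 * offset, the upper address bits (0 for register writes), then
	 * the payload dword(s) */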
5557 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5558 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5559 				 WRITE_DATA_DST_SEL(0)));
5560 	if (vm->id < 8) {
5561 		radeon_ring_write(ring,
5562 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5563 	} else {
5564 		radeon_ring_write(ring,
5565 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5566 	}
5567 	radeon_ring_write(ring, 0);
5568 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5569 
5570 	/* update SH_MEM_* regs */
5571 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5572 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5573 				 WRITE_DATA_DST_SEL(0)));
5574 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5575 	radeon_ring_write(ring, 0);
5576 	radeon_ring_write(ring, VMID(vm->id));
5577 
5578 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5579 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5580 				 WRITE_DATA_DST_SEL(0)));
5581 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5582 	radeon_ring_write(ring, 0);
5583 
5584 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5585 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5586 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5587 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5588 
5589 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5590 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5591 				 WRITE_DATA_DST_SEL(0)));
5592 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5593 	radeon_ring_write(ring, 0);
5594 	radeon_ring_write(ring, VMID(0));
5595 
5596 	/* HDP flush */
5597 	cik_hdp_flush_cp_ring_emit(rdev, ridx);
5598 
5599 	/* bits 0-15 are the VM contexts 0-15 */
5600 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5601 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5602 				 WRITE_DATA_DST_SEL(0)));
5603 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5604 	radeon_ring_write(ring, 0);
5605 	radeon_ring_write(ring, 1 << vm->id);
5606 
5607 	/* compute doesn't have PFP */
5608 	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
5609 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5610 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5611 		radeon_ring_write(ring, 0x0);
5612 	}
5613 }
5614 
5615 /*
5616  * RLC
5617  * The RLC is a multi-purpose microengine that handles a
5618  * variety of functions, the most important of which is
5619  * the interrupt controller.
5620  */
5621 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5622 					  bool enable)
5623 {
5624 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5625 
5626 	if (enable)
5627 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5628 	else
5629 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5630 	WREG32(CP_INT_CNTL_RING0, tmp);
5631 }
5632 
5633 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5634 {
5635 	u32 tmp;
5636 
5637 	tmp = RREG32(RLC_LB_CNTL);
5638 	if (enable)
5639 		tmp |= LOAD_BALANCE_ENABLE;
5640 	else
5641 		tmp &= ~LOAD_BALANCE_ENABLE;
5642 	WREG32(RLC_LB_CNTL, tmp);
5643 }
5644 
5645 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5646 {
5647 	u32 i, j, k;
5648 	u32 mask;
5649 
5650 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5651 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5652 			cik_select_se_sh(rdev, i, j);
5653 			for (k = 0; k < rdev->usec_timeout; k++) {
5654 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5655 					break;
5656 				udelay(1);
5657 			}
5658 		}
5659 	}
5660 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5661 
5662 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5663 	for (k = 0; k < rdev->usec_timeout; k++) {
5664 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5665 			break;
5666 		udelay(1);
5667 	}
5668 }
5669 
5670 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5671 {
5672 	u32 tmp;
5673 
5674 	tmp = RREG32(RLC_CNTL);
5675 	if (tmp != rlc)
5676 		WREG32(RLC_CNTL, rlc);
5677 }
5678 
5679 static u32 cik_halt_rlc(struct radeon_device *rdev)
5680 {
5681 	u32 data, orig;
5682 
5683 	orig = data = RREG32(RLC_CNTL);
5684 
5685 	if (data & RLC_ENABLE) {
5686 		u32 i;
5687 
5688 		data &= ~RLC_ENABLE;
5689 		WREG32(RLC_CNTL, data);
5690 
5691 		for (i = 0; i < rdev->usec_timeout; i++) {
5692 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5693 				break;
5694 			udelay(1);
5695 		}
5696 
5697 		cik_wait_for_rlc_serdes(rdev);
5698 	}
5699 
5700 	return orig;
5701 }
5702 
5703 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5704 {
5705 	u32 tmp, i, mask;
5706 
5707 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5708 	WREG32(RLC_GPR_REG2, tmp);
5709 
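	/* wait for the RLC to report stable gfx power and clocks, then
	 * for it to acknowledge the request by clearing REQ */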
5710 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5711 	for (i = 0; i < rdev->usec_timeout; i++) {
5712 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5713 			break;
5714 		udelay(1);
5715 	}
5716 
5717 	for (i = 0; i < rdev->usec_timeout; i++) {
5718 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5719 			break;
5720 		udelay(1);
5721 	}
5722 }
5723 
5724 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5725 {
5726 	u32 tmp;
5727 
5728 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5729 	WREG32(RLC_GPR_REG2, tmp);
5730 }
5731 
5732 /**
5733  * cik_rlc_stop - stop the RLC ME
5734  *
5735  * @rdev: radeon_device pointer
5736  *
5737  * Halt the RLC ME (MicroEngine) (CIK).
5738  */
5739 static void cik_rlc_stop(struct radeon_device *rdev)
5740 {
5741 	WREG32(RLC_CNTL, 0);
5742 
5743 	cik_enable_gui_idle_interrupt(rdev, false);
5744 
5745 	cik_wait_for_rlc_serdes(rdev);
5746 }
5747 
5748 /**
5749  * cik_rlc_start - start the RLC ME
5750  *
5751  * @rdev: radeon_device pointer
5752  *
5753  * Unhalt the RLC ME (MicroEngine) (CIK).
5754  */
5755 static void cik_rlc_start(struct radeon_device *rdev)
5756 {
5757 	WREG32(RLC_CNTL, RLC_ENABLE);
5758 
5759 	cik_enable_gui_idle_interrupt(rdev, true);
5760 
5761 	udelay(50);
5762 }
5763 
5764 /**
5765  * cik_rlc_resume - setup the RLC hw
5766  *
5767  * @rdev: radeon_device pointer
5768  *
5769  * Initialize the RLC registers, load the ucode,
5770  * and start the RLC (CIK).
5771  * Returns 0 for success, -EINVAL if the ucode is not available.
5772  */
5773 static int cik_rlc_resume(struct radeon_device *rdev)
5774 {
5775 	u32 i, size, tmp;
5776 	const __be32 *fw_data;
5777 
5778 	if (!rdev->rlc_fw)
5779 		return -EINVAL;
5780 
5781 	switch (rdev->family) {
5782 	case CHIP_BONAIRE:
5783 	case CHIP_HAWAII:
5784 	default:
5785 		size = BONAIRE_RLC_UCODE_SIZE;
5786 		break;
5787 	case CHIP_KAVERI:
5788 		size = KV_RLC_UCODE_SIZE;
5789 		break;
5790 	case CHIP_KABINI:
5791 		size = KB_RLC_UCODE_SIZE;
5792 		break;
5793 	}
5794 
5795 	cik_rlc_stop(rdev);
5796 
5797 	/* disable CG */
5798 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5799 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5800 
5801 	si_rlc_reset(rdev);
5802 
5803 	cik_init_pg(rdev);
5804 
5805 	cik_init_cg(rdev);
5806 
5807 	WREG32(RLC_LB_CNTR_INIT, 0);
5808 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5809 
5810 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5811 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5812 	WREG32(RLC_LB_PARAMS, 0x00600408);
5813 	WREG32(RLC_LB_CNTL, 0x80000004);
5814 
5815 	WREG32(RLC_MC_CNTL, 0);
5816 	WREG32(RLC_UCODE_CNTL, 0);
5817 
5818 	fw_data = (const __be32 *)rdev->rlc_fw->data;
5819 	WREG32(RLC_GPM_UCODE_ADDR, 0);
5820 	for (i = 0; i < size; i++)
5821 		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5822 	WREG32(RLC_GPM_UCODE_ADDR, 0);
5823 
5824 	/* XXX - find out what chips support lbpw */
5825 	cik_enable_lbpw(rdev, false);
5826 
5827 	if (rdev->family == CHIP_BONAIRE)
5828 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
5829 
5830 	cik_rlc_start(rdev);
5831 
5832 	return 0;
5833 }
5834 
5835 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5836 {
5837 	u32 data, orig, tmp, tmp2;
5838 
5839 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5840 
5841 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5842 		cik_enable_gui_idle_interrupt(rdev, true);
5843 
5844 		tmp = cik_halt_rlc(rdev);
5845 
5846 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5847 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5848 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5849 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5850 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
5851 
5852 		cik_update_rlc(rdev, tmp);
5853 
5854 		data |= CGCG_EN | CGLS_EN;
5855 	} else {
5856 		cik_enable_gui_idle_interrupt(rdev, false);
5857 
5858 		RREG32(CB_CGTT_SCLK_CTRL);
5859 		RREG32(CB_CGTT_SCLK_CTRL);
5860 		RREG32(CB_CGTT_SCLK_CTRL);
5861 		RREG32(CB_CGTT_SCLK_CTRL);
5862 
5863 		data &= ~(CGCG_EN | CGLS_EN);
5864 	}
5865 
5866 	if (orig != data)
5867 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5869 }
5870 
5871 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5872 {
5873 	u32 data, orig, tmp = 0;
5874 
5875 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5876 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
5877 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5878 				orig = data = RREG32(CP_MEM_SLP_CNTL);
5879 				data |= CP_MEM_LS_EN;
5880 				if (orig != data)
5881 					WREG32(CP_MEM_SLP_CNTL, data);
5882 			}
5883 		}
5884 
5885 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5886 		data &= 0xfffffffd;
5887 		if (orig != data)
5888 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5889 
5890 		tmp = cik_halt_rlc(rdev);
5891 
5892 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5893 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5894 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5895 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5896 		WREG32(RLC_SERDES_WR_CTRL, data);
5897 
5898 		cik_update_rlc(rdev, tmp);
5899 
5900 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
5901 			orig = data = RREG32(CGTS_SM_CTRL_REG);
5902 			data &= ~SM_MODE_MASK;
5903 			data |= SM_MODE(0x2);
5904 			data |= SM_MODE_ENABLE;
5905 			data &= ~CGTS_OVERRIDE;
5906 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
5907 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
5908 				data &= ~CGTS_LS_OVERRIDE;
5909 			data &= ~ON_MONITOR_ADD_MASK;
5910 			data |= ON_MONITOR_ADD_EN;
5911 			data |= ON_MONITOR_ADD(0x96);
5912 			if (orig != data)
5913 				WREG32(CGTS_SM_CTRL_REG, data);
5914 		}
5915 	} else {
5916 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5917 		data |= 0x00000002;
5918 		if (orig != data)
5919 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5920 
5921 		data = RREG32(RLC_MEM_SLP_CNTL);
5922 		if (data & RLC_MEM_LS_EN) {
5923 			data &= ~RLC_MEM_LS_EN;
5924 			WREG32(RLC_MEM_SLP_CNTL, data);
5925 		}
5926 
5927 		data = RREG32(CP_MEM_SLP_CNTL);
5928 		if (data & CP_MEM_LS_EN) {
5929 			data &= ~CP_MEM_LS_EN;
5930 			WREG32(CP_MEM_SLP_CNTL, data);
5931 		}
5932 
5933 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5934 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
5935 		if (orig != data)
5936 			WREG32(CGTS_SM_CTRL_REG, data);
5937 
5938 		tmp = cik_halt_rlc(rdev);
5939 
5940 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5941 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5942 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5943 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
5944 		WREG32(RLC_SERDES_WR_CTRL, data);
5945 
5946 		cik_update_rlc(rdev, tmp);
5947 	}
5948 }
5949 
5950 static const u32 mc_cg_registers[] =
5951 {
5952 	MC_HUB_MISC_HUB_CG,
5953 	MC_HUB_MISC_SIP_CG,
5954 	MC_HUB_MISC_VM_CG,
5955 	MC_XPB_CLK_GAT,
5956 	ATC_MISC_CG,
5957 	MC_CITF_MISC_WR_CG,
5958 	MC_CITF_MISC_RD_CG,
5959 	MC_CITF_MISC_VM_CG,
5960 	VM_L2_CG,
5961 };
5962 
5963 static void cik_enable_mc_ls(struct radeon_device *rdev,
5964 			     bool enable)
5965 {
5966 	int i;
5967 	u32 orig, data;
5968 
5969 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5970 		orig = data = RREG32(mc_cg_registers[i]);
5971 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5972 			data |= MC_LS_ENABLE;
5973 		else
5974 			data &= ~MC_LS_ENABLE;
5975 		if (data != orig)
5976 			WREG32(mc_cg_registers[i], data);
5977 	}
5978 }
5979 
5980 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5981 			       bool enable)
5982 {
5983 	int i;
5984 	u32 orig, data;
5985 
5986 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5987 		orig = data = RREG32(mc_cg_registers[i]);
5988 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5989 			data |= MC_CG_ENABLE;
5990 		else
5991 			data &= ~MC_CG_ENABLE;
5992 		if (data != orig)
5993 			WREG32(mc_cg_registers[i], data);
5994 	}
5995 }
5996 
5997 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5998 				 bool enable)
5999 {
6000 	u32 orig, data;
6001 
6002 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6003 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6004 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6005 	} else {
6006 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6007 		data |= 0xff000000;
6008 		if (data != orig)
6009 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6010 
6011 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6012 		data |= 0xff000000;
6013 		if (data != orig)
6014 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6015 	}
6016 }
6017 
6018 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6019 				 bool enable)
6020 {
6021 	u32 orig, data;
6022 
6023 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6024 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6025 		data |= 0x100;
6026 		if (orig != data)
6027 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6028 
6029 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6030 		data |= 0x100;
6031 		if (orig != data)
6032 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6033 	} else {
6034 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6035 		data &= ~0x100;
6036 		if (orig != data)
6037 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6038 
6039 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6040 		data &= ~0x100;
6041 		if (orig != data)
6042 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6043 	}
6044 }
6045 
6046 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6047 				bool enable)
6048 {
6049 	u32 orig, data;
6050 
6051 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6052 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6053 		data = 0xfff; /* note: the value just read is overwritten */
6054 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6055 
6056 		orig = data = RREG32(UVD_CGC_CTRL);
6057 		data |= DCM;
6058 		if (orig != data)
6059 			WREG32(UVD_CGC_CTRL, data);
6060 	} else {
6061 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6062 		data &= ~0xfff;
6063 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6064 
6065 		orig = data = RREG32(UVD_CGC_CTRL);
6066 		data &= ~DCM;
6067 		if (orig != data)
6068 			WREG32(UVD_CGC_CTRL, data);
6069 	}
6070 }
6071 
6072 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6073 			       bool enable)
6074 {
6075 	u32 orig, data;
6076 
6077 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6078 
6079 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6080 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6081 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6082 	else
6083 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6084 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6085 
6086 	if (orig != data)
6087 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6088 }
6089 
6090 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6091 				bool enable)
6092 {
6093 	u32 orig, data;
6094 
6095 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6096 
6097 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6098 		data &= ~CLOCK_GATING_DIS;
6099 	else
6100 		data |= CLOCK_GATING_DIS;
6101 
6102 	if (orig != data)
6103 		WREG32(HDP_HOST_PATH_CNTL, data);
6104 }
6105 
6106 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6107 			      bool enable)
6108 {
6109 	u32 orig, data;
6110 
6111 	orig = data = RREG32(HDP_MEM_POWER_LS);
6112 
6113 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6114 		data |= HDP_LS_ENABLE;
6115 	else
6116 		data &= ~HDP_LS_ENABLE;
6117 
6118 	if (orig != data)
6119 		WREG32(HDP_MEM_POWER_LS, data);
6120 }
6121 
6122 void cik_update_cg(struct radeon_device *rdev,
6123 		   u32 block, bool enable)
6124 {
6126 	if (block & RADEON_CG_BLOCK_GFX) {
6127 		cik_enable_gui_idle_interrupt(rdev, false);
6128 		/* order matters! */
6129 		if (enable) {
6130 			cik_enable_mgcg(rdev, true);
6131 			cik_enable_cgcg(rdev, true);
6132 		} else {
6133 			cik_enable_cgcg(rdev, false);
6134 			cik_enable_mgcg(rdev, false);
6135 		}
6136 		cik_enable_gui_idle_interrupt(rdev, true);
6137 	}
6138 
6139 	if (block & RADEON_CG_BLOCK_MC) {
6140 		if (!(rdev->flags & RADEON_IS_IGP)) {
6141 			cik_enable_mc_mgcg(rdev, enable);
6142 			cik_enable_mc_ls(rdev, enable);
6143 		}
6144 	}
6145 
6146 	if (block & RADEON_CG_BLOCK_SDMA) {
6147 		cik_enable_sdma_mgcg(rdev, enable);
6148 		cik_enable_sdma_mgls(rdev, enable);
6149 	}
6150 
6151 	if (block & RADEON_CG_BLOCK_BIF) {
6152 		cik_enable_bif_mgls(rdev, enable);
6153 	}
6154 
6155 	if (block & RADEON_CG_BLOCK_UVD) {
6156 		if (rdev->has_uvd)
6157 			cik_enable_uvd_mgcg(rdev, enable);
6158 	}
6159 
6160 	if (block & RADEON_CG_BLOCK_HDP) {
6161 		cik_enable_hdp_mgcg(rdev, enable);
6162 		cik_enable_hdp_ls(rdev, enable);
6163 	}
6164 
6165 	if (block & RADEON_CG_BLOCK_VCE) {
6166 		vce_v2_0_enable_mgcg(rdev, enable);
6167 	}
6168 }
6169 
6170 static void cik_init_cg(struct radeon_device *rdev)
6171 {
6173 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6174 
6175 	if (rdev->has_uvd)
6176 		si_init_uvd_internal_cg(rdev);
6177 
6178 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6179 			     RADEON_CG_BLOCK_SDMA |
6180 			     RADEON_CG_BLOCK_BIF |
6181 			     RADEON_CG_BLOCK_UVD |
6182 			     RADEON_CG_BLOCK_HDP), true);
6183 }
6184 
6185 static void cik_fini_cg(struct radeon_device *rdev)
6186 {
6187 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6188 			     RADEON_CG_BLOCK_SDMA |
6189 			     RADEON_CG_BLOCK_BIF |
6190 			     RADEON_CG_BLOCK_UVD |
6191 			     RADEON_CG_BLOCK_HDP), false);
6192 
6193 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6194 }
6195 
6196 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6197 					  bool enable)
6198 {
6199 	u32 data, orig;
6200 
6201 	orig = data = RREG32(RLC_PG_CNTL);
6202 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6203 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6204 	else
6205 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6206 	if (orig != data)
6207 		WREG32(RLC_PG_CNTL, data);
6208 }
6209 
6210 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6211 					  bool enable)
6212 {
6213 	u32 data, orig;
6214 
6215 	orig = data = RREG32(RLC_PG_CNTL);
6216 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6217 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6218 	else
6219 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6220 	if (orig != data)
6221 		WREG32(RLC_PG_CNTL, data);
6222 }
6223 
6224 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6225 {
6226 	u32 data, orig;
6227 
6228 	orig = data = RREG32(RLC_PG_CNTL);
6229 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6230 		data &= ~DISABLE_CP_PG;
6231 	else
6232 		data |= DISABLE_CP_PG;
6233 	if (orig != data)
6234 		WREG32(RLC_PG_CNTL, data);
6235 }
6236 
6237 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6238 {
6239 	u32 data, orig;
6240 
6241 	orig = data = RREG32(RLC_PG_CNTL);
6242 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6243 		data &= ~DISABLE_GDS_PG;
6244 	else
6245 		data |= DISABLE_GDS_PG;
6246 	if (orig != data)
6247 		WREG32(RLC_PG_CNTL, data);
6248 }
6249 
6250 #define CP_ME_TABLE_SIZE    96
6251 #define CP_ME_TABLE_OFFSET  2048
6252 #define CP_MEC_TABLE_OFFSET 4096
6253 
6254 void cik_init_cp_pg_table(struct radeon_device *rdev)
6255 {
6256 	const __be32 *fw_data;
6257 	volatile u32 *dst_ptr;
6258 	int me, i, max_me = 4;
6259 	u32 bo_offset = 0;
6260 	u32 table_offset;
6261 
6262 	if (rdev->family == CHIP_KAVERI)
6263 		max_me = 5;
6264 
6265 	if (rdev->rlc.cp_table_ptr == NULL)
6266 		return;
6267 
6268 	/* write the cp table buffer */
6269 	dst_ptr = rdev->rlc.cp_table_ptr;
6270 	for (me = 0; me < max_me; me++) {
6271 		if (me == 0) {
6272 			fw_data = (const __be32 *)rdev->ce_fw->data;
6273 			table_offset = CP_ME_TABLE_OFFSET;
6274 		} else if (me == 1) {
6275 			fw_data = (const __be32 *)rdev->pfp_fw->data;
6276 			table_offset = CP_ME_TABLE_OFFSET;
6277 		} else if (me == 2) {
6278 			fw_data = (const __be32 *)rdev->me_fw->data;
6279 			table_offset = CP_ME_TABLE_OFFSET;
6280 		} else {
6281 			fw_data = (const __be32 *)rdev->mec_fw->data;
6282 			table_offset = CP_MEC_TABLE_OFFSET;
6283 		}
6284 
6285 		for (i = 0; i < CP_ME_TABLE_SIZE; i++) {
6286 			dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6287 		}
6288 		bo_offset += CP_ME_TABLE_SIZE;
6289 	}
6290 }
6291 
6292 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6293 				bool enable)
6294 {
6295 	u32 data, orig;
6296 
6297 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6298 		orig = data = RREG32(RLC_PG_CNTL);
6299 		data |= GFX_PG_ENABLE;
6300 		if (orig != data)
6301 			WREG32(RLC_PG_CNTL, data);
6302 
6303 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6304 		data |= AUTO_PG_EN;
6305 		if (orig != data)
6306 			WREG32(RLC_AUTO_PG_CTRL, data);
6307 	} else {
6308 		orig = data = RREG32(RLC_PG_CNTL);
6309 		data &= ~GFX_PG_ENABLE;
6310 		if (orig != data)
6311 			WREG32(RLC_PG_CNTL, data);
6312 
6313 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6314 		data &= ~AUTO_PG_EN;
6315 		if (orig != data)
6316 			WREG32(RLC_AUTO_PG_CTRL, data);
6317 
6318 		data = RREG32(DB_RENDER_CONTROL);
6319 	}
6320 }
6321 
6322 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6323 {
6324 	u32 mask = 0, tmp, tmp1;
6325 	int i;
6326 
6327 	cik_select_se_sh(rdev, se, sh);
6328 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6329 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6330 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6331 
6332 	tmp &= 0xffff0000;
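	/* the top 16 bits of CC_GC_SHADER_ARRAY_CONFIG carry the inactive
	 * (fused-off) CU bits; merge in the user-disabled bits, shift them
	 * down, then invert against the per-SH CU mask to get active CUs */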
6333 
6334 	tmp |= tmp1;
6335 	tmp >>= 16;
6336 
6337 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6338 		mask <<= 1;
6339 		mask |= 1;
6340 	}
6341 
6342 	return (~tmp) & mask;
6343 }
6344 
6345 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6346 {
6347 	u32 i, j, k, active_cu_number = 0;
6348 	u32 mask, counter, cu_bitmap;
6349 	u32 tmp = 0;
6350 
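	/* keep the first two active CUs in each SH always on, and use the
	 * total active CU count as the power-gating limit below */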
6351 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6352 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6353 			mask = 1;
6354 			cu_bitmap = 0;
6355 			counter = 0;
6356 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6357 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6358 					if (counter < 2)
6359 						cu_bitmap |= mask;
6360 					counter++;
6361 				}
6362 				mask <<= 1;
6363 			}
6364 
6365 			active_cu_number += counter;
6366 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6367 		}
6368 	}
6369 
6370 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6371 
6372 	tmp = RREG32(RLC_MAX_PG_CU);
6373 	tmp &= ~MAX_PU_CU_MASK;
6374 	tmp |= MAX_PU_CU(active_cu_number);
6375 	WREG32(RLC_MAX_PG_CU, tmp);
6376 }
6377 
6378 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6379 				       bool enable)
6380 {
6381 	u32 data, orig;
6382 
6383 	orig = data = RREG32(RLC_PG_CNTL);
6384 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6385 		data |= STATIC_PER_CU_PG_ENABLE;
6386 	else
6387 		data &= ~STATIC_PER_CU_PG_ENABLE;
6388 	if (orig != data)
6389 		WREG32(RLC_PG_CNTL, data);
6390 }
6391 
6392 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6393 					bool enable)
6394 {
6395 	u32 data, orig;
6396 
6397 	orig = data = RREG32(RLC_PG_CNTL);
6398 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6399 		data |= DYN_PER_CU_PG_ENABLE;
6400 	else
6401 		data &= ~DYN_PER_CU_PG_ENABLE;
6402 	if (orig != data)
6403 		WREG32(RLC_PG_CNTL, data);
6404 }
6405 
6406 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6407 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6408 
6409 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6410 {
6411 	u32 data, orig;
6412 	u32 i;
6413 
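	/* per the offsets above, the RLC scratch area holds the clear
	 * state descriptor (address hi/lo plus size) at dword 0x3D and
	 * the save/restore register list starting at dword 0x90 */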
6414 	if (rdev->rlc.cs_data) {
6415 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6416 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6417 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6418 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6419 	} else {
6420 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6421 		for (i = 0; i < 3; i++)
6422 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6423 	}
6424 	if (rdev->rlc.reg_list) {
6425 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6426 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6427 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6428 	}
6429 
6430 	orig = data = RREG32(RLC_PG_CNTL);
6431 	data |= GFX_PG_SRC;
6432 	if (orig != data)
6433 		WREG32(RLC_PG_CNTL, data);
6434 
6435 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6436 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6437 
6438 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6439 	data &= ~IDLE_POLL_COUNT_MASK;
6440 	data |= IDLE_POLL_COUNT(0x60);
6441 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6442 
6443 	data = 0x10101010;
6444 	WREG32(RLC_PG_DELAY, data);
6445 
6446 	data = RREG32(RLC_PG_DELAY_2);
6447 	data &= ~0xff;
6448 	data |= 0x3;
6449 	WREG32(RLC_PG_DELAY_2, data);
6450 
6451 	data = RREG32(RLC_AUTO_PG_CTRL);
6452 	data &= ~GRBM_REG_SGIT_MASK;
6453 	data |= GRBM_REG_SGIT(0x700);
6454 	WREG32(RLC_AUTO_PG_CTRL, data);
6456 }
6457 
6458 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6459 {
6460 	cik_enable_gfx_cgpg(rdev, enable);
6461 	cik_enable_gfx_static_mgpg(rdev, enable);
6462 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
6463 }
6464 
6465 u32 cik_get_csb_size(struct radeon_device *rdev)
6466 {
6467 	u32 count = 0;
6468 	const struct cs_section_def *sect = NULL;
6469 	const struct cs_extent_def *ext = NULL;
6470 
6471 	if (rdev->rlc.cs_data == NULL)
6472 		return 0;
6473 
6474 	/* begin clear state */
6475 	count += 2;
6476 	/* context control state */
6477 	count += 3;
6478 
6479 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6480 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6481 			if (sect->id == SECT_CONTEXT)
6482 				count += 2 + ext->reg_count;
6483 			else
6484 				return 0;
6485 		}
6486 	}
6487 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6488 	count += 4;
6489 	/* end clear state */
6490 	count += 2;
6491 	/* clear state */
6492 	count += 2;
6493 
6494 	return count;
6495 }
6496 
6497 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6498 {
6499 	u32 count = 0, i;
6500 	const struct cs_section_def *sect = NULL;
6501 	const struct cs_extent_def *ext = NULL;
6502 
6503 	if (rdev->rlc.cs_data == NULL)
6504 		return;
6505 	if (buffer == NULL)
6506 		return;
6507 
6508 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6509 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6510 
6511 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6512 	buffer[count++] = cpu_to_le32(0x80000000);
6513 	buffer[count++] = cpu_to_le32(0x80000000);
6514 
6515 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6516 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6517 			if (sect->id == SECT_CONTEXT) {
6518 				buffer[count++] =
6519 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6520 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6521 				for (i = 0; i < ext->reg_count; i++)
6522 					buffer[count++] = cpu_to_le32(ext->extent[i]);
6523 			} else {
6524 				return;
6525 			}
6526 		}
6527 	}
6528 
6529 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6530 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6531 	switch (rdev->family) {
6532 	case CHIP_BONAIRE:
6533 		buffer[count++] = cpu_to_le32(0x16000012);
6534 		buffer[count++] = cpu_to_le32(0x00000000);
6535 		break;
6536 	case CHIP_KAVERI:
6537 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6538 		buffer[count++] = cpu_to_le32(0x00000000);
6539 		break;
6540 	case CHIP_KABINI:
6541 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6542 		buffer[count++] = cpu_to_le32(0x00000000);
6543 		break;
6544 	case CHIP_HAWAII:
6545 		buffer[count++] = cpu_to_le32(0x3a00161a);
6546 		buffer[count++] = cpu_to_le32(0x0000002e);
6547 		break;
6548 	default:
6549 		buffer[count++] = cpu_to_le32(0x00000000);
6550 		buffer[count++] = cpu_to_le32(0x00000000);
6551 		break;
6552 	}
6553 
6554 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6555 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6556 
6557 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6558 	buffer[count++] = cpu_to_le32(0);
6559 }
6560 
6561 static void cik_init_pg(struct radeon_device *rdev)
6562 {
6563 	if (rdev->pg_flags) {
6564 		cik_enable_sck_slowdown_on_pu(rdev, true);
6565 		cik_enable_sck_slowdown_on_pd(rdev, true);
6566 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6567 			cik_init_gfx_cgpg(rdev);
6568 			cik_enable_cp_pg(rdev, true);
6569 			cik_enable_gds_pg(rdev, true);
6570 		}
6571 		cik_init_ao_cu_mask(rdev);
6572 		cik_update_gfx_pg(rdev, true);
6573 	}
6574 }
6575 
6576 static void cik_fini_pg(struct radeon_device *rdev)
6577 {
6578 	if (rdev->pg_flags) {
6579 		cik_update_gfx_pg(rdev, false);
6580 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6581 			cik_enable_cp_pg(rdev, false);
6582 			cik_enable_gds_pg(rdev, false);
6583 		}
6584 	}
6585 }
6586 
6587 /*
6588  * Interrupts
6589  * Starting with r6xx, interrupts are handled via a ring buffer.
6590  * Ring buffers are areas of GPU accessible memory that the GPU
6591  * writes interrupt vectors into and the host reads vectors out of.
6592  * There is a rptr (read pointer) that determines where the
6593  * host is currently reading, and a wptr (write pointer)
6594  * which determines where the GPU has written.  When the
6595  * pointers are equal, the ring is idle.  When the GPU
6596  * writes vectors to the ring buffer, it increments the
6597  * wptr.  When there is an interrupt, the host then starts
6598  * fetching vectors and processing them until the pointers are
6599  * equal again at which point it updates the rptr.
6600  */
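/*
 * A minimal sketch of the host-side consumer loop, assuming 16-byte
 * vectors and a writeback wptr; process_one_vector() is a hypothetical
 * helper, not one of the driver's actual functions:
 *
 *	wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET / 4]);
 *	while (rptr != wptr) {
 *		process_one_vector(rdev, rptr);
 *		rptr = (rptr + 16) & rdev->ih.ptr_mask;
 *	}
 *	WREG32(IH_RB_RPTR, rptr);
 */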
6601 
6602 /**
6603  * cik_enable_interrupts - Enable the interrupt ring buffer
6604  *
6605  * @rdev: radeon_device pointer
6606  *
6607  * Enable the interrupt ring buffer (CIK).
6608  */
6609 static void cik_enable_interrupts(struct radeon_device *rdev)
6610 {
6611 	u32 ih_cntl = RREG32(IH_CNTL);
6612 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6613 
6614 	ih_cntl |= ENABLE_INTR;
6615 	ih_rb_cntl |= IH_RB_ENABLE;
6616 	WREG32(IH_CNTL, ih_cntl);
6617 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6618 	rdev->ih.enabled = true;
6619 }
6620 
6621 /**
6622  * cik_disable_interrupts - Disable the interrupt ring buffer
6623  *
6624  * @rdev: radeon_device pointer
6625  *
6626  * Disable the interrupt ring buffer (CIK).
6627  */
6628 static void cik_disable_interrupts(struct radeon_device *rdev)
6629 {
6630 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6631 	u32 ih_cntl = RREG32(IH_CNTL);
6632 
6633 	ih_rb_cntl &= ~IH_RB_ENABLE;
6634 	ih_cntl &= ~ENABLE_INTR;
6635 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6636 	WREG32(IH_CNTL, ih_cntl);
6637 	/* set rptr, wptr to 0 */
6638 	WREG32(IH_RB_RPTR, 0);
6639 	WREG32(IH_RB_WPTR, 0);
6640 	rdev->ih.enabled = false;
6641 	rdev->ih.rptr = 0;
6642 }
6643 
6644 /**
6645  * cik_disable_interrupt_state - Disable all interrupt sources
6646  *
6647  * @rdev: radeon_device pointer
6648  *
6649  * Clear all interrupt enable bits used by the driver (CIK).
6650  */
6651 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6652 {
6653 	u32 tmp;
6654 
6655 	/* gfx ring */
6656 	tmp = RREG32(CP_INT_CNTL_RING0) &
6657 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6658 	WREG32(CP_INT_CNTL_RING0, tmp);
6659 	/* sdma */
6660 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6661 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6662 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6663 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6664 	/* compute queues */
6665 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6666 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6667 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6668 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6669 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6670 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6671 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6672 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6673 	/* grbm */
6674 	WREG32(GRBM_INT_CNTL, 0);
6675 	/* vline/vblank, etc. */
6676 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6677 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6678 	if (rdev->num_crtc >= 4) {
6679 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6680 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6681 	}
6682 	if (rdev->num_crtc >= 6) {
6683 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6684 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6685 	}
6686 
6687 	/* dac hotplug */
6688 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6689 
6690 	/* digital hotplug */
6691 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6692 	WREG32(DC_HPD1_INT_CONTROL, tmp);
6693 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6694 	WREG32(DC_HPD2_INT_CONTROL, tmp);
6695 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6696 	WREG32(DC_HPD3_INT_CONTROL, tmp);
6697 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6698 	WREG32(DC_HPD4_INT_CONTROL, tmp);
6699 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6700 	WREG32(DC_HPD5_INT_CONTROL, tmp);
6701 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6702 	WREG32(DC_HPD6_INT_CONTROL, tmp);
6704 }
6705 
6706 /**
6707  * cik_irq_init - init and enable the interrupt ring
6708  *
6709  * @rdev: radeon_device pointer
6710  *
6711  * Allocate a ring buffer for the interrupt controller,
6712  * enable the RLC, disable interrupts, set up the IH
6713  * ring buffer and enable it (CIK).
6714  * Called at device load and resume.
6715  * Returns 0 for success, errors for failure.
6716  */
6717 static int cik_irq_init(struct radeon_device *rdev)
6718 {
6719 	int ret = 0;
6720 	int rb_bufsz;
6721 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6722 
6723 	/* allocate ring */
6724 	ret = r600_ih_ring_alloc(rdev);
6725 	if (ret)
6726 		return ret;
6727 
6728 	/* disable irqs */
6729 	cik_disable_interrupts(rdev);
6730 
6731 	/* init rlc */
6732 	ret = cik_rlc_resume(rdev);
6733 	if (ret) {
6734 		r600_ih_ring_fini(rdev);
6735 		return ret;
6736 	}
6737 
6738 	/* setup interrupt control */
6739 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
6740 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6741 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6742 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6743 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6744 	 */
6745 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6746 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6747 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6748 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6749 
6750 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6751 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6752 
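	/* the IH_RB_CNTL ring size field is the log2 of the ring size in
	 * dwords, hence order_base_2() above and the shift by 1 below */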
6753 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6754 		      IH_WPTR_OVERFLOW_CLEAR |
6755 		      (rb_bufsz << 1));
6756 
6757 	if (rdev->wb.enabled)
6758 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6759 
6760 	/* set the writeback address whether it's enabled or not */
6761 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6762 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6763 
6764 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6765 
6766 	/* set rptr, wptr to 0 */
6767 	WREG32(IH_RB_RPTR, 0);
6768 	WREG32(IH_RB_WPTR, 0);
6769 
6770 	/* Default settings for IH_CNTL (disabled at first) */
6771 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6772 	/* RPTR_REARM only works if msi's are enabled */
6773 	if (rdev->msi_enabled)
6774 		ih_cntl |= RPTR_REARM;
6775 	WREG32(IH_CNTL, ih_cntl);
6776 
6777 	/* force the active interrupt state to all disabled */
6778 	cik_disable_interrupt_state(rdev);
6779 
6780 	pci_set_master(rdev->pdev);
6781 
6782 	/* enable irqs */
6783 	cik_enable_interrupts(rdev);
6784 
6785 	return ret;
6786 }
6787 
6788 /**
6789  * cik_irq_set - enable/disable interrupt sources
6790  *
6791  * @rdev: radeon_device pointer
6792  *
6793  * Enable interrupt sources on the GPU (vblanks, hpd,
6794  * etc.) (CIK).
6795  * Returns 0 for success, errors for failure.
6796  */
6797 int cik_irq_set(struct radeon_device *rdev)
6798 {
6799 	u32 cp_int_cntl;
6800 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6801 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6802 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6803 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6804 	u32 grbm_int_cntl = 0;
6805 	u32 dma_cntl, dma_cntl1;
6806 	u32 thermal_int;
6807 
6808 	if (!rdev->irq.installed) {
6809 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6810 		return -EINVAL;
6811 	}
6812 	/* don't enable anything if the ih is disabled */
6813 	if (!rdev->ih.enabled) {
6814 		cik_disable_interrupts(rdev);
6815 		/* force the active interrupt state to all disabled */
6816 		cik_disable_interrupt_state(rdev);
6817 		return 0;
6818 	}
6819 
6820 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6821 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6822 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6823 
6824 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6825 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6826 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6827 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6828 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6829 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6830 
6831 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6832 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6833 
6834 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6835 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6836 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6837 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6838 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6839 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6840 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6841 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6842 
6843 	if (rdev->flags & RADEON_IS_IGP)
6844 		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6845 			~(THERM_INTH_MASK | THERM_INTL_MASK);
6846 	else
6847 		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6848 			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6849 
6850 	/* enable CP interrupts on all rings */
6851 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6852 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
6853 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6854 	}
6855 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6856 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6857 		DRM_DEBUG("si_irq_set: sw int cp1\n");
6858 		if (ring->me == 1) {
6859 			switch (ring->pipe) {
6860 			case 0:
6861 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6862 				break;
6863 			case 1:
6864 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6865 				break;
6866 			case 2:
6867 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6868 				break;
6869 			case 3:
6870 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6871 				break;
6872 			default:
6873 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6874 				break;
6875 			}
6876 		} else if (ring->me == 2) {
6877 			switch (ring->pipe) {
6878 			case 0:
6879 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6880 				break;
6881 			case 1:
6882 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6883 				break;
6884 			case 2:
6885 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6886 				break;
6887 			case 3:
6888 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6889 				break;
6890 			default:
6891 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6892 				break;
6893 			}
6894 		} else {
6895 			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
6896 		}
6897 	}
6898 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6899 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6900 		DRM_DEBUG("cik_irq_set: sw int cp2\n");
6901 		if (ring->me == 1) {
6902 			switch (ring->pipe) {
6903 			case 0:
6904 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6905 				break;
6906 			case 1:
6907 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6908 				break;
6909 			case 2:
6910 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6911 				break;
6912 			case 3:
6913 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6914 				break;
6915 			default:
6916 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6917 				break;
6918 			}
6919 		} else if (ring->me == 2) {
6920 			switch (ring->pipe) {
6921 			case 0:
6922 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6923 				break;
6924 			case 1:
6925 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6926 				break;
6927 			case 2:
6928 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6929 				break;
6930 			case 3:
6931 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6932 				break;
6933 			default:
6934 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6935 				break;
6936 			}
6937 		} else {
6938 			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
6939 		}
6940 	}
6941 
6942 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6943 		DRM_DEBUG("cik_irq_set: sw int dma\n");
6944 		dma_cntl |= TRAP_ENABLE;
6945 	}
6946 
6947 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6948 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
6949 		dma_cntl1 |= TRAP_ENABLE;
6950 	}
6951 
6952 	if (rdev->irq.crtc_vblank_int[0] ||
6953 	    atomic_read(&rdev->irq.pflip[0])) {
6954 		DRM_DEBUG("cik_irq_set: vblank 0\n");
6955 		crtc1 |= VBLANK_INTERRUPT_MASK;
6956 	}
6957 	if (rdev->irq.crtc_vblank_int[1] ||
6958 	    atomic_read(&rdev->irq.pflip[1])) {
6959 		DRM_DEBUG("cik_irq_set: vblank 1\n");
6960 		crtc2 |= VBLANK_INTERRUPT_MASK;
6961 	}
6962 	if (rdev->irq.crtc_vblank_int[2] ||
6963 	    atomic_read(&rdev->irq.pflip[2])) {
6964 		DRM_DEBUG("cik_irq_set: vblank 2\n");
6965 		crtc3 |= VBLANK_INTERRUPT_MASK;
6966 	}
6967 	if (rdev->irq.crtc_vblank_int[3] ||
6968 	    atomic_read(&rdev->irq.pflip[3])) {
6969 		DRM_DEBUG("cik_irq_set: vblank 3\n");
6970 		crtc4 |= VBLANK_INTERRUPT_MASK;
6971 	}
6972 	if (rdev->irq.crtc_vblank_int[4] ||
6973 	    atomic_read(&rdev->irq.pflip[4])) {
6974 		DRM_DEBUG("cik_irq_set: vblank 4\n");
6975 		crtc5 |= VBLANK_INTERRUPT_MASK;
6976 	}
6977 	if (rdev->irq.crtc_vblank_int[5] ||
6978 	    atomic_read(&rdev->irq.pflip[5])) {
6979 		DRM_DEBUG("cik_irq_set: vblank 5\n");
6980 		crtc6 |= VBLANK_INTERRUPT_MASK;
6981 	}
6982 	if (rdev->irq.hpd[0]) {
6983 		DRM_DEBUG("cik_irq_set: hpd 1\n");
6984 		hpd1 |= DC_HPDx_INT_EN;
6985 	}
6986 	if (rdev->irq.hpd[1]) {
6987 		DRM_DEBUG("cik_irq_set: hpd 2\n");
6988 		hpd2 |= DC_HPDx_INT_EN;
6989 	}
6990 	if (rdev->irq.hpd[2]) {
6991 		DRM_DEBUG("cik_irq_set: hpd 3\n");
6992 		hpd3 |= DC_HPDx_INT_EN;
6993 	}
6994 	if (rdev->irq.hpd[3]) {
6995 		DRM_DEBUG("cik_irq_set: hpd 4\n");
6996 		hpd4 |= DC_HPDx_INT_EN;
6997 	}
6998 	if (rdev->irq.hpd[4]) {
6999 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7000 		hpd5 |= DC_HPDx_INT_EN;
7001 	}
7002 	if (rdev->irq.hpd[5]) {
7003 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7004 		hpd6 |= DC_HPDx_INT_EN;
7005 	}
7006 
7007 	if (rdev->irq.dpm_thermal) {
7008 		DRM_DEBUG("dpm thermal\n");
7009 		if (rdev->flags & RADEON_IS_IGP)
7010 			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
7011 		else
7012 			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
7013 	}
7014 
7015 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7016 
7017 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7018 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7019 
7020 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7021 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7022 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7023 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7024 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7025 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7026 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7027 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7028 
7029 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7030 
7031 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7032 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7033 	if (rdev->num_crtc >= 4) {
7034 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7035 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7036 	}
7037 	if (rdev->num_crtc >= 6) {
7038 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7039 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7040 	}
7041 
7042 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7043 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7044 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7045 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7046 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7047 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7048 
7049 	if (rdev->flags & RADEON_IS_IGP)
7050 		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7051 	else
7052 		WREG32_SMC(CG_THERMAL_INT, thermal_int);
7053 
7054 	return 0;
7055 }
7056 
7057 /**
7058  * cik_irq_ack - ack interrupt sources
7059  *
7060  * @rdev: radeon_device pointer
7061  *
7062  * Ack interrupt sources on the GPU (vblanks, hpd,
7063  * etc.) (CIK).  Certain interrupt sources are sw
7064  * generated and do not require an explicit ack.
7065  */
7066 static inline void cik_irq_ack(struct radeon_device *rdev)
7067 {
7068 	u32 tmp;
7069 
7070 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7071 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7072 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7073 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7074 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7075 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7076 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7077 
7078 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7079 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7080 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7081 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7082 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7083 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7084 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7085 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7086 
7087 	if (rdev->num_crtc >= 4) {
7088 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7089 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7090 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7091 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7092 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7093 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7094 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7095 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7096 	}
7097 
7098 	if (rdev->num_crtc >= 6) {
7099 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7100 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7101 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7102 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7103 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7104 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7105 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7106 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7107 	}
7108 
7109 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7110 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7111 		tmp |= DC_HPDx_INT_ACK;
7112 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7113 	}
7114 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7115 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7116 		tmp |= DC_HPDx_INT_ACK;
7117 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7118 	}
7119 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7120 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7121 		tmp |= DC_HPDx_INT_ACK;
7122 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7123 	}
7124 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7125 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7126 		tmp |= DC_HPDx_INT_ACK;
7127 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7128 	}
7129 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7130 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7131 		tmp |= DC_HPDx_INT_ACK;
7132 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7133 	}
7134 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7135 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7136 		tmp |= DC_HPDx_INT_ACK;
7137 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7138 	}
7139 }
7140 
7141 /**
7142  * cik_irq_disable - disable interrupts
7143  *
7144  * @rdev: radeon_device pointer
7145  *
7146  * Disable interrupts on the hw (CIK).
7147  */
7148 static void cik_irq_disable(struct radeon_device *rdev)
7149 {
7150 	cik_disable_interrupts(rdev);
7151 	/* Wait and acknowledge irq */
7152 	mdelay(1);
7153 	cik_irq_ack(rdev);
7154 	cik_disable_interrupt_state(rdev);
7155 }
7156 
7157 /**
7158  * cik_irq_suspend - disable interrupts for suspend
7159  *
7160  * @rdev: radeon_device pointer
7161  *
7162  * Disable interrupts and stop the RLC (CIK).
7163  * Used for suspend.
7164  */
7165 static void cik_irq_suspend(struct radeon_device *rdev)
7166 {
7167 	cik_irq_disable(rdev);
7168 	cik_rlc_stop(rdev);
7169 }
7170 
7171 /**
7172  * cik_irq_fini - tear down interrupt support
7173  *
7174  * @rdev: radeon_device pointer
7175  *
7176  * Disable interrupts on the hw and free the IH ring
7177  * buffer (CIK).
7178  * Used for driver unload.
7179  */
7180 static void cik_irq_fini(struct radeon_device *rdev)
7181 {
7182 	cik_irq_suspend(rdev);
7183 	r600_ih_ring_fini(rdev);
7184 }
7185 
7186 /**
7187  * cik_get_ih_wptr - get the IH ring buffer wptr
7188  *
7189  * @rdev: radeon_device pointer
7190  *
7191  * Get the IH ring buffer wptr from either the register
7192  * or the writeback memory buffer (CIK).  Also check for
7193  * ring buffer overflow and deal with it.
7194  * Used by cik_irq_process().
7195  * Returns the value of the wptr.
7196  */
7197 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7198 {
7199 	u32 wptr, tmp;
7200 
7201 	if (rdev->wb.enabled)
7202 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7203 	else
7204 		wptr = RREG32(IH_RB_WPTR);
7205 
7206 	if (wptr & RB_OVERFLOW) {
7207 		/* When a ring buffer overflow happens, start parsing interrupts
7208 		 * from the last not-overwritten vector (wptr + 16). Hopefully
7209 		 * this should allow us to catch up.
7210 		 */
7211 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
7212 			wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7213 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7214 		tmp = RREG32(IH_RB_CNTL);
7215 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7216 		WREG32(IH_RB_CNTL, tmp);
7217 	}
7218 	return (wptr & rdev->ih.ptr_mask);
7219 }
7220 
7221 /*        CIK IV Ring
7222  * Each IV ring entry is 128 bits:
7223  * [7:0]    - interrupt source id
7224  * [31:8]   - reserved
7225  * [59:32]  - interrupt source data
7226  * [63:60]  - reserved
7227  * [71:64]  - RINGID
7228  *            CP:
7229  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7230  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7231  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7232  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7233  *            PIPE_ID - ME0 0=3D
7234  *                    - ME1&2 compute dispatcher (4 pipes each)
7235  *            SDMA:
7236  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7237  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7238  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7239  * [79:72]  - VMID
7240  * [95:80]  - PASID
7241  * [127:96] - reserved
7242  */
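/* For illustration, decoding one IV entry per the layout above; this
 * mirrors the le32 reads that cik_irq_process() does below:
 *
 *	u32 dw0 = le32_to_cpu(rdev->ih.ring[ring_index + 0]);
 *	u32 dw1 = le32_to_cpu(rdev->ih.ring[ring_index + 1]);
 *	u32 dw2 = le32_to_cpu(rdev->ih.ring[ring_index + 2]);
 *	u32 src_id   = dw0 & 0xff;		// [7:0]
 *	u32 src_data = dw1 & 0xfffffff;		// [59:32]
 *	u32 ring_id  = dw2 & 0xff;		// [71:64]
 *	u32 vmid     = (dw2 >> 8) & 0xff;	// [79:72]
 *	u32 pasid    = dw2 >> 16;		// [95:80]
 */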
7243 /**
7244  * cik_irq_process - interrupt handler
7245  *
7246  * @rdev: radeon_device pointer
7247  *
7248  * Interrupt handler (CIK).  Walk the IH ring,
7249  * ack interrupts and schedule work to handle
7250  * interrupt events.
7251  * Returns irq process return code.
7252  */
7253 int cik_irq_process(struct radeon_device *rdev)
7254 {
7255 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7256 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7257 	u32 wptr;
7258 	u32 rptr;
7259 	u32 src_id, src_data, ring_id;
7260 	u8 me_id, pipe_id, queue_id;
7261 	u32 ring_index;
7262 	bool queue_hotplug = false;
7263 	bool queue_reset = false;
7264 	u32 addr, status, mc_client;
7265 	bool queue_thermal = false;
7266 
7267 	if (!rdev->ih.enabled || rdev->shutdown)
7268 		return IRQ_NONE;
7269 
7270 	wptr = cik_get_ih_wptr(rdev);
7271 
7272 restart_ih:
7273 	/* is somebody else already processing irqs? */
7274 	if (atomic_xchg(&rdev->ih.lock, 1))
7275 		return IRQ_NONE;
7276 
7277 	rptr = rdev->ih.rptr;
7278 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7279 
7280 	/* Order reading of wptr vs. reading of IH ring data */
7281 	rmb();
7282 
7283 	/* display interrupts */
7284 	cik_irq_ack(rdev);
7285 
7286 	while (rptr != wptr) {
7287 		/* wptr/rptr are in bytes! */
7288 		ring_index = rptr / 4;
7289 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7290 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7291 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7292 
7293 		switch (src_id) {
7294 		case 1: /* D1 vblank/vline */
7295 			switch (src_data) {
7296 			case 0: /* D1 vblank */
7297 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7298 					if (rdev->irq.crtc_vblank_int[0]) {
7299 						drm_handle_vblank(rdev->ddev, 0);
7300 						rdev->pm.vblank_sync = true;
7301 						wake_up(&rdev->irq.vblank_queue);
7302 					}
7303 					if (atomic_read(&rdev->irq.pflip[0]))
7304 						radeon_crtc_handle_flip(rdev, 0);
7305 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7306 					DRM_DEBUG("IH: D1 vblank\n");
7307 				}
7308 				break;
7309 			case 1: /* D1 vline */
7310 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7311 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7312 					DRM_DEBUG("IH: D1 vline\n");
7313 				}
7314 				break;
7315 			default:
7316 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7317 				break;
7318 			}
7319 			break;
7320 		case 2: /* D2 vblank/vline */
7321 			switch (src_data) {
7322 			case 0: /* D2 vblank */
7323 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7324 					if (rdev->irq.crtc_vblank_int[1]) {
7325 						drm_handle_vblank(rdev->ddev, 1);
7326 						rdev->pm.vblank_sync = true;
7327 						wake_up(&rdev->irq.vblank_queue);
7328 					}
7329 					if (atomic_read(&rdev->irq.pflip[1]))
7330 						radeon_crtc_handle_flip(rdev, 1);
7331 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7332 					DRM_DEBUG("IH: D2 vblank\n");
7333 				}
7334 				break;
7335 			case 1: /* D2 vline */
7336 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7337 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7338 					DRM_DEBUG("IH: D2 vline\n");
7339 				}
7340 				break;
7341 			default:
7342 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7343 				break;
7344 			}
7345 			break;
7346 		case 3: /* D3 vblank/vline */
7347 			switch (src_data) {
7348 			case 0: /* D3 vblank */
7349 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7350 					if (rdev->irq.crtc_vblank_int[2]) {
7351 						drm_handle_vblank(rdev->ddev, 2);
7352 						rdev->pm.vblank_sync = true;
7353 						wake_up(&rdev->irq.vblank_queue);
7354 					}
7355 					if (atomic_read(&rdev->irq.pflip[2]))
7356 						radeon_crtc_handle_flip(rdev, 2);
7357 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7358 					DRM_DEBUG("IH: D3 vblank\n");
7359 				}
7360 				break;
7361 			case 1: /* D3 vline */
7362 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7363 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7364 					DRM_DEBUG("IH: D3 vline\n");
7365 				}
7366 				break;
7367 			default:
7368 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7369 				break;
7370 			}
7371 			break;
7372 		case 4: /* D4 vblank/vline */
7373 			switch (src_data) {
7374 			case 0: /* D4 vblank */
7375 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7376 					if (rdev->irq.crtc_vblank_int[3]) {
7377 						drm_handle_vblank(rdev->ddev, 3);
7378 						rdev->pm.vblank_sync = true;
7379 						wake_up(&rdev->irq.vblank_queue);
7380 					}
7381 					if (atomic_read(&rdev->irq.pflip[3]))
7382 						radeon_crtc_handle_flip(rdev, 3);
7383 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7384 					DRM_DEBUG("IH: D4 vblank\n");
7385 				}
7386 				break;
7387 			case 1: /* D4 vline */
7388 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7389 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7390 					DRM_DEBUG("IH: D4 vline\n");
7391 				}
7392 				break;
7393 			default:
7394 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7395 				break;
7396 			}
7397 			break;
7398 		case 5: /* D5 vblank/vline */
7399 			switch (src_data) {
7400 			case 0: /* D5 vblank */
7401 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7402 					if (rdev->irq.crtc_vblank_int[4]) {
7403 						drm_handle_vblank(rdev->ddev, 4);
7404 						rdev->pm.vblank_sync = true;
7405 						wake_up(&rdev->irq.vblank_queue);
7406 					}
7407 					if (atomic_read(&rdev->irq.pflip[4]))
7408 						radeon_crtc_handle_flip(rdev, 4);
7409 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7410 					DRM_DEBUG("IH: D5 vblank\n");
7411 				}
7412 				break;
7413 			case 1: /* D5 vline */
7414 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7415 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7416 					DRM_DEBUG("IH: D5 vline\n");
7417 				}
7418 				break;
7419 			default:
7420 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7421 				break;
7422 			}
7423 			break;
7424 		case 6: /* D6 vblank/vline */
7425 			switch (src_data) {
7426 			case 0: /* D6 vblank */
7427 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7428 					if (rdev->irq.crtc_vblank_int[5]) {
7429 						drm_handle_vblank(rdev->ddev, 5);
7430 						rdev->pm.vblank_sync = true;
7431 						wake_up(&rdev->irq.vblank_queue);
7432 					}
7433 					if (atomic_read(&rdev->irq.pflip[5]))
7434 						radeon_crtc_handle_flip(rdev, 5);
7435 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7436 					DRM_DEBUG("IH: D6 vblank\n");
7437 				}
7438 				break;
7439 			case 1: /* D6 vline */
7440 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7441 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7442 					DRM_DEBUG("IH: D6 vline\n");
7443 				}
7444 				break;
7445 			default:
7446 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7447 				break;
7448 			}
7449 			break;
7450 		case 42: /* HPD hotplug */
7451 			switch (src_data) {
7452 			case 0:
7453 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7454 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7455 					queue_hotplug = true;
7456 					DRM_DEBUG("IH: HPD1\n");
7457 				}
7458 				break;
7459 			case 1:
7460 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7461 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7462 					queue_hotplug = true;
7463 					DRM_DEBUG("IH: HPD2\n");
7464 				}
7465 				break;
7466 			case 2:
7467 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7468 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7469 					queue_hotplug = true;
7470 					DRM_DEBUG("IH: HPD3\n");
7471 				}
7472 				break;
7473 			case 3:
7474 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7475 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7476 					queue_hotplug = true;
7477 					DRM_DEBUG("IH: HPD4\n");
7478 				}
7479 				break;
7480 			case 4:
7481 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7482 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7483 					queue_hotplug = true;
7484 					DRM_DEBUG("IH: HPD5\n");
7485 				}
7486 				break;
7487 			case 5:
7488 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7489 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7490 					queue_hotplug = true;
7491 					DRM_DEBUG("IH: HPD6\n");
7492 				}
7493 				break;
7494 			default:
7495 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7496 				break;
7497 			}
7498 			break;
7499 		case 124: /* UVD */
7500 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7501 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7502 			break;
7503 		case 146:
7504 		case 147:
7505 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7506 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7507 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7508 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7509 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7510 				addr);
7511 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7512 				status);
7513 			cik_vm_decode_fault(rdev, status, addr, mc_client);
7514 			/* reset addr and status */
7515 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7516 			break;
7517 		case 167: /* VCE */
7518 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7519 			switch (src_data) {
7520 			case 0:
7521 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7522 				break;
7523 			case 1:
7524 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7525 				break;
7526 			default:
7527 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7528 				break;
7529 			}
7530 			break;
7531 		case 176: /* GFX RB CP_INT */
7532 		case 177: /* GFX IB CP_INT */
7533 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7534 			break;
7535 		case 181: /* CP EOP event */
7536 			DRM_DEBUG("IH: CP EOP\n");
7537 			/* XXX check the bitfield order! */
7538 			me_id = (ring_id & 0x60) >> 5;
7539 			pipe_id = (ring_id & 0x18) >> 3;
7540 			queue_id = (ring_id & 0x7) >> 0;
7541 			switch (me_id) {
7542 			case 0:
7543 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7544 				break;
7545 			case 1:
7546 			case 2:
7547 				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
7548 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7549 				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
7550 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7551 				break;
7552 			}
7553 			break;
7554 		case 184: /* CP Privileged reg access */
7555 			DRM_ERROR("Illegal register access in command stream\n");
7556 			/* XXX check the bitfield order! */
7557 			me_id = (ring_id & 0x60) >> 5;
7558 			pipe_id = (ring_id & 0x18) >> 3;
7559 			queue_id = (ring_id & 0x7) >> 0;
7560 			switch (me_id) {
7561 			case 0:
7562 				/* This results in a full GPU reset, but all we need to do is soft
7563 				 * reset the CP for gfx
7564 				 */
7565 				queue_reset = true;
7566 				break;
7567 			case 1:
7568 				/* XXX compute */
7569 				queue_reset = true;
7570 				break;
7571 			case 2:
7572 				/* XXX compute */
7573 				queue_reset = true;
7574 				break;
7575 			}
7576 			break;
7577 		case 185: /* CP Privileged inst */
7578 			DRM_ERROR("Illegal instruction in command stream\n");
7579 			/* XXX check the bitfield order! */
7580 			me_id = (ring_id & 0x60) >> 5;
7581 			pipe_id = (ring_id & 0x18) >> 3;
7582 			queue_id = (ring_id & 0x7) >> 0;
7583 			switch (me_id) {
7584 			case 0:
7585 				/* This results in a full GPU reset, but all we need to do is soft
7586 				 * reset the CP for gfx
7587 				 */
7588 				queue_reset = true;
7589 				break;
7590 			case 1:
7591 				/* XXX compute */
7592 				queue_reset = true;
7593 				break;
7594 			case 2:
7595 				/* XXX compute */
7596 				queue_reset = true;
7597 				break;
7598 			}
7599 			break;
7600 		case 224: /* SDMA trap event */
7601 			/* XXX check the bitfield order! */
7602 			me_id = (ring_id & 0x3) >> 0;
7603 			queue_id = (ring_id & 0xc) >> 2;
7604 			DRM_DEBUG("IH: SDMA trap\n");
7605 			switch (me_id) {
7606 			case 0:
7607 				switch (queue_id) {
7608 				case 0:
7609 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7610 					break;
7611 				case 1:
7612 					/* XXX compute */
7613 					break;
7614 				case 2:
7615 					/* XXX compute */
7616 					break;
7617 				}
7618 				break;
7619 			case 1:
7620 				switch (queue_id) {
7621 				case 0:
7622 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7623 					break;
7624 				case 1:
7625 					/* XXX compute */
7626 					break;
7627 				case 2:
7628 					/* XXX compute */
7629 					break;
7630 				}
7631 				break;
7632 			}
7633 			break;
7634 		case 230: /* thermal low to high */
7635 			DRM_DEBUG("IH: thermal low to high\n");
7636 			rdev->pm.dpm.thermal.high_to_low = false;
7637 			queue_thermal = true;
7638 			break;
7639 		case 231: /* thermal high to low */
7640 			DRM_DEBUG("IH: thermal high to low\n");
7641 			rdev->pm.dpm.thermal.high_to_low = true;
7642 			queue_thermal = true;
7643 			break;
7644 		case 233: /* GUI IDLE */
7645 			DRM_DEBUG("IH: GUI idle\n");
7646 			break;
7647 		case 241: /* SDMA Privileged inst */
7648 		case 247: /* SDMA Privileged inst */
7649 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
7650 			/* XXX check the bitfield order! */
7651 			me_id = (ring_id & 0x3) >> 0;
7652 			queue_id = (ring_id & 0xc) >> 2;
7653 			switch (me_id) {
7654 			case 0:
7655 				switch (queue_id) {
7656 				case 0:
7657 					queue_reset = true;
7658 					break;
7659 				case 1:
7660 					/* XXX compute */
7661 					queue_reset = true;
7662 					break;
7663 				case 2:
7664 					/* XXX compute */
7665 					queue_reset = true;
7666 					break;
7667 				}
7668 				break;
7669 			case 1:
7670 				switch (queue_id) {
7671 				case 0:
7672 					queue_reset = true;
7673 					break;
7674 				case 1:
7675 					/* XXX compute */
7676 					queue_reset = true;
7677 					break;
7678 				case 2:
7679 					/* XXX compute */
7680 					queue_reset = true;
7681 					break;
7682 				}
7683 				break;
7684 			}
7685 			break;
7686 		default:
7687 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7688 			break;
7689 		}
7690 
7691 		/* wptr/rptr are in bytes! */
7692 		rptr += 16;
7693 		rptr &= rdev->ih.ptr_mask;
7694 	}
7695 	if (queue_hotplug)
7696 		schedule_work(&rdev->hotplug_work);
7697 	if (queue_reset)
7698 		schedule_work(&rdev->reset_work);
7699 	if (queue_thermal)
7700 		schedule_work(&rdev->pm.dpm.thermal.work);
7701 	rdev->ih.rptr = rptr;
7702 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
7703 	atomic_set(&rdev->ih.lock, 0);
7704 
7705 	/* make sure wptr hasn't changed while processing */
7706 	wptr = cik_get_ih_wptr(rdev);
7707 	if (wptr != rptr)
7708 		goto restart_ih;
7709 
7710 	return IRQ_HANDLED;
7711 }
7712 
7713 /*
7714  * startup/shutdown callbacks
7715  */
7716 /**
7717  * cik_startup - program the asic to a functional state
7718  *
7719  * @rdev: radeon_device pointer
7720  *
7721  * Programs the asic to a functional state (CIK).
7722  * Called by cik_init() and cik_resume().
7723  * Returns 0 for success, error for failure.
7724  */
7725 static int cik_startup(struct radeon_device *rdev)
7726 {
7727 	struct radeon_ring *ring;
7728 	int r;
7729 
7730 	/* enable pcie gen2/3 link */
7731 	cik_pcie_gen3_enable(rdev);
7732 	/* enable aspm */
7733 	cik_program_aspm(rdev);
7734 
7735 	/* scratch needs to be initialized before MC */
7736 	r = r600_vram_scratch_init(rdev);
7737 	if (r)
7738 		return r;
7739 
7740 	cik_mc_program(rdev);
7741 
7742 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
7743 		r = ci_mc_load_microcode(rdev);
7744 		if (r) {
7745 			DRM_ERROR("Failed to load MC firmware!\n");
7746 			return r;
7747 		}
7748 	}
7749 
7750 	r = cik_pcie_gart_enable(rdev);
7751 	if (r)
7752 		return r;
7753 	cik_gpu_init(rdev);
7754 
7755 	/* allocate rlc buffers */
7756 	if (rdev->flags & RADEON_IS_IGP) {
7757 		if (rdev->family == CHIP_KAVERI) {
7758 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7759 			rdev->rlc.reg_list_size =
7760 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7761 		} else {
7762 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7763 			rdev->rlc.reg_list_size =
7764 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7765 		}
7766 	}
7767 	rdev->rlc.cs_data = ci_cs_data;
7768 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7769 	r = sumo_rlc_init(rdev);
7770 	if (r) {
7771 		DRM_ERROR("Failed to init rlc BOs!\n");
7772 		return r;
7773 	}
7774 
7775 	/* allocate wb buffer */
7776 	r = radeon_wb_init(rdev);
7777 	if (r)
7778 		return r;
7779 
7780 	/* allocate mec buffers */
7781 	r = cik_mec_init(rdev);
7782 	if (r) {
7783 		DRM_ERROR("Failed to init MEC BOs!\n");
7784 		return r;
7785 	}
7786 
7787 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7788 	if (r) {
7789 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7790 		return r;
7791 	}
7792 
7793 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7794 	if (r) {
7795 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7796 		return r;
7797 	}
7798 
7799 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7800 	if (r) {
7801 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7802 		return r;
7803 	}
7804 
7805 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7806 	if (r) {
7807 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7808 		return r;
7809 	}
7810 
7811 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7812 	if (r) {
7813 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7814 		return r;
7815 	}
7816 
7817 	r = radeon_uvd_resume(rdev);
7818 	if (!r) {
7819 		r = uvd_v4_2_resume(rdev);
7820 		if (!r) {
7821 			r = radeon_fence_driver_start_ring(rdev,
7822 							   R600_RING_TYPE_UVD_INDEX);
7823 			if (r)
7824 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7825 		}
7826 	}
7827 	if (r)
7828 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7829 
7830 	r = radeon_vce_resume(rdev);
7831 	if (!r) {
7832 		r = vce_v2_0_resume(rdev);
7833 		if (!r)
7834 			r = radeon_fence_driver_start_ring(rdev,
7835 							   TN_RING_TYPE_VCE1_INDEX);
7836 		if (!r)
7837 			r = radeon_fence_driver_start_ring(rdev,
7838 							   TN_RING_TYPE_VCE2_INDEX);
7839 	}
7840 	if (r) {
7841 		dev_err(rdev->dev, "VCE init error (%d).\n", r);
7842 		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
7843 		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
7844 	}
7845 
7846 	/* Enable IRQ */
7847 	if (!rdev->irq.installed) {
7848 		r = radeon_irq_kms_init(rdev);
7849 		if (r)
7850 			return r;
7851 	}
7852 
7853 	r = cik_irq_init(rdev);
7854 	if (r) {
7855 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
7856 		radeon_irq_kms_fini(rdev);
7857 		return r;
7858 	}
7859 	cik_irq_set(rdev);
7860 
7861 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7862 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7863 			     PACKET3(PACKET3_NOP, 0x3FFF));
7864 	if (r)
7865 		return r;
7866 
7867 	/* set up the compute queues */
7868 	/* type-2 packets are deprecated on MEC, use type-3 instead */
7869 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7870 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7871 			     PACKET3(PACKET3_NOP, 0x3FFF));
7872 	if (r)
7873 		return r;
7874 	ring->me = 1; /* first MEC */
7875 	ring->pipe = 0; /* first pipe */
7876 	ring->queue = 0; /* first queue */
7877 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7878 
7879 	/* type-2 packets are deprecated on MEC, use type-3 instead */
7880 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7881 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7882 			     PACKET3(PACKET3_NOP, 0x3FFF));
7883 	if (r)
7884 		return r;
7885 	/* dGPUs only have 1 MEC */
7886 	ring->me = 1; /* first MEC */
7887 	ring->pipe = 0; /* first pipe */
7888 	ring->queue = 1; /* second queue */
7889 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
7890 
7891 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7892 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7893 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7894 	if (r)
7895 		return r;
7896 
7897 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7898 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7899 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7900 	if (r)
7901 		return r;
7902 
7903 	r = cik_cp_resume(rdev);
7904 	if (r)
7905 		return r;
7906 
7907 	r = cik_sdma_resume(rdev);
7908 	if (r)
7909 		return r;
7910 
7911 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7912 	if (ring->ring_size) {
7913 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7914 				     RADEON_CP_PACKET2);
7915 		if (!r)
7916 			r = uvd_v1_0_init(rdev);
7917 		if (r)
7918 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7919 	}
7920 
7921 	r = -ENOENT;
7922 
7923 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
7924 	if (ring->ring_size)
7925 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7926 				     VCE_CMD_NO_OP);
7927 
7928 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
7929 	if (ring->ring_size)
7930 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7931 				     VCE_CMD_NO_OP);
7932 
7933 	if (!r)
7934 		r = vce_v1_0_init(rdev);
7935 	else if (r != -ENOENT)
7936 		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
7937 
7938 	r = radeon_ib_pool_init(rdev);
7939 	if (r) {
7940 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7941 		return r;
7942 	}
7943 
7944 	r = radeon_vm_manager_init(rdev);
7945 	if (r) {
7946 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7947 		return r;
7948 	}
7949 
7950 	r = dce6_audio_init(rdev);
7951 	if (r)
7952 		return r;
7953 
7954 	return 0;
7955 }
7956 
7957 /**
7958  * cik_resume - resume the asic to a functional state
7959  *
7960  * @rdev: radeon_device pointer
7961  *
7962  * Programs the asic to a functional state (CIK).
7963  * Called at resume.
7964  * Returns 0 for success, error for failure.
7965  */
7966 int cik_resume(struct radeon_device *rdev)
7967 {
7968 	int r;
7969 
7970 	/* post card */
7971 	atom_asic_init(rdev->mode_info.atom_context);
7972 
7973 	/* init golden registers */
7974 	cik_init_golden_registers(rdev);
7975 
7976 	if (rdev->pm.pm_method == PM_METHOD_DPM)
7977 		radeon_pm_resume(rdev);
7978 
7979 	rdev->accel_working = true;
7980 	r = cik_startup(rdev);
7981 	if (r) {
7982 		DRM_ERROR("cik startup failed on resume\n");
7983 		rdev->accel_working = false;
7984 		return r;
7985 	}
7986 
7987 	return r;
7989 }
7990 
7991 /**
7992  * cik_suspend - suspend the asic
7993  *
7994  * @rdev: radeon_device pointer
7995  *
7996  * Bring the chip into a state suitable for suspend (CIK).
7997  * Called at suspend.
7998  * Returns 0 for success.
7999  */
8000 int cik_suspend(struct radeon_device *rdev)
8001 {
8002 	radeon_pm_suspend(rdev);
8003 	dce6_audio_fini(rdev);
8004 	radeon_vm_manager_fini(rdev);
8005 	cik_cp_enable(rdev, false);
8006 	cik_sdma_enable(rdev, false);
8007 	uvd_v1_0_fini(rdev);
8008 	radeon_uvd_suspend(rdev);
8009 	radeon_vce_suspend(rdev);
8010 	cik_fini_pg(rdev);
8011 	cik_fini_cg(rdev);
8012 	cik_irq_suspend(rdev);
8013 	radeon_wb_disable(rdev);
8014 	cik_pcie_gart_disable(rdev);
8015 	return 0;
8016 }
8017 
8018 /* The plan is to move initialization into this function and use
8019  * helper functions so that radeon_device_init does little more
8020  * than call asic-specific functions. This should also
8021  * allow us to remove a bunch of callback functions
8022  * like vram_info.
8023  */
8024 /**
8025  * cik_init - asic specific driver and hw init
8026  *
8027  * @rdev: radeon_device pointer
8028  *
8029  * Setup asic specific driver variables and program the hw
8030  * to a functional state (CIK).
8031  * Called at driver startup.
8032  * Returns 0 for success, errors for failure.
8033  */
8034 int cik_init(struct radeon_device *rdev)
8035 {
8036 	struct radeon_ring *ring;
8037 	int r;
8038 
8039 	/* Read BIOS */
8040 	if (!radeon_get_bios(rdev)) {
8041 		if (ASIC_IS_AVIVO(rdev))
8042 			return -EINVAL;
8043 	}
8044 	/* Must be an ATOMBIOS */
8045 	if (!rdev->is_atom_bios) {
8046 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8047 		return -EINVAL;
8048 	}
8049 	r = radeon_atombios_init(rdev);
8050 	if (r)
8051 		return r;
8052 
8053 	/* Post card if necessary */
8054 	if (!radeon_card_posted(rdev)) {
8055 		if (!rdev->bios) {
8056 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8057 			return -EINVAL;
8058 		}
8059 		DRM_INFO("GPU not posted. posting now...\n");
8060 		atom_asic_init(rdev->mode_info.atom_context);
8061 	}
8062 	/* init golden registers */
8063 	cik_init_golden_registers(rdev);
8064 	/* Initialize scratch registers */
8065 	cik_scratch_init(rdev);
8066 	/* Initialize surface registers */
8067 	radeon_surface_init(rdev);
8068 	/* Initialize clocks */
8069 	radeon_get_clock_info(rdev->ddev);
8070 
8071 	/* Fence driver */
8072 	r = radeon_fence_driver_init(rdev);
8073 	if (r)
8074 		return r;
8075 
8076 	/* initialize memory controller */
8077 	r = cik_mc_init(rdev);
8078 	if (r)
8079 		return r;
8080 	/* Memory manager */
8081 	r = radeon_bo_init(rdev);
8082 	if (r)
8083 		return r;
8084 
8085 	if (rdev->flags & RADEON_IS_IGP) {
8086 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8087 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8088 			r = cik_init_microcode(rdev);
8089 			if (r) {
8090 				DRM_ERROR("Failed to load firmware!\n");
8091 				return r;
8092 			}
8093 		}
8094 	} else {
8095 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8096 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8097 		    !rdev->mc_fw) {
8098 			r = cik_init_microcode(rdev);
8099 			if (r) {
8100 				DRM_ERROR("Failed to load firmware!\n");
8101 				return r;
8102 			}
8103 		}
8104 	}
8105 
8106 	/* Initialize power management */
8107 	radeon_pm_init(rdev);
8108 
8109 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8110 	ring->ring_obj = NULL;
8111 	r600_ring_init(rdev, ring, 1024 * 1024);
8112 
8113 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8114 	ring->ring_obj = NULL;
8115 	r600_ring_init(rdev, ring, 1024 * 1024);
8116 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8117 	if (r)
8118 		return r;
8119 
8120 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8121 	ring->ring_obj = NULL;
8122 	r600_ring_init(rdev, ring, 1024 * 1024);
8123 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8124 	if (r)
8125 		return r;
8126 
8127 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8128 	ring->ring_obj = NULL;
8129 	r600_ring_init(rdev, ring, 256 * 1024);
8130 
8131 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8132 	ring->ring_obj = NULL;
8133 	r600_ring_init(rdev, ring, 256 * 1024);
8134 
8135 	r = radeon_uvd_init(rdev);
8136 	if (!r) {
8137 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8138 		ring->ring_obj = NULL;
8139 		r600_ring_init(rdev, ring, 4096);
8140 	}
8141 
8142 	r = radeon_vce_init(rdev);
8143 	if (!r) {
8144 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8145 		ring->ring_obj = NULL;
8146 		r600_ring_init(rdev, ring, 4096);
8147 
8148 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8149 		ring->ring_obj = NULL;
8150 		r600_ring_init(rdev, ring, 4096);
8151 	}
8152 
8153 	rdev->ih.ring_obj = NULL;
8154 	r600_ih_ring_init(rdev, 64 * 1024);
8155 
8156 	r = r600_pcie_gart_init(rdev);
8157 	if (r)
8158 		return r;
8159 
8160 	rdev->accel_working = true;
8161 	r = cik_startup(rdev);
8162 	if (r) {
8163 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8164 		cik_cp_fini(rdev);
8165 		cik_sdma_fini(rdev);
8166 		cik_irq_fini(rdev);
8167 		sumo_rlc_fini(rdev);
8168 		cik_mec_fini(rdev);
8169 		radeon_wb_fini(rdev);
8170 		radeon_ib_pool_fini(rdev);
8171 		radeon_vm_manager_fini(rdev);
8172 		radeon_irq_kms_fini(rdev);
8173 		cik_pcie_gart_fini(rdev);
8174 		rdev->accel_working = false;
8175 	}
8176 
8177 	/* Don't start up if the MC ucode is missing.
8178 	 * The default clocks and voltages before the MC ucode
8179 	 * is loaded are not sufficient for advanced operations.
8180 	 */
8181 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8182 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8183 		return -EINVAL;
8184 	}
8185 
8186 	return 0;
8187 }
8188 
8189 /**
8190  * cik_fini - asic specific driver and hw fini
8191  *
8192  * @rdev: radeon_device pointer
8193  *
8194  * Tear down the asic specific driver variables and program the hw
8195  * to an idle state (CIK).
8196  * Called at driver unload.
8197  */
8198 void cik_fini(struct radeon_device *rdev)
8199 {
8200 	radeon_pm_fini(rdev);
8201 	cik_cp_fini(rdev);
8202 	cik_sdma_fini(rdev);
8203 	cik_fini_pg(rdev);
8204 	cik_fini_cg(rdev);
8205 	cik_irq_fini(rdev);
8206 	sumo_rlc_fini(rdev);
8207 	cik_mec_fini(rdev);
8208 	radeon_wb_fini(rdev);
8209 	radeon_vm_manager_fini(rdev);
8210 	radeon_ib_pool_fini(rdev);
8211 	radeon_irq_kms_fini(rdev);
8212 	uvd_v1_0_fini(rdev);
8213 	radeon_uvd_fini(rdev);
8214 	radeon_vce_fini(rdev);
8215 	cik_pcie_gart_fini(rdev);
8216 	r600_vram_scratch_fini(rdev);
8217 	radeon_gem_fini(rdev);
8218 	radeon_fence_driver_fini(rdev);
8219 	radeon_bo_fini(rdev);
8220 	radeon_atombios_fini(rdev);
8221 	kfree(rdev->bios);
8222 	rdev->bios = NULL;
8223 }
8224 
8225 void dce8_program_fmt(struct drm_encoder *encoder)
8226 {
8227 	struct drm_device *dev = encoder->dev;
8228 	struct radeon_device *rdev = dev->dev_private;
8229 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8230 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8231 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8232 	int bpc = 0;
8233 	u32 tmp = 0;
8234 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8235 
8236 	if (connector) {
8237 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8238 		bpc = radeon_get_monitor_bpc(connector);
8239 		dither = radeon_connector->dither;
8240 	}
8241 
8242 	/* LVDS/eDP FMT is set up by atom */
8243 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8244 		return;
8245 
8246 	/* not needed for analog */
8247 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8248 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8249 		return;
8250 
8251 	if (bpc == 0)
8252 		return;
8253 
8254 	switch (bpc) {
8255 	case 6:
8256 		if (dither == RADEON_FMT_DITHER_ENABLE)
8257 			/* XXX sort out optimal dither settings */
8258 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8259 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8260 		else
8261 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8262 		break;
8263 	case 8:
8264 		if (dither == RADEON_FMT_DITHER_ENABLE)
8265 			/* XXX sort out optimal dither settings */
8266 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8267 				FMT_RGB_RANDOM_ENABLE |
8268 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8269 		else
8270 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8271 		break;
8272 	case 10:
8273 		if (dither == RADEON_FMT_DITHER_ENABLE)
8274 			/* XXX sort out optimal dither settings */
8275 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8276 				FMT_RGB_RANDOM_ENABLE |
8277 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8278 		else
8279 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8280 		break;
8281 	default:
8282 		/* not needed */
8283 		break;
8284 	}
8285 
8286 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8287 }
8288 
8289 /* display watermark setup */
8290 /**
8291  * dce8_line_buffer_adjust - Set up the line buffer
8292  *
8293  * @rdev: radeon_device pointer
8294  * @radeon_crtc: the selected display controller
8295  * @mode: the current display mode on the selected display
8296  * controller
8297  *
8298  * Set up the line buffer allocation for
8299  * the selected display controller (CIK).
8300  * Returns the line buffer size in pixels.
8301  */
8302 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8303 				   struct radeon_crtc *radeon_crtc,
8304 				   struct drm_display_mode *mode)
8305 {
8306 	u32 tmp, buffer_alloc, i;
8307 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8308 	/*
8309 	 * Line Buffer Setup
8310 	 * There are 6 line buffers, one for each display controller.
8311 	 * There are 3 partitions per LB. Select the number of partitions
8312 	 * to enable based on the display width.  For display widths larger
8313 	 * than 4096, you need to use 2 display controllers and combine
8314 	 * them using the stereo blender.
8315 	 */
8316 	if (radeon_crtc->base.enabled && mode) {
8317 		if (mode->crtc_hdisplay < 1920) {
8318 			tmp = 1;
8319 			buffer_alloc = 2;
8320 		} else if (mode->crtc_hdisplay < 2560) {
8321 			tmp = 2;
8322 			buffer_alloc = 2;
8323 		} else if (mode->crtc_hdisplay < 4096) {
8324 			tmp = 0;
8325 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8326 		} else {
8327 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8328 			tmp = 0;
8329 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8330 		}
8331 	} else {
8332 		tmp = 1;
8333 		buffer_alloc = 0;
8334 	}
8335 
8336 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8337 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8338 
8339 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8340 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8341 	for (i = 0; i < rdev->usec_timeout; i++) {
8342 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8343 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8344 			break;
8345 		udelay(1);
8346 	}
8347 
8348 	if (radeon_crtc->base.enabled && mode) {
8349 		switch (tmp) {
8350 		case 0:
8351 		default:
8352 			return 4096 * 2;
8353 		case 1:
8354 			return 1920 * 2;
8355 		case 2:
8356 			return 2560 * 2;
8357 		}
8358 	}
8359 
8360 	/* controller not enabled, so no lb used */
8361 	return 0;
8362 }
8363 
8364 /**
8365  * cik_get_number_of_dram_channels - get the number of dram channels
8366  *
8367  * @rdev: radeon_device pointer
8368  *
8369  * Look up the number of video ram channels (CIK).
8370  * Used for display watermark bandwidth calculations
8371  * Returns the number of dram channels
8372  */
8373 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8374 {
8375 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8376 
8377 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8378 	case 0:
8379 	default:
8380 		return 1;
8381 	case 1:
8382 		return 2;
8383 	case 2:
8384 		return 4;
8385 	case 3:
8386 		return 8;
8387 	case 4:
8388 		return 3;
8389 	case 5:
8390 		return 6;
8391 	case 6:
8392 		return 10;
8393 	case 7:
8394 		return 12;
8395 	case 8:
8396 		return 16;
8397 	}
8398 }
8399 
8400 struct dce8_wm_params {
8401 	u32 dram_channels; /* number of dram channels */
8402 	u32 yclk;          /* bandwidth per dram data pin in kHz */
8403 	u32 sclk;          /* engine clock in kHz */
8404 	u32 disp_clk;      /* display clock in kHz */
8405 	u32 src_width;     /* viewport width */
8406 	u32 active_time;   /* active display time in ns */
8407 	u32 blank_time;    /* blank time in ns */
8408 	bool interlaced;    /* mode is interlaced */
8409 	fixed20_12 vsc;    /* vertical scale ratio */
8410 	u32 num_heads;     /* number of active crtcs */
8411 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8412 	u32 lb_size;       /* line buffer allocated to pipe */
8413 	u32 vtaps;         /* vertical scaler taps */
8414 };
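
/* Hypothetical example (all values invented for illustration): a single
 * 1080p head at 32bpp with no scaling might be described as
 *
 *	struct dce8_wm_params wm = {
 *		.dram_channels   = 4,
 *		.yclk            = 1000000,	// kHz
 *		.sclk            = 800000,	// kHz
 *		.disp_clk        = 600000,	// kHz
 *		.src_width       = 1920,
 *		.active_time     = 12929,	// ns, 1920 px at 148.5 MHz
 *		.blank_time      = 1886,	// ns, 280 px at 148.5 MHz
 *		.interlaced      = false,
 *		.num_heads       = 1,
 *		.bytes_per_pixel = 4,
 *		.lb_size         = 1920 * 2,
 *		.vtaps           = 1,
 *	};
 *	wm.vsc.full = dfixed_const(1);
 *
 * dce8_average_bandwidth(&wm) below then comes out around
 * 1920 * 4 bytes / 14.8 us ~= 518 MB/s.
 */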
8415 
8416 /**
8417  * dce8_dram_bandwidth - get the dram bandwidth
8418  *
8419  * @wm: watermark calculation data
8420  *
8421  * Calculate the raw dram bandwidth (CIK).
8422  * Used for display watermark bandwidth calculations
8423  * Returns the dram bandwidth in MBytes/s
8424  */
8425 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8426 {
8427 	/* Calculate raw DRAM Bandwidth */
8428 	fixed20_12 dram_efficiency; /* 0.7 */
8429 	fixed20_12 yclk, dram_channels, bandwidth;
8430 	fixed20_12 a;
8431 
8432 	a.full = dfixed_const(1000);
8433 	yclk.full = dfixed_const(wm->yclk);
8434 	yclk.full = dfixed_div(yclk, a);
8435 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8436 	a.full = dfixed_const(10);
8437 	dram_efficiency.full = dfixed_const(7);
8438 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8439 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8440 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8441 
8442 	return dfixed_trunc(bandwidth);
8443 }
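
/* In plain terms the fixed-point math above computes, in MB/s:
 *
 *	bandwidth = (yclk / 1000) * (dram_channels * 4) * 0.7
 *
 * e.g. yclk = 1000000 kHz on 4 channels gives 1000 * 16 * 0.7 = 11200.
 */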
8444 
8445 /**
8446  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8447  *
8448  * @wm: watermark calculation data
8449  *
8450  * Calculate the dram bandwidth used for display (CIK).
8451  * Used for display watermark bandwidth calculations
8452  * Returns the dram bandwidth for display in MBytes/s
8453  */
8454 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8455 {
8456 	/* Calculate DRAM Bandwidth and the part allocated to display. */
8457 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8458 	fixed20_12 yclk, dram_channels, bandwidth;
8459 	fixed20_12 a;
8460 
8461 	a.full = dfixed_const(1000);
8462 	yclk.full = dfixed_const(wm->yclk);
8463 	yclk.full = dfixed_div(yclk, a);
8464 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8465 	a.full = dfixed_const(10);
8466 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
8467 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8468 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8469 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8470 
8471 	return dfixed_trunc(bandwidth);
8472 }
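
/* Same shape as dce8_dram_bandwidth() above, but scaled by the worst-case
 * 0.3 display allocation instead of the 0.7 efficiency factor:
 *
 *	bandwidth = (yclk / 1000) * (dram_channels * 4) * 0.3
 */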
8473 
8474 /**
8475  * dce8_data_return_bandwidth - get the data return bandwidth
8476  *
8477  * @wm: watermark calculation data
8478  *
8479  * Calculate the data return bandwidth used for display (CIK).
8480  * Used for display watermark bandwidth calculations
8481  * Returns the data return bandwidth in MBytes/s
8482  */
8483 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8484 {
8485 	/* Calculate the display Data return Bandwidth */
8486 	fixed20_12 return_efficiency; /* 0.8 */
8487 	fixed20_12 sclk, bandwidth;
8488 	fixed20_12 a;
8489 
8490 	a.full = dfixed_const(1000);
8491 	sclk.full = dfixed_const(wm->sclk);
8492 	sclk.full = dfixed_div(sclk, a);
8493 	a.full = dfixed_const(10);
8494 	return_efficiency.full = dfixed_const(8);
8495 	return_efficiency.full = dfixed_div(return_efficiency, a);
8496 	a.full = dfixed_const(32);
8497 	bandwidth.full = dfixed_mul(a, sclk);
8498 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8499 
8500 	return dfixed_trunc(bandwidth);
8501 }
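
/* Equivalent closed form, in MB/s (32 bytes per return at 0.8 efficiency):
 *
 *	bandwidth = 32 * (sclk / 1000) * 0.8
 */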
8502 
8503 /**
8504  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8505  *
8506  * @wm: watermark calculation data
8507  *
8508  * Calculate the dmif bandwidth used for display (CIK).
8509  * Used for display watermark bandwidth calculations
8510  * Returns the dmif bandwidth in MBytes/s
8511  */
8512 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8513 {
8514 	/* Calculate the DMIF Request Bandwidth */
8515 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8516 	fixed20_12 disp_clk, bandwidth;
8517 	fixed20_12 a, b;
8518 
8519 	a.full = dfixed_const(1000);
8520 	disp_clk.full = dfixed_const(wm->disp_clk);
8521 	disp_clk.full = dfixed_div(disp_clk, a);
8522 	a.full = dfixed_const(32);
8523 	b.full = dfixed_mul(a, disp_clk);
8524 
8525 	a.full = dfixed_const(10);
8526 	disp_clk_request_efficiency.full = dfixed_const(8);
8527 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8528 
8529 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8530 
8531 	return dfixed_trunc(bandwidth);
8532 }
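
/* Same shape as the data return path above, but driven by the display
 * clock:
 *
 *	bandwidth = 32 * (disp_clk / 1000) * 0.8
 */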
8533 
8534 /**
8535  * dce8_available_bandwidth - get the min available bandwidth
8536  *
8537  * @wm: watermark calculation data
8538  *
8539  * Calculate the min available bandwidth used for display (CIK).
8540  * Used for display watermark bandwidth calculations
8541  * Returns the min available bandwidth in MBytes/s
8542  */
8543 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8544 {
8545 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
8546 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8547 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8548 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8549 
8550 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8551 }
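/*
 * Continuing the hypothetical numbers from the examples above, the
 * available bandwidth is the bottleneck of the three paths:
 *
 *   min(4800, min(20480, 3840)) = 3840 MBytes/s
 *
 * i.e. in that configuration the DMIF request path, not DRAM, is the
 * limit.
 */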
8552 
8553 /**
8554  * dce8_average_bandwidth - get the average bandwidth consumed by a display
8555  *
8556  * @wm: watermark calculation data
8557  *
8558  * Calculate the average bandwidth consumed by the current display mode (CIK).
8559  * Used for display watermark bandwidth calculations
8560  * Returns the average bandwidth in MBytes/s
8561  */
8562 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8563 {
8564 	/* Calculate the display mode Average Bandwidth
8565 	 * DisplayMode should contain the source and destination dimensions,
8566 	 * timing, etc.
8567 	 */
8568 	fixed20_12 bpp;
8569 	fixed20_12 line_time;
8570 	fixed20_12 src_width;
8571 	fixed20_12 bandwidth;
8572 	fixed20_12 a;
8573 
8574 	a.full = dfixed_const(1000);
8575 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8576 	line_time.full = dfixed_div(line_time, a);
8577 	bpp.full = dfixed_const(wm->bytes_per_pixel);
8578 	src_width.full = dfixed_const(wm->src_width);
8579 	bandwidth.full = dfixed_mul(src_width, bpp);
8580 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8581 	bandwidth.full = dfixed_div(bandwidth, line_time);
8582 
8583 	return dfixed_trunc(bandwidth);
8584 }
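/*
 * Worked example (hypothetical mode): a 1920 pixel wide source at 4
 * bytes per pixel with no vertical scaling (vsc = 1) and a line time
 * (active_time + blank_time) of 12800 ns consumes, per scanline:
 *
 *   1920 * 4 = 7680 bytes  over  12.8 us  =  600 MBytes/s
 */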
8585 
8586 /**
8587  * dce8_latency_watermark - get the latency watermark
8588  *
8589  * @wm: watermark calculation data
8590  *
8591  * Calculate the latency watermark (CIK).
8592  * Used for display watermark bandwidth calculations
8593  * Returns the latency watermark in ns
8594  */
8595 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8596 {
8597 	/* First calculate the latency in ns */
8598 	u32 mc_latency = 2000; /* 2000 ns. */
8599 	u32 available_bandwidth = dce8_available_bandwidth(wm);
8600 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8601 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8602 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8603 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8604 		(wm->num_heads * cursor_line_pair_return_time);
8605 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8606 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8607 	u32 tmp, dmif_size = 12288;
8608 	fixed20_12 a, b, c;
8609 
8610 	if (wm->num_heads == 0)
8611 		return 0;
8612 
8613 	a.full = dfixed_const(2);
8614 	b.full = dfixed_const(1);
8615 	if ((wm->vsc.full > a.full) ||
8616 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8617 	    (wm->vtaps >= 5) ||
8618 	    ((wm->vsc.full >= a.full) && wm->interlaced))
8619 		max_src_lines_per_dst_line = 4;
8620 	else
8621 		max_src_lines_per_dst_line = 2;
8622 
8623 	a.full = dfixed_const(available_bandwidth);
8624 	b.full = dfixed_const(wm->num_heads);
8625 	a.full = dfixed_div(a, b);
8626 
8627 	b.full = dfixed_const(mc_latency + 512);
8628 	c.full = dfixed_const(wm->disp_clk);
8629 	b.full = dfixed_div(b, c);
8630 
8631 	c.full = dfixed_const(dmif_size);
8632 	b.full = dfixed_div(c, b);
8633 
8634 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8635 
8636 	b.full = dfixed_const(1000);
8637 	c.full = dfixed_const(wm->disp_clk);
8638 	b.full = dfixed_div(c, b);
8639 	c.full = dfixed_const(wm->bytes_per_pixel);
8640 	b.full = dfixed_mul(b, c);
8641 
8642 	lb_fill_bw = min(tmp, dfixed_trunc(b));
8643 
8644 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8645 	b.full = dfixed_const(1000);
8646 	c.full = dfixed_const(lb_fill_bw);
8647 	b.full = dfixed_div(c, b);
8648 	a.full = dfixed_div(a, b);
8649 	line_fill_time = dfixed_trunc(a);
8650 
8651 	if (line_fill_time < wm->active_time)
8652 		return latency;
8653 	else
8654 		return latency + (line_fill_time - wm->active_time);
8655 
8656 }
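/*
 * Worked example (hypothetical numbers, one head): with the 3840 MBytes/s
 * available bandwidth from above and disp_clk = 150000 kHz:
 *
 *   worst_chunk_return_time      = 512 * 8 * 1000 / 3840 = 1066 ns
 *   cursor_line_pair_return_time = 128 * 4 * 1000 / 3840 =  133 ns
 *   dc_latency                   = 40000000 / 150000     =  266 ns
 *   other_heads_data_return_time = 2 * 1066 + 1 * 133    = 2265 ns
 *   latency                      = 2000 + 2265 + 266     = 4531 ns
 *
 * which is returned as-is whenever the line buffer can be filled within
 * the active portion of a scanline.
 */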
8657 
8658 /**
8659  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8660  * average bandwidth against the display dram bandwidth
8661  *
8662  * @wm: watermark calculation data
8663  *
8664  * Check if the display average bandwidth fits in the display
8665  * dram bandwidth (CIK).
8666  * Used for display watermark bandwidth calculations
8667  * Returns true if the display fits, false if not.
8668  */
8669 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8670 {
8671 	if (dce8_average_bandwidth(wm) <=
8672 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8673 		return true;
8674 	else
8675 		return false;
8676 }
8677 
8678 /**
8679  * dce8_average_bandwidth_vs_available_bandwidth - check
8680  * average bandwidth against the available bandwidth
8681  *
8682  * @wm: watermark calculation data
8683  *
8684  * Check if the display average bandwidth fits in the display
8685  * available bandwidth (CIK).
8686  * Used for display watermark bandwidth calculations
8687  * Returns true if the display fits, false if not.
8688  */
8689 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8690 {
8691 	if (dce8_average_bandwidth(wm) <=
8692 	    (dce8_available_bandwidth(wm) / wm->num_heads))
8693 		return true;
8694 	else
8695 		return false;
8696 }
8697 
8698 /**
8699  * dce8_check_latency_hiding - check latency hiding
8700  *
8701  * @wm: watermark calculation data
8702  *
8703  * Check latency hiding (CIK).
8704  * Used for display watermark bandwidth calculations
8705  * Returns true if the display fits, false if not.
8706  */
8707 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8708 {
8709 	u32 lb_partitions = wm->lb_size / wm->src_width;
8710 	u32 line_time = wm->active_time + wm->blank_time;
8711 	u32 latency_tolerant_lines;
8712 	u32 latency_hiding;
8713 	fixed20_12 a;
8714 
8715 	a.full = dfixed_const(1);
8716 	if (wm->vsc.full > a.full)
8717 		latency_tolerant_lines = 1;
8718 	else {
8719 		if (lb_partitions <= (wm->vtaps + 1))
8720 			latency_tolerant_lines = 1;
8721 		else
8722 			latency_tolerant_lines = 2;
8723 	}
8724 
8725 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8726 
8727 	if (dce8_latency_watermark(wm) <= latency_hiding)
8728 		return true;
8729 	else
8730 		return false;
8731 }
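/*
 * Worked example (hypothetical numbers): lb_size = 5760 over a 1920
 * pixel source gives 3 line buffer partitions; with vsc <= 1 and
 * vtaps = 1, 3 > (1 + 1) so two latency tolerant lines are assumed.
 * With a 12800 ns line time and 1280 ns of blanking:
 *
 *   latency_hiding = 2 * 12800 + 1280 = 26880 ns
 *
 * comfortably above the 4531 ns watermark computed above, so such a
 * display fits.
 */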
8732 
8733 /**
8734  * dce8_program_watermarks - program display watermarks
8735  *
8736  * @rdev: radeon_device pointer
8737  * @radeon_crtc: the selected display controller
8738  * @lb_size: line buffer size
8739  * @num_heads: number of display controllers in use
8740  *
8741  * Calculate and program the display watermarks for the
8742  * selected display controller (CIK).
8743  */
8744 static void dce8_program_watermarks(struct radeon_device *rdev,
8745 				    struct radeon_crtc *radeon_crtc,
8746 				    u32 lb_size, u32 num_heads)
8747 {
8748 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
8749 	struct dce8_wm_params wm_low, wm_high;
8750 	u32 pixel_period;
8751 	u32 line_time = 0;
8752 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
8753 	u32 tmp, wm_mask;
8754 
8755 	if (radeon_crtc->base.enabled && num_heads && mode) {
8756 		pixel_period = 1000000 / (u32)mode->clock;
8757 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8758 
8759 		/* watermark for high clocks */
8760 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8761 		    rdev->pm.dpm_enabled) {
8762 			wm_high.yclk =
8763 				radeon_dpm_get_mclk(rdev, false) * 10;
8764 			wm_high.sclk =
8765 				radeon_dpm_get_sclk(rdev, false) * 10;
8766 		} else {
8767 			wm_high.yclk = rdev->pm.current_mclk * 10;
8768 			wm_high.sclk = rdev->pm.current_sclk * 10;
8769 		}
8770 
8771 		wm_high.disp_clk = mode->clock;
8772 		wm_high.src_width = mode->crtc_hdisplay;
8773 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8774 		wm_high.blank_time = line_time - wm_high.active_time;
8775 		wm_high.interlaced = false;
8776 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8777 			wm_high.interlaced = true;
8778 		wm_high.vsc = radeon_crtc->vsc;
8779 		wm_high.vtaps = 1;
8780 		if (radeon_crtc->rmx_type != RMX_OFF)
8781 			wm_high.vtaps = 2;
8782 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8783 		wm_high.lb_size = lb_size;
8784 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8785 		wm_high.num_heads = num_heads;
8786 
8787 		/* set for high clocks */
8788 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8789 
8790 		/* possibly force display priority to high */
8791 		/* should really do this at mode validation time... */
8792 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8793 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8794 		    !dce8_check_latency_hiding(&wm_high) ||
8795 		    (rdev->disp_priority == 2)) {
8796 			DRM_DEBUG_KMS("force priority to high\n");
8797 		}
8798 
8799 		/* watermark for low clocks */
8800 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8801 		    rdev->pm.dpm_enabled) {
8802 			wm_low.yclk =
8803 				radeon_dpm_get_mclk(rdev, true) * 10;
8804 			wm_low.sclk =
8805 				radeon_dpm_get_sclk(rdev, true) * 10;
8806 		} else {
8807 			wm_low.yclk = rdev->pm.current_mclk * 10;
8808 			wm_low.sclk = rdev->pm.current_sclk * 10;
8809 		}
8810 
8811 		wm_low.disp_clk = mode->clock;
8812 		wm_low.src_width = mode->crtc_hdisplay;
8813 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8814 		wm_low.blank_time = line_time - wm_low.active_time;
8815 		wm_low.interlaced = false;
8816 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8817 			wm_low.interlaced = true;
8818 		wm_low.vsc = radeon_crtc->vsc;
8819 		wm_low.vtaps = 1;
8820 		if (radeon_crtc->rmx_type != RMX_OFF)
8821 			wm_low.vtaps = 2;
8822 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8823 		wm_low.lb_size = lb_size;
8824 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8825 		wm_low.num_heads = num_heads;
8826 
8827 		/* set for low clocks */
8828 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
8829 
8830 		/* possibly force display priority to high */
8831 		/* should really do this at mode validation time... */
8832 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
8833 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
8834 		    !dce8_check_latency_hiding(&wm_low) ||
8835 		    (rdev->disp_priority == 2)) {
8836 			DRM_DEBUG_KMS("force priority to high\n");
8837 		}
8838 	}
8839 
8840 	/* select wm A */
8841 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8842 	tmp = wm_mask;
8843 	tmp &= ~LATENCY_WATERMARK_MASK(3);
8844 	tmp |= LATENCY_WATERMARK_MASK(1);
8845 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8846 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8847 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
8848 		LATENCY_HIGH_WATERMARK(line_time)));
8849 	/* select wm B */
8850 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8851 	tmp &= ~LATENCY_WATERMARK_MASK(3);
8852 	tmp |= LATENCY_WATERMARK_MASK(2);
8853 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8854 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8855 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
8856 		LATENCY_HIGH_WATERMARK(line_time)));
8857 	/* restore original selection */
8858 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
8859 
8860 	/* save values for DPM */
8861 	radeon_crtc->line_time = line_time;
8862 	radeon_crtc->wm_high = latency_watermark_a;
8863 	radeon_crtc->wm_low = latency_watermark_b;
8864 }
8865 
8866 /**
8867  * dce8_bandwidth_update - program display watermarks
8868  *
8869  * @rdev: radeon_device pointer
8870  *
8871  * Calculate and program the display watermarks and line
8872  * buffer allocation (CIK).
8873  */
8874 void dce8_bandwidth_update(struct radeon_device *rdev)
8875 {
8876 	struct drm_display_mode *mode = NULL;
8877 	u32 num_heads = 0, lb_size;
8878 	int i;
8879 
8880 	radeon_update_display_priority(rdev);
8881 
8882 	for (i = 0; i < rdev->num_crtc; i++) {
8883 		if (rdev->mode_info.crtcs[i]->base.enabled)
8884 			num_heads++;
8885 	}
8886 	for (i = 0; i < rdev->num_crtc; i++) {
8887 		mode = &rdev->mode_info.crtcs[i]->base.mode;
8888 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8889 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8890 	}
8891 }
8892 
8893 /**
8894  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8895  *
8896  * @rdev: radeon_device pointer
8897  *
8898  * Fetches a GPU clock counter snapshot (CIK).
8899  * Returns the 64 bit clock counter snapshot.
8900  */
8901 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8902 {
8903 	uint64_t clock;
8904 
8905 	mutex_lock(&rdev->gpu_clock_mutex);
8906 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8907 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8908 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8909 	mutex_unlock(&rdev->gpu_clock_mutex);
8910 	return clock;
8911 }
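/*
 * Usage sketch (hypothetical caller): the counter is free-running, so
 * elapsed GPU clocks can be measured with two snapshots:
 *
 *   uint64_t start, elapsed;
 *
 *   start = cik_get_gpu_clock_counter(rdev);
 *   ... work to be timed ...
 *   elapsed = cik_get_gpu_clock_counter(rdev) - start;
 */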
8912 
8913 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8914                               u32 cntl_reg, u32 status_reg)
8915 {
8916 	int r, i;
8917 	struct atom_clock_dividers dividers;
8918 	uint32_t tmp;
8919 
8920 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8921 					   clock, false, &dividers);
8922 	if (r)
8923 		return r;
8924 
8925 	tmp = RREG32_SMC(cntl_reg);
8926 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8927 	tmp |= dividers.post_divider;
8928 	WREG32_SMC(cntl_reg, tmp);
8929 
8930 	for (i = 0; i < 100; i++) {
8931 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
8932 			break;
8933 		mdelay(10);
8934 	}
8935 	if (i == 100)
8936 		return -ETIMEDOUT;
8937 
8938 	return 0;
8939 }
8940 
8941 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8942 {
8943 	int r = 0;
8944 
8945 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8946 	if (r)
8947 		return r;
8948 
8949 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8950 	return r;
8951 }
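/*
 * Usage sketch (hypothetical clock values): like the other clocks in
 * this driver, vclk and dclk are given in 10 kHz units, so this would
 * request a 533 MHz VCLK and a 400 MHz DCLK:
 *
 *   r = cik_set_uvd_clocks(rdev, 53300, 40000);
 *   if (r)
 *           DRM_ERROR("failed to set UVD clocks: %d\n", r);
 */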
8952 
8953 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
8954 {
8955 	int r, i;
8956 	struct atom_clock_dividers dividers;
8957 	u32 tmp;
8958 
8959 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8960 					   ecclk, false, &dividers);
8961 	if (r)
8962 		return r;
8963 
8964 	for (i = 0; i < 100; i++) {
8965 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
8966 			break;
8967 		mdelay(10);
8968 	}
8969 	if (i == 100)
8970 		return -ETIMEDOUT;
8971 
8972 	tmp = RREG32_SMC(CG_ECLK_CNTL);
8973 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
8974 	tmp |= dividers.post_divider;
8975 	WREG32_SMC(CG_ECLK_CNTL, tmp);
8976 
8977 	for (i = 0; i < 100; i++) {
8978 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
8979 			break;
8980 		mdelay(10);
8981 	}
8982 	if (i == 100)
8983 		return -ETIMEDOUT;
8984 
8985 	return 0;
8986 }
8987 
8988 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
8989 {
8990 	struct pci_dev *root = rdev->pdev->bus->self;
8991 	int bridge_pos, gpu_pos;
8992 	u32 speed_cntl, mask, current_data_rate;
8993 	int ret, i;
8994 	u16 tmp16;
8995 
8996 	if (radeon_pcie_gen2 == 0)
8997 		return;
8998 
8999 	if (rdev->flags & RADEON_IS_IGP)
9000 		return;
9001 
9002 	if (!(rdev->flags & RADEON_IS_PCIE))
9003 		return;
9004 
9005 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9006 	if (ret != 0)
9007 		return;
9008 
9009 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9010 		return;
9011 
9012 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9013 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9014 		LC_CURRENT_DATA_RATE_SHIFT;
9015 	if (mask & DRM_PCIE_SPEED_80) {
9016 		if (current_data_rate == 2) {
9017 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9018 			return;
9019 		}
9020 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9021 	} else if (mask & DRM_PCIE_SPEED_50) {
9022 		if (current_data_rate == 1) {
9023 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9024 			return;
9025 		}
9026 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9027 	}
9028 
9029 	bridge_pos = pci_pcie_cap(root);
9030 	if (!bridge_pos)
9031 		return;
9032 
9033 	gpu_pos = pci_pcie_cap(rdev->pdev);
9034 	if (!gpu_pos)
9035 		return;
9036 
9037 	if (mask & DRM_PCIE_SPEED_80) {
9038 		/* re-try equalization if gen3 is not already enabled */
9039 		if (current_data_rate != 2) {
9040 			u16 bridge_cfg, gpu_cfg;
9041 			u16 bridge_cfg2, gpu_cfg2;
9042 			u32 max_lw, current_lw, tmp;
9043 
9044 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9045 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9046 
9047 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9048 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9049 
9050 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9051 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9052 
9053 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9054 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9055 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9056 
9057 			if (current_lw < max_lw) {
9058 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9059 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9060 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9061 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9062 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9063 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9064 				}
9065 			}
9066 
9067 			for (i = 0; i < 10; i++) {
9068 				/* check status */
9069 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9070 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9071 					break;
9072 
9073 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9074 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9075 
9076 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9077 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9078 
9079 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9080 				tmp |= LC_SET_QUIESCE;
9081 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9082 
9083 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9084 				tmp |= LC_REDO_EQ;
9085 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9086 
9087 				mdelay(100);
9088 
9089 				/* linkctl */
9090 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9091 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9092 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9093 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9094 
9095 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9096 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9097 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9098 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9099 
9100 				/* linkctl2: restore the compliance-related bits saved above */
9101 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9102 				tmp16 &= ~((1 << 4) | (7 << 9));
9103 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9104 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9105 
9106 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9107 				tmp16 &= ~((1 << 4) | (7 << 9));
9108 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9109 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9110 
9111 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9112 				tmp &= ~LC_SET_QUIESCE;
9113 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9114 			}
9115 		}
9116 	}
9117 
9118 	/* set the link speed */
9119 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9120 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9121 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9122 
9123 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9124 	tmp16 &= ~0xf; /* clear the target link speed field */
9125 	if (mask & DRM_PCIE_SPEED_80)
9126 		tmp16 |= 3; /* gen3 */
9127 	else if (mask & DRM_PCIE_SPEED_50)
9128 		tmp16 |= 2; /* gen2 */
9129 	else
9130 		tmp16 |= 1; /* gen1 */
9131 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9132 
9133 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9134 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9135 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9136 
9137 	for (i = 0; i < rdev->usec_timeout; i++) {
9138 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9139 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9140 			break;
9141 		udelay(1);
9142 	}
9143 }
9144 
9145 static void cik_program_aspm(struct radeon_device *rdev)
9146 {
9147 	u32 data, orig;
9148 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9149 	bool disable_clkreq = false;
9150 
9151 	if (radeon_aspm == 0)
9152 		return;
9153 
9154 	/* XXX double check IGPs */
9155 	if (rdev->flags & RADEON_IS_IGP)
9156 		return;
9157 
9158 	if (!(rdev->flags & RADEON_IS_PCIE))
9159 		return;
9160 
9161 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9162 	data &= ~LC_XMIT_N_FTS_MASK;
9163 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9164 	if (orig != data)
9165 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9166 
9167 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9168 	data |= LC_GO_TO_RECOVERY;
9169 	if (orig != data)
9170 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9171 
9172 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9173 	data |= P_IGNORE_EDB_ERR;
9174 	if (orig != data)
9175 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9176 
9177 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9178 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9179 	data |= LC_PMI_TO_L1_DIS;
9180 	if (!disable_l0s)
9181 		data |= LC_L0S_INACTIVITY(7);
9182 
9183 	if (!disable_l1) {
9184 		data |= LC_L1_INACTIVITY(7);
9185 		data &= ~LC_PMI_TO_L1_DIS;
9186 		if (orig != data)
9187 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9188 
9189 		if (!disable_plloff_in_l1) {
9190 			bool clk_req_support;
9191 
9192 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9193 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9194 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9195 			if (orig != data)
9196 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9197 
9198 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9199 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9200 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9201 			if (orig != data)
9202 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9203 
9204 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9205 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9206 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9207 			if (orig != data)
9208 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9209 
9210 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9211 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9212 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9213 			if (orig != data)
9214 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9215 
9216 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9217 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9218 			data |= LC_DYN_LANES_PWR_STATE(3);
9219 			if (orig != data)
9220 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9221 
9222 			if (!disable_clkreq) {
9223 				struct pci_dev *root = rdev->pdev->bus->self;
9224 				u32 lnkcap;
9225 
9226 				clk_req_support = false;
9227 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9228 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9229 					clk_req_support = true;
9230 			} else {
9231 				clk_req_support = false;
9232 			}
9233 
9234 			if (clk_req_support) {
9235 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9236 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9237 				if (orig != data)
9238 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9239 
9240 				orig = data = RREG32_SMC(THM_CLK_CNTL);
9241 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9242 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9243 				if (orig != data)
9244 					WREG32_SMC(THM_CLK_CNTL, data);
9245 
9246 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9247 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9248 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9249 				if (orig != data)
9250 					WREG32_SMC(MISC_CLK_CTRL, data);
9251 
9252 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9253 				data &= ~BCLK_AS_XCLK;
9254 				if (orig != data)
9255 					WREG32_SMC(CG_CLKPIN_CNTL, data);
9256 
9257 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9258 				data &= ~FORCE_BIF_REFCLK_EN;
9259 				if (orig != data)
9260 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9261 
9262 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9263 				data &= ~MPLL_CLKOUT_SEL_MASK;
9264 				data |= MPLL_CLKOUT_SEL(4);
9265 				if (orig != data)
9266 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9267 			}
9268 		}
9269 	} else {
9270 		if (orig != data)
9271 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9272 	}
9273 
9274 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9275 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9276 	if (orig != data)
9277 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9278 
9279 	if (!disable_l0s) {
9280 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9281 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9282 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9283 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9284 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9285 				data &= ~LC_L0S_INACTIVITY_MASK;
9286 				if (orig != data)
9287 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9288 			}
9289 		}
9290 	}
9291 }
9292