xref: /openbmc/linux/drivers/gpu/drm/radeon/cik.c (revision a1d6f97c8cfa7c3554d0391c0b16505d1d97f380)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35 
36 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
44 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
45 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
46 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
47 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
48 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
49 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
50 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
51 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
52 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
53 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
54 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
55 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
56 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
57 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
58 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
59 MODULE_FIRMWARE("radeon/KABINI_me.bin");
60 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
61 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
62 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
63 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
64 
65 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
66 extern void r600_ih_ring_fini(struct radeon_device *rdev);
67 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
68 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
69 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
70 extern void sumo_rlc_fini(struct radeon_device *rdev);
71 extern int sumo_rlc_init(struct radeon_device *rdev);
72 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
73 extern void si_rlc_reset(struct radeon_device *rdev);
74 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
75 extern int cik_sdma_resume(struct radeon_device *rdev);
76 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
77 extern void cik_sdma_fini(struct radeon_device *rdev);
78 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
79 static void cik_rlc_stop(struct radeon_device *rdev);
80 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
81 static void cik_program_aspm(struct radeon_device *rdev);
82 static void cik_init_pg(struct radeon_device *rdev);
83 static void cik_init_cg(struct radeon_device *rdev);
84 static void cik_fini_pg(struct radeon_device *rdev);
85 static void cik_fini_cg(struct radeon_device *rdev);
86 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
87 					  bool enable);
88 
89 /* get temperature in millidegrees */
90 int ci_get_temp(struct radeon_device *rdev)
91 {
92 	u32 temp;
93 	int actual_temp = 0;
94 
95 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
96 		CTF_TEMP_SHIFT;
97 
98 	if (temp & 0x200)
99 		actual_temp = 255;
100 	else
101 		actual_temp = temp & 0x1ff;
102 
103 	actual_temp = actual_temp * 1000;
104 
105 	return actual_temp;
106 }
107 
108 /* get temperature in millidegrees */
109 int kv_get_temp(struct radeon_device *rdev)
110 {
111 	u32 temp;
112 	int actual_temp = 0;
113 
114 	temp = RREG32_SMC(0xC0300E0C);
115 
116 	if (temp)
117 		actual_temp = (temp / 8) - 49;
118 	else
119 		actual_temp = 0;
120 
121 	actual_temp = actual_temp * 1000;
122 
123 	return actual_temp;
124 }
125 
/*
 * Indirect registers accessor
 *
 * Read a PCIE port register through the PCIE_INDEX/PCIE_DATA window.
 * The pciep_idx_lock serializes the index write against the data read so
 * concurrent accessors cannot interleave and read through a stale index.
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* readback flushes the index write */
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}
141 
/*
 * Write a PCIE port register through the PCIE_INDEX/PCIE_DATA window.
 * Counterpart of cik_pciep_rreg(); the same spinlock keeps the
 * index/data pair atomic with respect to other accessors.
 */
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* readback flushes the index write */
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);	/* readback flushes the data write */
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
153 
/*
 * RLC save/restore register list for Spectre (Kaveri) parts.
 *
 * Each entry is a pair:
 *   (SE/SH/instance select << 16) | (dword-aligned register offset >> 2),
 *   0x00000000   -- placeholder that receives the saved register value.
 * The bare small words (0x3, 0x5) appear to delimit sub-lists; the final
 * five entries after 0x5 carry no value placeholders.
 * NOTE(review): the exact encoding is defined by the RLC microcode, not by
 * this driver -- confirm against the CIK RLC ucode interface before editing.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
600 
/*
 * RLC save/restore register list for Kalindi (Kabini) parts.
 * Same entry format as spectre_rlc_save_restore_register_list above:
 * {(select << 16) | (offset >> 2), 0x00000000} pairs with bare markers
 * 0x3/0x5, but a smaller register set (fewer SE/SH instances).
 * NOTE(review): encoding is owned by the RLC microcode -- confirm before
 * editing.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
925 
/*
 * "Golden" SPM register fixups for Bonaire.
 * Triples of {register offset, AND mask, OR value}; presumably applied via
 * radeon_program_register_sequence() at asic init -- confirm in the init path.
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
930 
/*
 * Common "golden" register fixups for Bonaire.
 * Each triple is {register offset, AND mask, OR value}.
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
938 
/*
 * Per-asic "golden" register fixups for Bonaire.
 * Each triple is {register offset, AND mask, OR value}: the masked bits of
 * the current register value are replaced with the given value.
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
983 
/*
 * Medium-grain / coarse-grain clockgating init sequence for Bonaire.
 * Triples of {register offset, AND mask, OR value}; presumably written
 * during clockgating setup -- confirm against the cg init path.
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1069 
/*
 * "Golden" SPM register fixups for Spectre (Kaveri).
 * Triples of {register offset, AND mask, OR value}.
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1074 
/* Common golden register overrides for Spectre (Kaveri): {offset, mask,
 * value} triplets applied by radeon_program_register_sequence().
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1082 
1083 static const u32 spectre_golden_registers[] =
1084 {
1085 	0x3c000, 0xffff1fff, 0x96940200,
1086 	0x3c00c, 0xffff0001, 0xff000000,
1087 	0x3c200, 0xfffc0fff, 0x00000100,
1088 	0x6ed8, 0x00010101, 0x00010000,
1089 	0x9834, 0xf00fffff, 0x00000400,
1090 	0x9838, 0xfffffffc, 0x00020200,
1091 	0x5bb0, 0x000000f0, 0x00000070,
1092 	0x5bc0, 0xf0311fff, 0x80300000,
1093 	0x98f8, 0x73773777, 0x12010001,
1094 	0x9b7c, 0x00ff0000, 0x00fc0000,
1095 	0x2f48, 0x73773777, 0x12010001,
1096 	0x8a14, 0xf000003f, 0x00000007,
1097 	0x8b24, 0xffffffff, 0x00ffffff,
1098 	0x28350, 0x3f3f3fff, 0x00000082,
1099 	0x28355, 0x0000003f, 0x00000000,
1100 	0x3e78, 0x00000001, 0x00000002,
1101 	0x913c, 0xffff03df, 0x00000004,
1102 	0xc768, 0x00000008, 0x00000008,
1103 	0x8c00, 0x000008ff, 0x00000800,
1104 	0x9508, 0x00010000, 0x00010000,
1105 	0xac0c, 0xffffffff, 0x54763210,
1106 	0x214f8, 0x01ff01ff, 0x00000002,
1107 	0x21498, 0x007ff800, 0x00200000,
1108 	0x2015c, 0xffffffff, 0x00000f40,
1109 	0x30934, 0xffffffff, 0x00000001
1110 };
1111 
/* Medium/coarse grain clockgating (MGCG/CGCG) init sequence for Spectre
 * (Kaveri): {offset, mask, value} triplets applied by
 * radeon_program_register_sequence() before the golden registers.
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1202 
/* SPM golden register override for Kalindi (Kabini): {offset, mask, value}
 * triplets applied by radeon_program_register_sequence().
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1207 
/* Common golden register overrides for Kalindi (Kabini): {offset, mask,
 * value} triplets applied by radeon_program_register_sequence().
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1215 
/* Golden register overrides for Kalindi (Kabini): {offset, mask, value}
 * triplets applied by radeon_program_register_sequence() from
 * cik_init_golden_registers().
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1249 
/* Medium/coarse grain clockgating (MGCG/CGCG) init sequence for Kalindi
 * (Kabini): {offset, mask, value} triplets applied by
 * radeon_program_register_sequence() before the golden registers.
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1308 
/* SPM golden register override for Hawaii: {offset, mask, value} triplets
 * applied by radeon_program_register_sequence().
 */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1313 
/* Common golden register overrides for Hawaii: {offset, mask, value}
 * triplets applied by radeon_program_register_sequence().
 */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1322 
/* Golden register overrides for Hawaii: {offset, mask, value} triplets
 * applied by radeon_program_register_sequence() from
 * cik_init_golden_registers().
 */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1362 
/* Medium/coarse grain clockgating (MGCG/CGCG) init sequence for Hawaii:
 * {offset, mask, value} triplets applied by
 * radeon_program_register_sequence() before the golden registers.
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1473 
1474 static void cik_init_golden_registers(struct radeon_device *rdev)
1475 {
1476 	switch (rdev->family) {
1477 	case CHIP_BONAIRE:
1478 		radeon_program_register_sequence(rdev,
1479 						 bonaire_mgcg_cgcg_init,
1480 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1481 		radeon_program_register_sequence(rdev,
1482 						 bonaire_golden_registers,
1483 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1484 		radeon_program_register_sequence(rdev,
1485 						 bonaire_golden_common_registers,
1486 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1487 		radeon_program_register_sequence(rdev,
1488 						 bonaire_golden_spm_registers,
1489 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1490 		break;
1491 	case CHIP_KABINI:
1492 		radeon_program_register_sequence(rdev,
1493 						 kalindi_mgcg_cgcg_init,
1494 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1495 		radeon_program_register_sequence(rdev,
1496 						 kalindi_golden_registers,
1497 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1498 		radeon_program_register_sequence(rdev,
1499 						 kalindi_golden_common_registers,
1500 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1501 		radeon_program_register_sequence(rdev,
1502 						 kalindi_golden_spm_registers,
1503 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1504 		break;
1505 	case CHIP_KAVERI:
1506 		radeon_program_register_sequence(rdev,
1507 						 spectre_mgcg_cgcg_init,
1508 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1509 		radeon_program_register_sequence(rdev,
1510 						 spectre_golden_registers,
1511 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1512 		radeon_program_register_sequence(rdev,
1513 						 spectre_golden_common_registers,
1514 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1515 		radeon_program_register_sequence(rdev,
1516 						 spectre_golden_spm_registers,
1517 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1518 		break;
1519 	case CHIP_HAWAII:
1520 		radeon_program_register_sequence(rdev,
1521 						 hawaii_mgcg_cgcg_init,
1522 						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1523 		radeon_program_register_sequence(rdev,
1524 						 hawaii_golden_registers,
1525 						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1526 		radeon_program_register_sequence(rdev,
1527 						 hawaii_golden_common_registers,
1528 						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1529 		radeon_program_register_sequence(rdev,
1530 						 hawaii_golden_spm_registers,
1531 						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1532 		break;
1533 	default:
1534 		break;
1535 	}
1536 }
1537 
1538 /**
1539  * cik_get_xclk - get the xclk
1540  *
1541  * @rdev: radeon_device pointer
1542  *
1543  * Returns the reference clock used by the gfx engine
1544  * (CIK).
1545  */
1546 u32 cik_get_xclk(struct radeon_device *rdev)
1547 {
1548         u32 reference_clock = rdev->clock.spll.reference_freq;
1549 
1550 	if (rdev->flags & RADEON_IS_IGP) {
1551 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1552 			return reference_clock / 2;
1553 	} else {
1554 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1555 			return reference_clock / 4;
1556 	}
1557 	return reference_clock;
1558 }
1559 
1560 /**
1561  * cik_mm_rdoorbell - read a doorbell dword
1562  *
1563  * @rdev: radeon_device pointer
1564  * @index: doorbell index
1565  *
1566  * Returns the value in the doorbell aperture at the
1567  * requested doorbell index (CIK).
1568  */
1569 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1570 {
1571 	if (index < rdev->doorbell.num_doorbells) {
1572 		return readl(rdev->doorbell.ptr + index);
1573 	} else {
1574 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1575 		return 0;
1576 	}
1577 }
1578 
1579 /**
1580  * cik_mm_wdoorbell - write a doorbell dword
1581  *
1582  * @rdev: radeon_device pointer
1583  * @index: doorbell index
1584  * @v: value to write
1585  *
1586  * Writes @v to the doorbell aperture at the
1587  * requested doorbell index (CIK).
1588  */
1589 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1590 {
1591 	if (index < rdev->doorbell.num_doorbells) {
1592 		writel(v, rdev->doorbell.ptr + index);
1593 	} else {
1594 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1595 	}
1596 }
1597 
#define BONAIRE_IO_MC_REGS_SIZE 36

/* MC sequencer I/O debug register init for Bonaire:
 * {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs written by
 * ci_mc_load_microcode() before the MC ucode upload.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1639 
#define HAWAII_IO_MC_REGS_SIZE 22

/* MC sequencer I/O debug register init for Hawaii:
 * {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs written by
 * ci_mc_load_microcode() before the MC ucode upload.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1667 
1668 
1669 /**
1670  * cik_srbm_select - select specific register instances
1671  *
1672  * @rdev: radeon_device pointer
1673  * @me: selected ME (micro engine)
1674  * @pipe: pipe
1675  * @queue: queue
1676  * @vmid: VMID
1677  *
1678  * Switches the currently active registers instances.  Some
1679  * registers are instanced per VMID, others are instanced per
1680  * me/pipe/queue combination.
1681  */
1682 static void cik_srbm_select(struct radeon_device *rdev,
1683 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1684 {
1685 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1686 			     MEID(me & 0x3) |
1687 			     VMID(vmid & 0xf) |
1688 			     QUEUEID(queue & 0x7));
1689 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1690 }
1691 
1692 /* ucode loading */
1693 /**
1694  * ci_mc_load_microcode - load MC ucode into the hw
1695  *
1696  * @rdev: radeon_device pointer
1697  *
1698  * Load the GDDR MC ucode into the hw (CIK).
1699  * Returns 0 on success, error on failure.
1700  */
1701 int ci_mc_load_microcode(struct radeon_device *rdev)
1702 {
1703 	const __be32 *fw_data;
1704 	u32 running, blackout = 0;
1705 	u32 *io_mc_regs;
1706 	int i, ucode_size, regs_size;
1707 
1708 	if (!rdev->mc_fw)
1709 		return -EINVAL;
1710 
1711 	switch (rdev->family) {
1712 	case CHIP_BONAIRE:
1713 		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1714 		ucode_size = CIK_MC_UCODE_SIZE;
1715 		regs_size = BONAIRE_IO_MC_REGS_SIZE;
1716 		break;
1717 	case CHIP_HAWAII:
1718 		io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1719 		ucode_size = HAWAII_MC_UCODE_SIZE;
1720 		regs_size = HAWAII_IO_MC_REGS_SIZE;
1721 		break;
1722 	default:
1723 		return -EINVAL;
1724 	}
1725 
1726 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1727 
1728 	if (running == 0) {
1729 		if (running) {
1730 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1731 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1732 		}
1733 
1734 		/* reset the engine and set to writable */
1735 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1736 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1737 
1738 		/* load mc io regs */
1739 		for (i = 0; i < regs_size; i++) {
1740 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1741 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1742 		}
1743 		/* load the MC ucode */
1744 		fw_data = (const __be32 *)rdev->mc_fw->data;
1745 		for (i = 0; i < ucode_size; i++)
1746 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1747 
1748 		/* put the engine back into the active state */
1749 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1750 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1751 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1752 
1753 		/* wait for training to complete */
1754 		for (i = 0; i < rdev->usec_timeout; i++) {
1755 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1756 				break;
1757 			udelay(1);
1758 		}
1759 		for (i = 0; i < rdev->usec_timeout; i++) {
1760 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1761 				break;
1762 			udelay(1);
1763 		}
1764 
1765 		if (running)
1766 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1767 	}
1768 
1769 	return 0;
1770 }
1771 
1772 /**
1773  * cik_init_microcode - load ucode images from disk
1774  *
1775  * @rdev: radeon_device pointer
1776  *
1777  * Use the firmware interface to load the ucode images into
1778  * the driver (not loaded into hw).
1779  * Returns 0 on success, error on failure.
1780  */
1781 static int cik_init_microcode(struct radeon_device *rdev)
1782 {
1783 	const char *chip_name;
1784 	size_t pfp_req_size, me_req_size, ce_req_size,
1785 		mec_req_size, rlc_req_size, mc_req_size = 0,
1786 		sdma_req_size, smc_req_size = 0;
1787 	char fw_name[30];
1788 	int err;
1789 
1790 	DRM_DEBUG("\n");
1791 
1792 	switch (rdev->family) {
1793 	case CHIP_BONAIRE:
1794 		chip_name = "BONAIRE";
1795 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1796 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1797 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1798 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1799 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1800 		mc_req_size = CIK_MC_UCODE_SIZE * 4;
1801 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1802 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1803 		break;
1804 	case CHIP_HAWAII:
1805 		chip_name = "HAWAII";
1806 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1807 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1808 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1809 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1810 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1811 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1812 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1813 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1814 		break;
1815 	case CHIP_KAVERI:
1816 		chip_name = "KAVERI";
1817 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1818 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1819 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1820 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1821 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1822 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1823 		break;
1824 	case CHIP_KABINI:
1825 		chip_name = "KABINI";
1826 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1827 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1828 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1829 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1830 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1831 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1832 		break;
1833 	default: BUG();
1834 	}
1835 
1836 	DRM_INFO("Loading %s Microcode\n", chip_name);
1837 
1838 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1839 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1840 	if (err)
1841 		goto out;
1842 	if (rdev->pfp_fw->size != pfp_req_size) {
1843 		printk(KERN_ERR
1844 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1845 		       rdev->pfp_fw->size, fw_name);
1846 		err = -EINVAL;
1847 		goto out;
1848 	}
1849 
1850 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1851 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1852 	if (err)
1853 		goto out;
1854 	if (rdev->me_fw->size != me_req_size) {
1855 		printk(KERN_ERR
1856 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1857 		       rdev->me_fw->size, fw_name);
1858 		err = -EINVAL;
1859 	}
1860 
1861 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1862 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1863 	if (err)
1864 		goto out;
1865 	if (rdev->ce_fw->size != ce_req_size) {
1866 		printk(KERN_ERR
1867 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1868 		       rdev->ce_fw->size, fw_name);
1869 		err = -EINVAL;
1870 	}
1871 
1872 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1873 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1874 	if (err)
1875 		goto out;
1876 	if (rdev->mec_fw->size != mec_req_size) {
1877 		printk(KERN_ERR
1878 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1879 		       rdev->mec_fw->size, fw_name);
1880 		err = -EINVAL;
1881 	}
1882 
1883 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1884 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1885 	if (err)
1886 		goto out;
1887 	if (rdev->rlc_fw->size != rlc_req_size) {
1888 		printk(KERN_ERR
1889 		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1890 		       rdev->rlc_fw->size, fw_name);
1891 		err = -EINVAL;
1892 	}
1893 
1894 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1895 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1896 	if (err)
1897 		goto out;
1898 	if (rdev->sdma_fw->size != sdma_req_size) {
1899 		printk(KERN_ERR
1900 		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1901 		       rdev->sdma_fw->size, fw_name);
1902 		err = -EINVAL;
1903 	}
1904 
1905 	/* No SMC, MC ucode on APUs */
1906 	if (!(rdev->flags & RADEON_IS_IGP)) {
1907 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1908 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1909 		if (err)
1910 			goto out;
1911 		if (rdev->mc_fw->size != mc_req_size) {
1912 			printk(KERN_ERR
1913 			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1914 			       rdev->mc_fw->size, fw_name);
1915 			err = -EINVAL;
1916 		}
1917 
1918 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1919 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1920 		if (err) {
1921 			printk(KERN_ERR
1922 			       "smc: error loading firmware \"%s\"\n",
1923 			       fw_name);
1924 			release_firmware(rdev->smc_fw);
1925 			rdev->smc_fw = NULL;
1926 			err = 0;
1927 		} else if (rdev->smc_fw->size != smc_req_size) {
1928 			printk(KERN_ERR
1929 			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1930 			       rdev->smc_fw->size, fw_name);
1931 			err = -EINVAL;
1932 		}
1933 	}
1934 
1935 out:
1936 	if (err) {
1937 		if (err != -EINVAL)
1938 			printk(KERN_ERR
1939 			       "cik_cp: Failed to load firmware \"%s\"\n",
1940 			       fw_name);
1941 		release_firmware(rdev->pfp_fw);
1942 		rdev->pfp_fw = NULL;
1943 		release_firmware(rdev->me_fw);
1944 		rdev->me_fw = NULL;
1945 		release_firmware(rdev->ce_fw);
1946 		rdev->ce_fw = NULL;
1947 		release_firmware(rdev->rlc_fw);
1948 		rdev->rlc_fw = NULL;
1949 		release_firmware(rdev->mc_fw);
1950 		rdev->mc_fw = NULL;
1951 		release_firmware(rdev->smc_fw);
1952 		rdev->smc_fw = NULL;
1953 	}
1954 	return err;
1955 }
1956 
1957 /*
1958  * Core functions
1959  */
1960 /**
1961  * cik_tiling_mode_table_init - init the hw tiling table
1962  *
1963  * @rdev: radeon_device pointer
1964  *
1965  * Starting with SI, the tiling setup is done globally in a
1966  * set of 32 tiling modes.  Rather than selecting each set of
1967  * parameters per surface as on older asics, we just select
1968  * which index in the tiling table we want to use, and the
1969  * surface uses those parameters (CIK).
1970  */
1971 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1972 {
1973 	const u32 num_tile_mode_states = 32;
1974 	const u32 num_secondary_tile_mode_states = 16;
1975 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1976 	u32 num_pipe_configs;
1977 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
1978 		rdev->config.cik.max_shader_engines;
1979 
1980 	switch (rdev->config.cik.mem_row_size_in_kb) {
1981 	case 1:
1982 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1983 		break;
1984 	case 2:
1985 	default:
1986 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1987 		break;
1988 	case 4:
1989 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1990 		break;
1991 	}
1992 
1993 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
1994 	if (num_pipe_configs > 8)
1995 		num_pipe_configs = 16;
1996 
1997 	if (num_pipe_configs == 16) {
1998 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1999 			switch (reg_offset) {
2000 			case 0:
2001 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2002 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2003 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2004 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2005 				break;
2006 			case 1:
2007 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2008 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2009 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2010 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2011 				break;
2012 			case 2:
2013 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2014 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2015 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2016 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2017 				break;
2018 			case 3:
2019 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2020 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2021 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2022 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2023 				break;
2024 			case 4:
2025 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2026 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2027 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2028 						 TILE_SPLIT(split_equal_to_row_size));
2029 				break;
2030 			case 5:
2031 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2032 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2033 				break;
2034 			case 6:
2035 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2036 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2037 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2038 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2039 				break;
2040 			case 7:
2041 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2042 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2043 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2044 						 TILE_SPLIT(split_equal_to_row_size));
2045 				break;
2046 			case 8:
2047 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2048 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2049 				break;
2050 			case 9:
2051 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2052 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2053 				break;
2054 			case 10:
2055 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2056 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2057 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2058 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2059 				break;
2060 			case 11:
2061 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2062 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2063 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2064 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2065 				break;
2066 			case 12:
2067 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2068 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2069 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2070 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2071 				break;
2072 			case 13:
2073 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2074 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2075 				break;
2076 			case 14:
2077 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2078 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2079 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2080 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2081 				break;
2082 			case 16:
2083 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2084 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2085 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2086 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2087 				break;
2088 			case 17:
2089 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2090 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2091 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2092 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2093 				break;
2094 			case 27:
2095 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2096 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2097 				break;
2098 			case 28:
2099 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2100 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2101 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2102 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2103 				break;
2104 			case 29:
2105 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2106 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2107 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2108 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2109 				break;
2110 			case 30:
2111 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2112 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2113 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2114 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2115 				break;
2116 			default:
2117 				gb_tile_moden = 0;
2118 				break;
2119 			}
2120 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2121 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2122 		}
2123 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2124 			switch (reg_offset) {
2125 			case 0:
2126 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2127 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2128 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2129 						 NUM_BANKS(ADDR_SURF_16_BANK));
2130 				break;
2131 			case 1:
2132 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2133 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2134 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2135 						 NUM_BANKS(ADDR_SURF_16_BANK));
2136 				break;
2137 			case 2:
2138 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2139 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2140 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2141 						 NUM_BANKS(ADDR_SURF_16_BANK));
2142 				break;
2143 			case 3:
2144 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2145 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2146 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2147 						 NUM_BANKS(ADDR_SURF_16_BANK));
2148 				break;
2149 			case 4:
2150 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2151 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2152 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2153 						 NUM_BANKS(ADDR_SURF_8_BANK));
2154 				break;
2155 			case 5:
2156 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2157 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2158 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2159 						 NUM_BANKS(ADDR_SURF_4_BANK));
2160 				break;
2161 			case 6:
2162 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2163 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2164 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2165 						 NUM_BANKS(ADDR_SURF_2_BANK));
2166 				break;
2167 			case 8:
2168 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2169 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2170 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2171 						 NUM_BANKS(ADDR_SURF_16_BANK));
2172 				break;
2173 			case 9:
2174 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2175 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2176 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2177 						 NUM_BANKS(ADDR_SURF_16_BANK));
2178 				break;
2179 			case 10:
2180 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2181 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2182 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2183 						 NUM_BANKS(ADDR_SURF_16_BANK));
2184 				break;
2185 			case 11:
2186 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2187 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2188 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2189 						 NUM_BANKS(ADDR_SURF_8_BANK));
2190 				break;
2191 			case 12:
2192 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2193 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2194 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2195 						 NUM_BANKS(ADDR_SURF_4_BANK));
2196 				break;
2197 			case 13:
2198 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2199 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2200 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2201 						 NUM_BANKS(ADDR_SURF_2_BANK));
2202 				break;
2203 			case 14:
2204 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2205 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2206 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2207 						 NUM_BANKS(ADDR_SURF_2_BANK));
2208 				break;
2209 			default:
2210 				gb_tile_moden = 0;
2211 				break;
2212 			}
2213 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2214 		}
2215 	} else if (num_pipe_configs == 8) {
2216 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2217 			switch (reg_offset) {
2218 			case 0:
2219 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2220 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2221 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2222 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2223 				break;
2224 			case 1:
2225 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2226 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2227 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2228 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2229 				break;
2230 			case 2:
2231 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2232 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2233 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2234 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2235 				break;
2236 			case 3:
2237 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2238 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2239 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2240 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2241 				break;
2242 			case 4:
2243 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2244 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2245 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2246 						 TILE_SPLIT(split_equal_to_row_size));
2247 				break;
2248 			case 5:
2249 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2250 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2251 				break;
2252 			case 6:
2253 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2254 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2255 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2256 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2257 				break;
2258 			case 7:
2259 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2260 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2261 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2262 						 TILE_SPLIT(split_equal_to_row_size));
2263 				break;
2264 			case 8:
2265 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2266 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2267 				break;
2268 			case 9:
2269 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2270 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2271 				break;
2272 			case 10:
2273 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2274 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2275 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2276 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2277 				break;
2278 			case 11:
2279 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2280 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2281 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2282 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2283 				break;
2284 			case 12:
2285 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2286 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2287 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2288 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2289 				break;
2290 			case 13:
2291 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2292 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2293 				break;
2294 			case 14:
2295 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2296 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2297 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2298 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2299 				break;
2300 			case 16:
2301 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2302 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2303 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2304 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2305 				break;
2306 			case 17:
2307 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2308 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2309 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2310 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2311 				break;
2312 			case 27:
2313 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2314 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2315 				break;
2316 			case 28:
2317 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2318 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2319 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2320 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2321 				break;
2322 			case 29:
2323 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2324 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2325 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2326 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2327 				break;
2328 			case 30:
2329 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2330 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2331 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2332 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2333 				break;
2334 			default:
2335 				gb_tile_moden = 0;
2336 				break;
2337 			}
2338 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2339 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2340 		}
2341 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2342 			switch (reg_offset) {
2343 			case 0:
2344 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2345 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2346 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2347 						 NUM_BANKS(ADDR_SURF_16_BANK));
2348 				break;
2349 			case 1:
2350 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2351 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2352 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2353 						 NUM_BANKS(ADDR_SURF_16_BANK));
2354 				break;
2355 			case 2:
2356 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2357 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2358 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2359 						 NUM_BANKS(ADDR_SURF_16_BANK));
2360 				break;
2361 			case 3:
2362 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2363 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2364 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2365 						 NUM_BANKS(ADDR_SURF_16_BANK));
2366 				break;
2367 			case 4:
2368 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2369 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2370 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2371 						 NUM_BANKS(ADDR_SURF_8_BANK));
2372 				break;
2373 			case 5:
2374 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2375 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2376 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2377 						 NUM_BANKS(ADDR_SURF_4_BANK));
2378 				break;
2379 			case 6:
2380 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2381 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2382 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2383 						 NUM_BANKS(ADDR_SURF_2_BANK));
2384 				break;
2385 			case 8:
2386 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2387 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2388 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2389 						 NUM_BANKS(ADDR_SURF_16_BANK));
2390 				break;
2391 			case 9:
2392 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2393 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2394 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2395 						 NUM_BANKS(ADDR_SURF_16_BANK));
2396 				break;
2397 			case 10:
2398 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2399 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2400 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2401 						 NUM_BANKS(ADDR_SURF_16_BANK));
2402 				break;
2403 			case 11:
2404 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2405 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2406 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2407 						 NUM_BANKS(ADDR_SURF_16_BANK));
2408 				break;
2409 			case 12:
2410 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2411 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2412 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2413 						 NUM_BANKS(ADDR_SURF_8_BANK));
2414 				break;
2415 			case 13:
2416 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2417 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2418 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2419 						 NUM_BANKS(ADDR_SURF_4_BANK));
2420 				break;
2421 			case 14:
2422 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2423 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2424 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2425 						 NUM_BANKS(ADDR_SURF_2_BANK));
2426 				break;
2427 			default:
2428 				gb_tile_moden = 0;
2429 				break;
2430 			}
2431 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2432 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2433 		}
2434 	} else if (num_pipe_configs == 4) {
2435 		if (num_rbs == 4) {
2436 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2437 				switch (reg_offset) {
2438 				case 0:
2439 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2440 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2441 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2442 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2443 					break;
2444 				case 1:
2445 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2446 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2447 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2448 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2449 					break;
2450 				case 2:
2451 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2452 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2453 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2454 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2455 					break;
2456 				case 3:
2457 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2458 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2459 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2460 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2461 					break;
2462 				case 4:
2463 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2464 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2465 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2466 							 TILE_SPLIT(split_equal_to_row_size));
2467 					break;
2468 				case 5:
2469 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2470 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2471 					break;
2472 				case 6:
2473 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2474 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2475 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2476 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2477 					break;
2478 				case 7:
2479 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2480 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2481 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2482 							 TILE_SPLIT(split_equal_to_row_size));
2483 					break;
2484 				case 8:
2485 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2486 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2487 					break;
2488 				case 9:
2489 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2490 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2491 					break;
2492 				case 10:
2493 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2494 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2495 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2496 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2497 					break;
2498 				case 11:
2499 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2500 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2501 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2502 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2503 					break;
2504 				case 12:
2505 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2506 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2507 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2508 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2509 					break;
2510 				case 13:
2511 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2512 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2513 					break;
2514 				case 14:
2515 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2516 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2517 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2518 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2519 					break;
2520 				case 16:
2521 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2522 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2523 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2524 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2525 					break;
2526 				case 17:
2527 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2528 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2529 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2530 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2531 					break;
2532 				case 27:
2533 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2534 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2535 					break;
2536 				case 28:
2537 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2538 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2539 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2540 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2541 					break;
2542 				case 29:
2543 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2544 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2545 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2546 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2547 					break;
2548 				case 30:
2549 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2550 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2551 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2552 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2553 					break;
2554 				default:
2555 					gb_tile_moden = 0;
2556 					break;
2557 				}
2558 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2559 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2560 			}
2561 		} else if (num_rbs < 4) {
2562 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2563 				switch (reg_offset) {
2564 				case 0:
2565 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2566 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2567 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2568 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2569 					break;
2570 				case 1:
2571 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2572 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2573 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2574 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2575 					break;
2576 				case 2:
2577 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2578 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2579 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2580 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2581 					break;
2582 				case 3:
2583 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2584 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2585 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2586 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2587 					break;
2588 				case 4:
2589 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2590 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2591 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2592 							 TILE_SPLIT(split_equal_to_row_size));
2593 					break;
2594 				case 5:
2595 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2596 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2597 					break;
2598 				case 6:
2599 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2600 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2601 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2602 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2603 					break;
2604 				case 7:
2605 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2606 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2607 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2608 							 TILE_SPLIT(split_equal_to_row_size));
2609 					break;
2610 				case 8:
2611 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2612 						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2613 					break;
2614 				case 9:
2615 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2616 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2617 					break;
2618 				case 10:
2619 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2620 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2621 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2622 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2623 					break;
2624 				case 11:
2625 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2626 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2627 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2628 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2629 					break;
2630 				case 12:
2631 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2632 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2633 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2634 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2635 					break;
2636 				case 13:
2637 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2638 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2639 					break;
2640 				case 14:
2641 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2642 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2643 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2644 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2645 					break;
2646 				case 16:
2647 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2648 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2649 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2650 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2651 					break;
2652 				case 17:
2653 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2654 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2655 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2656 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2657 					break;
2658 				case 27:
2659 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2660 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2661 					break;
2662 				case 28:
2663 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2664 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2665 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2666 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2667 					break;
2668 				case 29:
2669 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2670 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2671 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2672 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2673 					break;
2674 				case 30:
2675 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2676 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2677 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2678 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2679 					break;
2680 				default:
2681 					gb_tile_moden = 0;
2682 					break;
2683 				}
2684 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2685 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2686 			}
2687 		}
2688 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2689 			switch (reg_offset) {
2690 			case 0:
2691 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2692 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2693 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2694 						 NUM_BANKS(ADDR_SURF_16_BANK));
2695 				break;
2696 			case 1:
2697 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2698 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2699 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2700 						 NUM_BANKS(ADDR_SURF_16_BANK));
2701 				break;
2702 			case 2:
2703 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2704 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2705 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2706 						 NUM_BANKS(ADDR_SURF_16_BANK));
2707 				break;
2708 			case 3:
2709 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2710 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2711 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2712 						 NUM_BANKS(ADDR_SURF_16_BANK));
2713 				break;
2714 			case 4:
2715 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2716 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2717 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2718 						 NUM_BANKS(ADDR_SURF_16_BANK));
2719 				break;
2720 			case 5:
2721 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2722 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2723 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2724 						 NUM_BANKS(ADDR_SURF_8_BANK));
2725 				break;
2726 			case 6:
2727 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2728 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2729 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2730 						 NUM_BANKS(ADDR_SURF_4_BANK));
2731 				break;
2732 			case 8:
2733 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2734 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2735 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2736 						 NUM_BANKS(ADDR_SURF_16_BANK));
2737 				break;
2738 			case 9:
2739 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2740 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2741 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2742 						 NUM_BANKS(ADDR_SURF_16_BANK));
2743 				break;
2744 			case 10:
2745 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2746 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2747 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2748 						 NUM_BANKS(ADDR_SURF_16_BANK));
2749 				break;
2750 			case 11:
2751 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2752 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2753 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2754 						 NUM_BANKS(ADDR_SURF_16_BANK));
2755 				break;
2756 			case 12:
2757 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2758 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2759 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2760 						 NUM_BANKS(ADDR_SURF_16_BANK));
2761 				break;
2762 			case 13:
2763 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2764 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2765 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2766 						 NUM_BANKS(ADDR_SURF_8_BANK));
2767 				break;
2768 			case 14:
2769 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2770 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2771 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2772 						 NUM_BANKS(ADDR_SURF_4_BANK));
2773 				break;
2774 			default:
2775 				gb_tile_moden = 0;
2776 				break;
2777 			}
2778 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2779 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2780 		}
2781 	} else if (num_pipe_configs == 2) {
2782 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2783 			switch (reg_offset) {
2784 			case 0:
2785 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2786 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2787 						 PIPE_CONFIG(ADDR_SURF_P2) |
2788 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2789 				break;
2790 			case 1:
2791 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2792 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2793 						 PIPE_CONFIG(ADDR_SURF_P2) |
2794 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2795 				break;
2796 			case 2:
2797 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2798 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2799 						 PIPE_CONFIG(ADDR_SURF_P2) |
2800 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2801 				break;
2802 			case 3:
2803 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2804 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2805 						 PIPE_CONFIG(ADDR_SURF_P2) |
2806 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2807 				break;
2808 			case 4:
2809 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2810 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2811 						 PIPE_CONFIG(ADDR_SURF_P2) |
2812 						 TILE_SPLIT(split_equal_to_row_size));
2813 				break;
2814 			case 5:
2815 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2816 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2817 				break;
2818 			case 6:
2819 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2820 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2821 						 PIPE_CONFIG(ADDR_SURF_P2) |
2822 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2823 				break;
2824 			case 7:
2825 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2826 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2827 						 PIPE_CONFIG(ADDR_SURF_P2) |
2828 						 TILE_SPLIT(split_equal_to_row_size));
2829 				break;
2830 			case 8:
2831 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2832 				break;
2833 			case 9:
2834 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2835 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2836 				break;
2837 			case 10:
2838 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2839 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2840 						 PIPE_CONFIG(ADDR_SURF_P2) |
2841 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2842 				break;
2843 			case 11:
2844 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2845 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2846 						 PIPE_CONFIG(ADDR_SURF_P2) |
2847 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2848 				break;
2849 			case 12:
2850 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2851 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2852 						 PIPE_CONFIG(ADDR_SURF_P2) |
2853 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2854 				break;
2855 			case 13:
2856 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2857 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2858 				break;
2859 			case 14:
2860 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2861 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2862 						 PIPE_CONFIG(ADDR_SURF_P2) |
2863 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2864 				break;
2865 			case 16:
2866 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2867 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2868 						 PIPE_CONFIG(ADDR_SURF_P2) |
2869 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2870 				break;
2871 			case 17:
2872 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2873 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2874 						 PIPE_CONFIG(ADDR_SURF_P2) |
2875 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2876 				break;
2877 			case 27:
2878 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2879 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2880 				break;
2881 			case 28:
2882 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2883 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2884 						 PIPE_CONFIG(ADDR_SURF_P2) |
2885 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2886 				break;
2887 			case 29:
2888 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2889 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2890 						 PIPE_CONFIG(ADDR_SURF_P2) |
2891 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2892 				break;
2893 			case 30:
2894 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2895 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2896 						 PIPE_CONFIG(ADDR_SURF_P2) |
2897 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2898 				break;
2899 			default:
2900 				gb_tile_moden = 0;
2901 				break;
2902 			}
2903 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2904 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2905 		}
2906 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2907 			switch (reg_offset) {
2908 			case 0:
2909 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2910 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2911 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2912 						 NUM_BANKS(ADDR_SURF_16_BANK));
2913 				break;
2914 			case 1:
2915 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2916 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2917 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2918 						 NUM_BANKS(ADDR_SURF_16_BANK));
2919 				break;
2920 			case 2:
2921 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2922 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2923 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2924 						 NUM_BANKS(ADDR_SURF_16_BANK));
2925 				break;
2926 			case 3:
2927 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2928 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2929 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2930 						 NUM_BANKS(ADDR_SURF_16_BANK));
2931 				break;
2932 			case 4:
2933 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2934 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2935 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2936 						 NUM_BANKS(ADDR_SURF_16_BANK));
2937 				break;
2938 			case 5:
2939 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2940 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2941 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2942 						 NUM_BANKS(ADDR_SURF_16_BANK));
2943 				break;
2944 			case 6:
2945 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2946 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2947 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2948 						 NUM_BANKS(ADDR_SURF_8_BANK));
2949 				break;
2950 			case 8:
2951 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2952 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2953 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2954 						 NUM_BANKS(ADDR_SURF_16_BANK));
2955 				break;
2956 			case 9:
2957 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2958 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2959 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2960 						 NUM_BANKS(ADDR_SURF_16_BANK));
2961 				break;
2962 			case 10:
2963 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2964 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2965 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2966 						 NUM_BANKS(ADDR_SURF_16_BANK));
2967 				break;
2968 			case 11:
2969 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2970 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2971 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2972 						 NUM_BANKS(ADDR_SURF_16_BANK));
2973 				break;
2974 			case 12:
2975 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2976 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2977 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2978 						 NUM_BANKS(ADDR_SURF_16_BANK));
2979 				break;
2980 			case 13:
2981 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2982 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2983 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2984 						 NUM_BANKS(ADDR_SURF_16_BANK));
2985 				break;
2986 			case 14:
2987 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2988 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2989 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2990 						 NUM_BANKS(ADDR_SURF_8_BANK));
2991 				break;
2992 			default:
2993 				gb_tile_moden = 0;
2994 				break;
2995 			}
2996 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2997 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2998 		}
2999 	} else
3000 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3001 }
3002 
3003 /**
3004  * cik_select_se_sh - select which SE, SH to address
3005  *
3006  * @rdev: radeon_device pointer
3007  * @se_num: shader engine to address
3008  * @sh_num: sh block to address
3009  *
3010  * Select which SE, SH combinations to address. Certain
3011  * registers are instanced per SE or SH.  0xffffffff means
3012  * broadcast to all SEs or SHs (CIK).
3013  */
3014 static void cik_select_se_sh(struct radeon_device *rdev,
3015 			     u32 se_num, u32 sh_num)
3016 {
3017 	u32 data = INSTANCE_BROADCAST_WRITES;
3018 
3019 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3020 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3021 	else if (se_num == 0xffffffff)
3022 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3023 	else if (sh_num == 0xffffffff)
3024 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3025 	else
3026 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3027 	WREG32(GRBM_GFX_INDEX, data);
3028 }
3029 
3030 /**
3031  * cik_create_bitmask - create a bitmask
3032  *
3033  * @bit_width: length of the mask
3034  *
3035  * create a variable length bit mask (CIK).
3036  * Returns the bitmask.
3037  */
3038 static u32 cik_create_bitmask(u32 bit_width)
3039 {
3040 	u32 i, mask = 0;
3041 
3042 	for (i = 0; i < bit_width; i++) {
3043 		mask <<= 1;
3044 		mask |= 1;
3045 	}
3046 	return mask;
3047 }
3048 
3049 /**
3050  * cik_select_se_sh - select which SE, SH to address
3051  *
3052  * @rdev: radeon_device pointer
3053  * @max_rb_num: max RBs (render backends) for the asic
3054  * @se_num: number of SEs (shader engines) for the asic
3055  * @sh_per_se: number of SH blocks per SE for the asic
3056  *
3057  * Calculates the bitmask of disabled RBs (CIK).
3058  * Returns the disabled RB bitmask.
3059  */
3060 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3061 			      u32 max_rb_num_per_se,
3062 			      u32 sh_per_se)
3063 {
3064 	u32 data, mask;
3065 
3066 	data = RREG32(CC_RB_BACKEND_DISABLE);
3067 	if (data & 1)
3068 		data &= BACKEND_DISABLE_MASK;
3069 	else
3070 		data = 0;
3071 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3072 
3073 	data >>= BACKEND_DISABLE_SHIFT;
3074 
3075 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3076 
3077 	return data & mask;
3078 }
3079 
3080 /**
3081  * cik_setup_rb - setup the RBs on the asic
3082  *
3083  * @rdev: radeon_device pointer
3084  * @se_num: number of SEs (shader engines) for the asic
3085  * @sh_per_se: number of SH blocks per SE for the asic
3086  * @max_rb_num: max RBs (render backends) for the asic
3087  *
3088  * Configures per-SE/SH RB registers (CIK).
3089  */
3090 static void cik_setup_rb(struct radeon_device *rdev,
3091 			 u32 se_num, u32 sh_per_se,
3092 			 u32 max_rb_num_per_se)
3093 {
3094 	int i, j;
3095 	u32 data, mask;
3096 	u32 disabled_rbs = 0;
3097 	u32 enabled_rbs = 0;
3098 
3099 	for (i = 0; i < se_num; i++) {
3100 		for (j = 0; j < sh_per_se; j++) {
3101 			cik_select_se_sh(rdev, i, j);
3102 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3103 			if (rdev->family == CHIP_HAWAII)
3104 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3105 			else
3106 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3107 		}
3108 	}
3109 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3110 
3111 	mask = 1;
3112 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3113 		if (!(disabled_rbs & mask))
3114 			enabled_rbs |= mask;
3115 		mask <<= 1;
3116 	}
3117 
3118 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3119 
3120 	for (i = 0; i < se_num; i++) {
3121 		cik_select_se_sh(rdev, i, 0xffffffff);
3122 		data = 0;
3123 		for (j = 0; j < sh_per_se; j++) {
3124 			switch (enabled_rbs & 3) {
3125 			case 0:
3126 				if (j == 0)
3127 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3128 				else
3129 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3130 				break;
3131 			case 1:
3132 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3133 				break;
3134 			case 2:
3135 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3136 				break;
3137 			case 3:
3138 			default:
3139 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3140 				break;
3141 			}
3142 			enabled_rbs >>= 2;
3143 		}
3144 		WREG32(PA_SC_RASTER_CONFIG, data);
3145 	}
3146 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3147 }
3148 
3149 /**
3150  * cik_gpu_init - setup the 3D engine
3151  *
3152  * @rdev: radeon_device pointer
3153  *
3154  * Configures the 3D engine and tiling configuration
3155  * registers so that the 3D engine is usable.
3156  */
3157 static void cik_gpu_init(struct radeon_device *rdev)
3158 {
3159 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3160 	u32 mc_shared_chmap, mc_arb_ramcfg;
3161 	u32 hdp_host_path_cntl;
3162 	u32 tmp;
3163 	int i, j;
3164 
3165 	switch (rdev->family) {
3166 	case CHIP_BONAIRE:
3167 		rdev->config.cik.max_shader_engines = 2;
3168 		rdev->config.cik.max_tile_pipes = 4;
3169 		rdev->config.cik.max_cu_per_sh = 7;
3170 		rdev->config.cik.max_sh_per_se = 1;
3171 		rdev->config.cik.max_backends_per_se = 2;
3172 		rdev->config.cik.max_texture_channel_caches = 4;
3173 		rdev->config.cik.max_gprs = 256;
3174 		rdev->config.cik.max_gs_threads = 32;
3175 		rdev->config.cik.max_hw_contexts = 8;
3176 
3177 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3178 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3179 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3180 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3181 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3182 		break;
3183 	case CHIP_HAWAII:
3184 		rdev->config.cik.max_shader_engines = 4;
3185 		rdev->config.cik.max_tile_pipes = 16;
3186 		rdev->config.cik.max_cu_per_sh = 11;
3187 		rdev->config.cik.max_sh_per_se = 1;
3188 		rdev->config.cik.max_backends_per_se = 4;
3189 		rdev->config.cik.max_texture_channel_caches = 16;
3190 		rdev->config.cik.max_gprs = 256;
3191 		rdev->config.cik.max_gs_threads = 32;
3192 		rdev->config.cik.max_hw_contexts = 8;
3193 
3194 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3195 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3196 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3197 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3198 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3199 		break;
3200 	case CHIP_KAVERI:
3201 		rdev->config.cik.max_shader_engines = 1;
3202 		rdev->config.cik.max_tile_pipes = 4;
3203 		if ((rdev->pdev->device == 0x1304) ||
3204 		    (rdev->pdev->device == 0x1305) ||
3205 		    (rdev->pdev->device == 0x130C) ||
3206 		    (rdev->pdev->device == 0x130F) ||
3207 		    (rdev->pdev->device == 0x1310) ||
3208 		    (rdev->pdev->device == 0x1311) ||
3209 		    (rdev->pdev->device == 0x131C)) {
3210 			rdev->config.cik.max_cu_per_sh = 8;
3211 			rdev->config.cik.max_backends_per_se = 2;
3212 		} else if ((rdev->pdev->device == 0x1309) ||
3213 			   (rdev->pdev->device == 0x130A) ||
3214 			   (rdev->pdev->device == 0x130D) ||
3215 			   (rdev->pdev->device == 0x1313) ||
3216 			   (rdev->pdev->device == 0x131D)) {
3217 			rdev->config.cik.max_cu_per_sh = 6;
3218 			rdev->config.cik.max_backends_per_se = 2;
3219 		} else if ((rdev->pdev->device == 0x1306) ||
3220 			   (rdev->pdev->device == 0x1307) ||
3221 			   (rdev->pdev->device == 0x130B) ||
3222 			   (rdev->pdev->device == 0x130E) ||
3223 			   (rdev->pdev->device == 0x1315) ||
3224 			   (rdev->pdev->device == 0x131B)) {
3225 			rdev->config.cik.max_cu_per_sh = 4;
3226 			rdev->config.cik.max_backends_per_se = 1;
3227 		} else {
3228 			rdev->config.cik.max_cu_per_sh = 3;
3229 			rdev->config.cik.max_backends_per_se = 1;
3230 		}
3231 		rdev->config.cik.max_sh_per_se = 1;
3232 		rdev->config.cik.max_texture_channel_caches = 4;
3233 		rdev->config.cik.max_gprs = 256;
3234 		rdev->config.cik.max_gs_threads = 16;
3235 		rdev->config.cik.max_hw_contexts = 8;
3236 
3237 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3238 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3239 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3240 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3241 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3242 		break;
3243 	case CHIP_KABINI:
3244 	default:
3245 		rdev->config.cik.max_shader_engines = 1;
3246 		rdev->config.cik.max_tile_pipes = 2;
3247 		rdev->config.cik.max_cu_per_sh = 2;
3248 		rdev->config.cik.max_sh_per_se = 1;
3249 		rdev->config.cik.max_backends_per_se = 1;
3250 		rdev->config.cik.max_texture_channel_caches = 2;
3251 		rdev->config.cik.max_gprs = 256;
3252 		rdev->config.cik.max_gs_threads = 16;
3253 		rdev->config.cik.max_hw_contexts = 8;
3254 
3255 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3256 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3257 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3258 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3259 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3260 		break;
3261 	}
3262 
3263 	/* Initialize HDP */
3264 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3265 		WREG32((0x2c14 + j), 0x00000000);
3266 		WREG32((0x2c18 + j), 0x00000000);
3267 		WREG32((0x2c1c + j), 0x00000000);
3268 		WREG32((0x2c20 + j), 0x00000000);
3269 		WREG32((0x2c24 + j), 0x00000000);
3270 	}
3271 
3272 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3273 
3274 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3275 
3276 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3277 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3278 
3279 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3280 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3281 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3282 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3283 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3284 		rdev->config.cik.mem_row_size_in_kb = 4;
3285 	/* XXX use MC settings? */
3286 	rdev->config.cik.shader_engine_tile_size = 32;
3287 	rdev->config.cik.num_gpus = 1;
3288 	rdev->config.cik.multi_gpu_tile_size = 64;
3289 
3290 	/* fix up row size */
3291 	gb_addr_config &= ~ROW_SIZE_MASK;
3292 	switch (rdev->config.cik.mem_row_size_in_kb) {
3293 	case 1:
3294 	default:
3295 		gb_addr_config |= ROW_SIZE(0);
3296 		break;
3297 	case 2:
3298 		gb_addr_config |= ROW_SIZE(1);
3299 		break;
3300 	case 4:
3301 		gb_addr_config |= ROW_SIZE(2);
3302 		break;
3303 	}
3304 
3305 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3306 	 * not have bank info, so create a custom tiling dword.
3307 	 * bits 3:0   num_pipes
3308 	 * bits 7:4   num_banks
3309 	 * bits 11:8  group_size
3310 	 * bits 15:12 row_size
3311 	 */
3312 	rdev->config.cik.tile_config = 0;
3313 	switch (rdev->config.cik.num_tile_pipes) {
3314 	case 1:
3315 		rdev->config.cik.tile_config |= (0 << 0);
3316 		break;
3317 	case 2:
3318 		rdev->config.cik.tile_config |= (1 << 0);
3319 		break;
3320 	case 4:
3321 		rdev->config.cik.tile_config |= (2 << 0);
3322 		break;
3323 	case 8:
3324 	default:
3325 		/* XXX what about 12? */
3326 		rdev->config.cik.tile_config |= (3 << 0);
3327 		break;
3328 	}
3329 	rdev->config.cik.tile_config |=
3330 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3331 	rdev->config.cik.tile_config |=
3332 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3333 	rdev->config.cik.tile_config |=
3334 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3335 
3336 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3337 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3338 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3339 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3340 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3341 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3342 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3343 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3344 
3345 	cik_tiling_mode_table_init(rdev);
3346 
3347 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3348 		     rdev->config.cik.max_sh_per_se,
3349 		     rdev->config.cik.max_backends_per_se);
3350 
3351 	/* set HW defaults for 3D engine */
3352 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3353 
3354 	WREG32(SX_DEBUG_1, 0x20);
3355 
3356 	WREG32(TA_CNTL_AUX, 0x00010000);
3357 
3358 	tmp = RREG32(SPI_CONFIG_CNTL);
3359 	tmp |= 0x03000000;
3360 	WREG32(SPI_CONFIG_CNTL, tmp);
3361 
3362 	WREG32(SQ_CONFIG, 1);
3363 
3364 	WREG32(DB_DEBUG, 0);
3365 
3366 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3367 	tmp |= 0x00000400;
3368 	WREG32(DB_DEBUG2, tmp);
3369 
3370 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3371 	tmp |= 0x00020200;
3372 	WREG32(DB_DEBUG3, tmp);
3373 
3374 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3375 	tmp |= 0x00018208;
3376 	WREG32(CB_HW_CONTROL, tmp);
3377 
3378 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3379 
3380 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3381 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3382 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3383 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3384 
3385 	WREG32(VGT_NUM_INSTANCES, 1);
3386 
3387 	WREG32(CP_PERFMON_CNTL, 0);
3388 
3389 	WREG32(SQ_CONFIG, 0);
3390 
3391 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3392 					  FORCE_EOV_MAX_REZ_CNT(255)));
3393 
3394 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3395 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3396 
3397 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3398 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3399 
3400 	tmp = RREG32(HDP_MISC_CNTL);
3401 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3402 	WREG32(HDP_MISC_CNTL, tmp);
3403 
3404 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3405 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3406 
3407 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3408 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3409 
3410 	udelay(50);
3411 }
3412 
3413 /*
3414  * GPU scratch registers helpers function.
3415  */
3416 /**
3417  * cik_scratch_init - setup driver info for CP scratch regs
3418  *
3419  * @rdev: radeon_device pointer
3420  *
3421  * Set up the number and offset of the CP scratch registers.
3422  * NOTE: use of CP scratch registers is a legacy inferface and
3423  * is not used by default on newer asics (r6xx+).  On newer asics,
3424  * memory buffers are used for fences rather than scratch regs.
3425  */
3426 static void cik_scratch_init(struct radeon_device *rdev)
3427 {
3428 	int i;
3429 
3430 	rdev->scratch.num_reg = 7;
3431 	rdev->scratch.reg_base = SCRATCH_REG0;
3432 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3433 		rdev->scratch.free[i] = true;
3434 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3435 	}
3436 }
3437 
3438 /**
3439  * cik_ring_test - basic gfx ring test
3440  *
3441  * @rdev: radeon_device pointer
3442  * @ring: radeon_ring structure holding ring information
3443  *
3444  * Allocate a scratch register and write to it using the gfx ring (CIK).
3445  * Provides a basic gfx ring test to verify that the ring is working.
3446  * Used by cik_cp_gfx_resume();
3447  * Returns 0 on success, error on failure.
3448  */
3449 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3450 {
3451 	uint32_t scratch;
3452 	uint32_t tmp = 0;
3453 	unsigned i;
3454 	int r;
3455 
3456 	r = radeon_scratch_get(rdev, &scratch);
3457 	if (r) {
3458 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3459 		return r;
3460 	}
3461 	WREG32(scratch, 0xCAFEDEAD);
3462 	r = radeon_ring_lock(rdev, ring, 3);
3463 	if (r) {
3464 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3465 		radeon_scratch_free(rdev, scratch);
3466 		return r;
3467 	}
3468 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3469 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3470 	radeon_ring_write(ring, 0xDEADBEEF);
3471 	radeon_ring_unlock_commit(rdev, ring);
3472 
3473 	for (i = 0; i < rdev->usec_timeout; i++) {
3474 		tmp = RREG32(scratch);
3475 		if (tmp == 0xDEADBEEF)
3476 			break;
3477 		DRM_UDELAY(1);
3478 	}
3479 	if (i < rdev->usec_timeout) {
3480 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3481 	} else {
3482 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3483 			  ring->idx, scratch, tmp);
3484 		r = -EINVAL;
3485 	}
3486 	radeon_scratch_free(rdev, scratch);
3487 	return r;
3488 }
3489 
3490 /**
3491  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3492  *
3493  * @rdev: radeon_device pointer
3494  * @ridx: radeon ring index
3495  *
3496  * Emits an hdp flush on the cp.
3497  */
3498 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3499 				       int ridx)
3500 {
3501 	struct radeon_ring *ring = &rdev->ring[ridx];
3502 	u32 ref_and_mask;
3503 
3504 	switch (ring->idx) {
3505 	case CAYMAN_RING_TYPE_CP1_INDEX:
3506 	case CAYMAN_RING_TYPE_CP2_INDEX:
3507 	default:
3508 		switch (ring->me) {
3509 		case 0:
3510 			ref_and_mask = CP2 << ring->pipe;
3511 			break;
3512 		case 1:
3513 			ref_and_mask = CP6 << ring->pipe;
3514 			break;
3515 		default:
3516 			return;
3517 		}
3518 		break;
3519 	case RADEON_RING_TYPE_GFX_INDEX:
3520 		ref_and_mask = CP0;
3521 		break;
3522 	}
3523 
3524 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3525 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3526 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3527 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3528 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3529 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3530 	radeon_ring_write(ring, ref_and_mask);
3531 	radeon_ring_write(ring, ref_and_mask);
3532 	radeon_ring_write(ring, 0x20); /* poll interval */
3533 }
3534 
3535 /**
3536  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3537  *
3538  * @rdev: radeon_device pointer
3539  * @fence: radeon fence object
3540  *
3541  * Emits a fence sequnce number on the gfx ring and flushes
3542  * GPU caches.
3543  */
3544 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3545 			     struct radeon_fence *fence)
3546 {
3547 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3548 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3549 
3550 	/* EVENT_WRITE_EOP - flush caches, send int */
3551 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3552 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3553 				 EOP_TC_ACTION_EN |
3554 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3555 				 EVENT_INDEX(5)));
3556 	radeon_ring_write(ring, addr & 0xfffffffc);
3557 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3558 	radeon_ring_write(ring, fence->seq);
3559 	radeon_ring_write(ring, 0);
3560 	/* HDP flush */
3561 	cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
3562 }
3563 
3564 /**
3565  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3566  *
3567  * @rdev: radeon_device pointer
3568  * @fence: radeon fence object
3569  *
3570  * Emits a fence sequnce number on the compute ring and flushes
3571  * GPU caches.
3572  */
3573 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3574 				 struct radeon_fence *fence)
3575 {
3576 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3577 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3578 
3579 	/* RELEASE_MEM - flush caches, send int */
3580 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3581 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3582 				 EOP_TC_ACTION_EN |
3583 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3584 				 EVENT_INDEX(5)));
3585 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3586 	radeon_ring_write(ring, addr & 0xfffffffc);
3587 	radeon_ring_write(ring, upper_32_bits(addr));
3588 	radeon_ring_write(ring, fence->seq);
3589 	radeon_ring_write(ring, 0);
3590 	/* HDP flush */
3591 	cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
3592 }
3593 
/**
 * cik_semaphore_ring_emit - emit a semaphore command on a CIK ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring buffer object
 * @semaphore: radeon semaphore object
 * @emit_wait: emit a wait command (true) or a signal command (false)
 *
 * Emits a MEM_SEMAPHORE packet referencing the semaphore's GPU
 * address, selecting wait or signal semantics (CIK).
 * Returns true (the command is always emitted).
 */
bool cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, addr & 0xffffffff);
	/* high address bits share a dword with the wait/signal select */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);

	return true;
}
3608 
3609 /**
3610  * cik_copy_cpdma - copy pages using the CP DMA engine
3611  *
3612  * @rdev: radeon_device pointer
3613  * @src_offset: src GPU address
3614  * @dst_offset: dst GPU address
3615  * @num_gpu_pages: number of GPU pages to xfer
3616  * @fence: radeon fence object
3617  *
3618  * Copy GPU paging using the CP DMA engine (CIK+).
3619  * Used by the radeon ttm implementation to move pages if
3620  * registered as the asic copy callback.
3621  */
3622 int cik_copy_cpdma(struct radeon_device *rdev,
3623 		   uint64_t src_offset, uint64_t dst_offset,
3624 		   unsigned num_gpu_pages,
3625 		   struct radeon_fence **fence)
3626 {
3627 	struct radeon_semaphore *sem = NULL;
3628 	int ring_index = rdev->asic->copy.blit_ring_index;
3629 	struct radeon_ring *ring = &rdev->ring[ring_index];
3630 	u32 size_in_bytes, cur_size_in_bytes, control;
3631 	int i, num_loops;
3632 	int r = 0;
3633 
3634 	r = radeon_semaphore_create(rdev, &sem);
3635 	if (r) {
3636 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3637 		return r;
3638 	}
3639 
3640 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3641 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3642 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3643 	if (r) {
3644 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3645 		radeon_semaphore_free(rdev, &sem, NULL);
3646 		return r;
3647 	}
3648 
3649 	radeon_semaphore_sync_to(sem, *fence);
3650 	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
3651 
3652 	for (i = 0; i < num_loops; i++) {
3653 		cur_size_in_bytes = size_in_bytes;
3654 		if (cur_size_in_bytes > 0x1fffff)
3655 			cur_size_in_bytes = 0x1fffff;
3656 		size_in_bytes -= cur_size_in_bytes;
3657 		control = 0;
3658 		if (size_in_bytes == 0)
3659 			control |= PACKET3_DMA_DATA_CP_SYNC;
3660 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3661 		radeon_ring_write(ring, control);
3662 		radeon_ring_write(ring, lower_32_bits(src_offset));
3663 		radeon_ring_write(ring, upper_32_bits(src_offset));
3664 		radeon_ring_write(ring, lower_32_bits(dst_offset));
3665 		radeon_ring_write(ring, upper_32_bits(dst_offset));
3666 		radeon_ring_write(ring, cur_size_in_bytes);
3667 		src_offset += cur_size_in_bytes;
3668 		dst_offset += cur_size_in_bytes;
3669 	}
3670 
3671 	r = radeon_fence_emit(rdev, fence, ring->idx);
3672 	if (r) {
3673 		radeon_ring_unlock_undo(rdev, ring);
3674 		return r;
3675 	}
3676 
3677 	radeon_ring_unlock_commit(rdev, ring);
3678 	radeon_semaphore_free(rdev, &sem, *fence);
3679 
3680 	return r;
3681 }
3682 
3683 /*
3684  * IB stuff
3685  */
3686 /**
3687  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3688  *
3689  * @rdev: radeon_device pointer
3690  * @ib: radeon indirect buffer object
3691  *
3692  * Emits an DE (drawing engine) or CE (constant engine) IB
3693  * on the gfx ring.  IBs are usually generated by userspace
3694  * acceleration drivers and submitted to the kernel for
3695  * sheduling on the ring.  This function schedules the IB
3696  * on the gfx ring for execution by the GPU.
3697  */
3698 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3699 {
3700 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3701 	u32 header, control = INDIRECT_BUFFER_VALID;
3702 
3703 	if (ib->is_const_ib) {
3704 		/* set switch buffer packet before const IB */
3705 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3706 		radeon_ring_write(ring, 0);
3707 
3708 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3709 	} else {
3710 		u32 next_rptr;
3711 		if (ring->rptr_save_reg) {
3712 			next_rptr = ring->wptr + 3 + 4;
3713 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3714 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3715 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3716 			radeon_ring_write(ring, next_rptr);
3717 		} else if (rdev->wb.enabled) {
3718 			next_rptr = ring->wptr + 5 + 4;
3719 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3720 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3721 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3722 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3723 			radeon_ring_write(ring, next_rptr);
3724 		}
3725 
3726 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3727 	}
3728 
3729 	control |= ib->length_dw |
3730 		(ib->vm ? (ib->vm->id << 24) : 0);
3731 
3732 	radeon_ring_write(ring, header);
3733 	radeon_ring_write(ring,
3734 #ifdef __BIG_ENDIAN
3735 			  (2 << 0) |
3736 #endif
3737 			  (ib->gpu_addr & 0xFFFFFFFC));
3738 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3739 	radeon_ring_write(ring, control);
3740 }
3741 
3742 /**
3743  * cik_ib_test - basic gfx ring IB test
3744  *
3745  * @rdev: radeon_device pointer
3746  * @ring: radeon_ring structure holding ring information
3747  *
3748  * Allocate an IB and execute it on the gfx ring (CIK).
3749  * Provides a basic gfx ring test to verify that IBs are working.
3750  * Returns 0 on success, error on failure.
3751  */
3752 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3753 {
3754 	struct radeon_ib ib;
3755 	uint32_t scratch;
3756 	uint32_t tmp = 0;
3757 	unsigned i;
3758 	int r;
3759 
3760 	r = radeon_scratch_get(rdev, &scratch);
3761 	if (r) {
3762 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3763 		return r;
3764 	}
3765 	WREG32(scratch, 0xCAFEDEAD);
3766 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3767 	if (r) {
3768 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3769 		radeon_scratch_free(rdev, scratch);
3770 		return r;
3771 	}
3772 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3773 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3774 	ib.ptr[2] = 0xDEADBEEF;
3775 	ib.length_dw = 3;
3776 	r = radeon_ib_schedule(rdev, &ib, NULL);
3777 	if (r) {
3778 		radeon_scratch_free(rdev, scratch);
3779 		radeon_ib_free(rdev, &ib);
3780 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3781 		return r;
3782 	}
3783 	r = radeon_fence_wait(ib.fence, false);
3784 	if (r) {
3785 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3786 		radeon_scratch_free(rdev, scratch);
3787 		radeon_ib_free(rdev, &ib);
3788 		return r;
3789 	}
3790 	for (i = 0; i < rdev->usec_timeout; i++) {
3791 		tmp = RREG32(scratch);
3792 		if (tmp == 0xDEADBEEF)
3793 			break;
3794 		DRM_UDELAY(1);
3795 	}
3796 	if (i < rdev->usec_timeout) {
3797 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3798 	} else {
3799 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3800 			  scratch, tmp);
3801 		r = -EINVAL;
3802 	}
3803 	radeon_scratch_free(rdev, scratch);
3804 	radeon_ib_free(rdev, &ib);
3805 	return r;
3806 }
3807 
3808 /*
3809  * CP.
3810  * On CIK, gfx and compute now have independant command processors.
3811  *
3812  * GFX
3813  * Gfx consists of a single ring and can process both gfx jobs and
3814  * compute jobs.  The gfx CP consists of three microengines (ME):
3815  * PFP - Pre-Fetch Parser
3816  * ME - Micro Engine
3817  * CE - Constant Engine
3818  * The PFP and ME make up what is considered the Drawing Engine (DE).
3819  * The CE is an asynchronous engine used for updating buffer desciptors
3820  * used by the DE so that they can be loaded into cache in parallel
3821  * while the DE is processing state update packets.
3822  *
3823  * Compute
3824  * The compute CP consists of two microengines (ME):
3825  * MEC1 - Compute MicroEngine 1
3826  * MEC2 - Compute MicroEngine 2
3827  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3828  * The queues are exposed to userspace and are programmed directly
3829  * by the compute runtime.
3830  */
3831 /**
3832  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3833  *
3834  * @rdev: radeon_device pointer
3835  * @enable: enable or disable the MEs
3836  *
3837  * Halts or unhalts the gfx MEs.
3838  */
3839 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3840 {
3841 	if (enable)
3842 		WREG32(CP_ME_CNTL, 0);
3843 	else {
3844 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3845 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3846 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3847 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3848 	}
3849 	udelay(50);
3850 }
3851 
3852 /**
3853  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3854  *
3855  * @rdev: radeon_device pointer
3856  *
3857  * Loads the gfx PFP, ME, and CE ucode.
3858  * Returns 0 for success, -EINVAL if the ucode is not available.
3859  */
3860 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3861 {
3862 	const __be32 *fw_data;
3863 	int i;
3864 
3865 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3866 		return -EINVAL;
3867 
3868 	cik_cp_gfx_enable(rdev, false);
3869 
3870 	/* PFP */
3871 	fw_data = (const __be32 *)rdev->pfp_fw->data;
3872 	WREG32(CP_PFP_UCODE_ADDR, 0);
3873 	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3874 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3875 	WREG32(CP_PFP_UCODE_ADDR, 0);
3876 
3877 	/* CE */
3878 	fw_data = (const __be32 *)rdev->ce_fw->data;
3879 	WREG32(CP_CE_UCODE_ADDR, 0);
3880 	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3881 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3882 	WREG32(CP_CE_UCODE_ADDR, 0);
3883 
3884 	/* ME */
3885 	fw_data = (const __be32 *)rdev->me_fw->data;
3886 	WREG32(CP_ME_RAM_WADDR, 0);
3887 	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3888 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3889 	WREG32(CP_ME_RAM_WADDR, 0);
3890 
3891 	WREG32(CP_PFP_UCODE_ADDR, 0);
3892 	WREG32(CP_CE_UCODE_ADDR, 0);
3893 	WREG32(CP_ME_RAM_WADDR, 0);
3894 	WREG32(CP_ME_RAM_RADDR, 0);
3895 	return 0;
3896 }
3897 
3898 /**
3899  * cik_cp_gfx_start - start the gfx ring
3900  *
3901  * @rdev: radeon_device pointer
3902  *
3903  * Enables the ring and loads the clear state context and other
3904  * packets required to init the ring.
3905  * Returns 0 for success, error for failure.
3906  */
3907 static int cik_cp_gfx_start(struct radeon_device *rdev)
3908 {
3909 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3910 	int r, i;
3911 
3912 	/* init the CP */
3913 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3914 	WREG32(CP_ENDIAN_SWAP, 0);
3915 	WREG32(CP_DEVICE_ID, 1);
3916 
3917 	cik_cp_gfx_enable(rdev, true);
3918 
3919 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3920 	if (r) {
3921 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3922 		return r;
3923 	}
3924 
3925 	/* init the CE partitions.  CE only used for gfx on CIK */
3926 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3927 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3928 	radeon_ring_write(ring, 0xc000);
3929 	radeon_ring_write(ring, 0xc000);
3930 
3931 	/* setup clear context state */
3932 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3933 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3934 
3935 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3936 	radeon_ring_write(ring, 0x80000000);
3937 	radeon_ring_write(ring, 0x80000000);
3938 
3939 	for (i = 0; i < cik_default_size; i++)
3940 		radeon_ring_write(ring, cik_default_state[i]);
3941 
3942 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3943 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3944 
3945 	/* set clear context state */
3946 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3947 	radeon_ring_write(ring, 0);
3948 
3949 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3950 	radeon_ring_write(ring, 0x00000316);
3951 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3952 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3953 
3954 	radeon_ring_unlock_commit(rdev, ring);
3955 
3956 	return 0;
3957 }
3958 
3959 /**
3960  * cik_cp_gfx_fini - stop the gfx ring
3961  *
3962  * @rdev: radeon_device pointer
3963  *
3964  * Stop the gfx ring and tear down the driver ring
3965  * info.
3966  */
3967 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3968 {
3969 	cik_cp_gfx_enable(rdev, false);
3970 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3971 }
3972 
3973 /**
3974  * cik_cp_gfx_resume - setup the gfx ring buffer registers
3975  *
3976  * @rdev: radeon_device pointer
3977  *
3978  * Program the location and size of the gfx ring buffer
3979  * and test it to make sure it's working.
3980  * Returns 0 for success, error for failure.
3981  */
3982 static int cik_cp_gfx_resume(struct radeon_device *rdev)
3983 {
3984 	struct radeon_ring *ring;
3985 	u32 tmp;
3986 	u32 rb_bufsz;
3987 	u64 rb_addr;
3988 	int r;
3989 
3990 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3991 	if (rdev->family != CHIP_HAWAII)
3992 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3993 
3994 	/* Set the write pointer delay */
3995 	WREG32(CP_RB_WPTR_DELAY, 0);
3996 
3997 	/* set the RB to use vmid 0 */
3998 	WREG32(CP_RB_VMID, 0);
3999 
4000 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4001 
4002 	/* ring 0 - compute and gfx */
4003 	/* Set ring buffer size */
4004 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4005 	rb_bufsz = order_base_2(ring->ring_size / 8);
4006 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4007 #ifdef __BIG_ENDIAN
4008 	tmp |= BUF_SWAP_32BIT;
4009 #endif
4010 	WREG32(CP_RB0_CNTL, tmp);
4011 
4012 	/* Initialize the ring buffer's read and write pointers */
4013 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4014 	ring->wptr = 0;
4015 	WREG32(CP_RB0_WPTR, ring->wptr);
4016 
4017 	/* set the wb address wether it's enabled or not */
4018 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4019 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4020 
4021 	/* scratch register shadowing is no longer supported */
4022 	WREG32(SCRATCH_UMSK, 0);
4023 
4024 	if (!rdev->wb.enabled)
4025 		tmp |= RB_NO_UPDATE;
4026 
4027 	mdelay(1);
4028 	WREG32(CP_RB0_CNTL, tmp);
4029 
4030 	rb_addr = ring->gpu_addr >> 8;
4031 	WREG32(CP_RB0_BASE, rb_addr);
4032 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4033 
4034 	ring->rptr = RREG32(CP_RB0_RPTR);
4035 
4036 	/* start the ring */
4037 	cik_cp_gfx_start(rdev);
4038 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4039 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4040 	if (r) {
4041 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4042 		return r;
4043 	}
4044 
4045 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4046 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4047 
4048 	return 0;
4049 }
4050 
4051 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4052 		     struct radeon_ring *ring)
4053 {
4054 	u32 rptr;
4055 
4056 	if (rdev->wb.enabled)
4057 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4058 	else
4059 		rptr = RREG32(CP_RB0_RPTR);
4060 
4061 	return rptr;
4062 }
4063 
4064 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4065 		     struct radeon_ring *ring)
4066 {
4067 	u32 wptr;
4068 
4069 	wptr = RREG32(CP_RB0_WPTR);
4070 
4071 	return wptr;
4072 }
4073 
/* Commit the gfx ring write pointer to CP_RB0_WPTR; the discarded
 * read-back forces the write to post before returning.
 */
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	(void)RREG32(CP_RB0_WPTR);
}
4080 
4081 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4082 			 struct radeon_ring *ring)
4083 {
4084 	u32 rptr;
4085 
4086 	if (rdev->wb.enabled) {
4087 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4088 	} else {
4089 		mutex_lock(&rdev->srbm_mutex);
4090 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4091 		rptr = RREG32(CP_HQD_PQ_RPTR);
4092 		cik_srbm_select(rdev, 0, 0, 0, 0);
4093 		mutex_unlock(&rdev->srbm_mutex);
4094 	}
4095 
4096 	return rptr;
4097 }
4098 
4099 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4100 			 struct radeon_ring *ring)
4101 {
4102 	u32 wptr;
4103 
4104 	if (rdev->wb.enabled) {
4105 		/* XXX check if swapping is necessary on BE */
4106 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4107 	} else {
4108 		mutex_lock(&rdev->srbm_mutex);
4109 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4110 		wptr = RREG32(CP_HQD_PQ_WPTR);
4111 		cik_srbm_select(rdev, 0, 0, 0, 0);
4112 		mutex_unlock(&rdev->srbm_mutex);
4113 	}
4114 
4115 	return wptr;
4116 }
4117 
/* Commit a compute ring's write pointer: update the writeback slot,
 * then ring the queue's doorbell to notify the MEC.
 */
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4125 
4126 /**
4127  * cik_cp_compute_enable - enable/disable the compute CP MEs
4128  *
4129  * @rdev: radeon_device pointer
4130  * @enable: enable or disable the MEs
4131  *
4132  * Halts or unhalts the compute MEs.
4133  */
4134 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4135 {
4136 	if (enable)
4137 		WREG32(CP_MEC_CNTL, 0);
4138 	else
4139 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4140 	udelay(50);
4141 }
4142 
4143 /**
4144  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4145  *
4146  * @rdev: radeon_device pointer
4147  *
4148  * Loads the compute MEC1&2 ucode.
4149  * Returns 0 for success, -EINVAL if the ucode is not available.
4150  */
4151 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4152 {
4153 	const __be32 *fw_data;
4154 	int i;
4155 
4156 	if (!rdev->mec_fw)
4157 		return -EINVAL;
4158 
4159 	cik_cp_compute_enable(rdev, false);
4160 
4161 	/* MEC1 */
4162 	fw_data = (const __be32 *)rdev->mec_fw->data;
4163 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4164 	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4165 		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4166 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4167 
4168 	if (rdev->family == CHIP_KAVERI) {
4169 		/* MEC2 */
4170 		fw_data = (const __be32 *)rdev->mec_fw->data;
4171 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4172 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4173 			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4174 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4175 	}
4176 
4177 	return 0;
4178 }
4179 
4180 /**
4181  * cik_cp_compute_start - start the compute queues
4182  *
4183  * @rdev: radeon_device pointer
4184  *
4185  * Enable the compute queues.
4186  * Returns 0 for success, error for failure.
4187  */
4188 static int cik_cp_compute_start(struct radeon_device *rdev)
4189 {
4190 	cik_cp_compute_enable(rdev, true);
4191 
4192 	return 0;
4193 }
4194 
4195 /**
4196  * cik_cp_compute_fini - stop the compute queues
4197  *
4198  * @rdev: radeon_device pointer
4199  *
4200  * Stop the compute queues and tear down the driver queue
4201  * info.
4202  */
4203 static void cik_cp_compute_fini(struct radeon_device *rdev)
4204 {
4205 	int i, idx, r;
4206 
4207 	cik_cp_compute_enable(rdev, false);
4208 
4209 	for (i = 0; i < 2; i++) {
4210 		if (i == 0)
4211 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4212 		else
4213 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4214 
4215 		if (rdev->ring[idx].mqd_obj) {
4216 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4217 			if (unlikely(r != 0))
4218 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4219 
4220 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4221 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4222 
4223 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4224 			rdev->ring[idx].mqd_obj = NULL;
4225 		}
4226 	}
4227 }
4228 
/**
 * cik_mec_fini - tear down the MEC HPD EOP buffer
 *
 * @rdev: radeon_device pointer
 *
 * Unpins and frees the HPD EOP buffer object allocated by
 * cik_mec_init(), if it exists.
 */
static void cik_mec_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->mec.hpd_eop_obj) {
		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
		rdev->mec.hpd_eop_obj = NULL;
	}
}
4244 
4245 #define MEC_HPD_SIZE 2048
4246 
/**
 * cik_mec_init - allocate the MEC HPD EOP buffer
 *
 * @rdev: radeon_device pointer
 *
 * Sets up the MEC/pipe/queue topology for the asic family and
 * allocates, pins (in GTT) and clears the HPD EOP buffer shared
 * by all compute pipes (MEC_HPD_SIZE * 2 bytes per pipe).
 * Returns 0 for success, error for failure.
 */
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 */
	if (rdev->family == CHIP_KAVERI)
		rdev->mec.num_mec = 2;
	else
		rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 4;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	if (rdev->mec.hpd_eop_obj == NULL) {
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
4302 
/* Shadow of the per-queue HQD (hardware queue descriptor) registers,
 * in the same order as the CP_HQD_*/CP_MQD_* register block.  Embedded
 * in the MQD so the CP can save/restore queue state from memory.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4341 
/* Memory queue descriptor (MQD) layout for Bonaire-class compute
 * queues.  One MQD per queue lives in a GTT buffer object; the
 * embedded queue_state holds the HQD register shadow.
 * NOTE(review): field layout must match what the MEC firmware
 * expects — do not reorder.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4369 
4370 /**
4371  * cik_cp_compute_resume - setup the compute queue registers
4372  *
4373  * @rdev: radeon_device pointer
4374  *
4375  * Program the compute queues and test them to make sure they
4376  * are working.
4377  * Returns 0 for success, error for failure.
4378  */
4379 static int cik_cp_compute_resume(struct radeon_device *rdev)
4380 {
4381 	int r, i, idx;
4382 	u32 tmp;
4383 	bool use_doorbell = true;
4384 	u64 hqd_gpu_addr;
4385 	u64 mqd_gpu_addr;
4386 	u64 eop_gpu_addr;
4387 	u64 wb_gpu_addr;
4388 	u32 *buf;
4389 	struct bonaire_mqd *mqd;
4390 
4391 	r = cik_cp_compute_start(rdev);
4392 	if (r)
4393 		return r;
4394 
4395 	/* fix up chicken bits */
4396 	tmp = RREG32(CP_CPF_DEBUG);
4397 	tmp |= (1 << 23);
4398 	WREG32(CP_CPF_DEBUG, tmp);
4399 
4400 	/* init the pipes */
4401 	mutex_lock(&rdev->srbm_mutex);
4402 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4403 		int me = (i < 4) ? 1 : 2;
4404 		int pipe = (i < 4) ? i : (i - 4);
4405 
4406 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4407 
4408 		cik_srbm_select(rdev, me, pipe, 0, 0);
4409 
4410 		/* write the EOP addr */
4411 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4412 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4413 
4414 		/* set the VMID assigned */
4415 		WREG32(CP_HPD_EOP_VMID, 0);
4416 
4417 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4418 		tmp = RREG32(CP_HPD_EOP_CONTROL);
4419 		tmp &= ~EOP_SIZE_MASK;
4420 		tmp |= order_base_2(MEC_HPD_SIZE / 8);
4421 		WREG32(CP_HPD_EOP_CONTROL, tmp);
4422 	}
4423 	cik_srbm_select(rdev, 0, 0, 0, 0);
4424 	mutex_unlock(&rdev->srbm_mutex);
4425 
4426 	/* init the queues.  Just two for now. */
4427 	for (i = 0; i < 2; i++) {
4428 		if (i == 0)
4429 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4430 		else
4431 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4432 
4433 		if (rdev->ring[idx].mqd_obj == NULL) {
4434 			r = radeon_bo_create(rdev,
4435 					     sizeof(struct bonaire_mqd),
4436 					     PAGE_SIZE, true,
4437 					     RADEON_GEM_DOMAIN_GTT, NULL,
4438 					     &rdev->ring[idx].mqd_obj);
4439 			if (r) {
4440 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4441 				return r;
4442 			}
4443 		}
4444 
4445 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4446 		if (unlikely(r != 0)) {
4447 			cik_cp_compute_fini(rdev);
4448 			return r;
4449 		}
4450 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4451 				  &mqd_gpu_addr);
4452 		if (r) {
4453 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4454 			cik_cp_compute_fini(rdev);
4455 			return r;
4456 		}
4457 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4458 		if (r) {
4459 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4460 			cik_cp_compute_fini(rdev);
4461 			return r;
4462 		}
4463 
4464 		/* init the mqd struct */
4465 		memset(buf, 0, sizeof(struct bonaire_mqd));
4466 
4467 		mqd = (struct bonaire_mqd *)buf;
4468 		mqd->header = 0xC0310800;
4469 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4470 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4471 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4472 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4473 
4474 		mutex_lock(&rdev->srbm_mutex);
4475 		cik_srbm_select(rdev, rdev->ring[idx].me,
4476 				rdev->ring[idx].pipe,
4477 				rdev->ring[idx].queue, 0);
4478 
4479 		/* disable wptr polling */
4480 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4481 		tmp &= ~WPTR_POLL_EN;
4482 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4483 
4484 		/* enable doorbell? */
4485 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4486 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4487 		if (use_doorbell)
4488 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4489 		else
4490 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4491 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4492 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4493 
4494 		/* disable the queue if it's active */
4495 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4496 		mqd->queue_state.cp_hqd_pq_rptr = 0;
4497 		mqd->queue_state.cp_hqd_pq_wptr= 0;
4498 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4499 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4500 			for (i = 0; i < rdev->usec_timeout; i++) {
4501 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4502 					break;
4503 				udelay(1);
4504 			}
4505 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4506 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4507 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4508 		}
4509 
4510 		/* set the pointer to the MQD */
4511 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4512 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4513 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4514 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4515 		/* set MQD vmid to 0 */
4516 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4517 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4518 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4519 
4520 		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4521 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4522 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4523 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4524 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4525 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4526 
4527 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4528 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4529 		mqd->queue_state.cp_hqd_pq_control &=
4530 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4531 
4532 		mqd->queue_state.cp_hqd_pq_control |=
4533 			order_base_2(rdev->ring[idx].ring_size / 8);
4534 		mqd->queue_state.cp_hqd_pq_control |=
4535 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4536 #ifdef __BIG_ENDIAN
4537 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4538 #endif
4539 		mqd->queue_state.cp_hqd_pq_control &=
4540 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4541 		mqd->queue_state.cp_hqd_pq_control |=
4542 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4543 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4544 
4545 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4546 		if (i == 0)
4547 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4548 		else
4549 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4550 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4551 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4552 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4553 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4554 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4555 
4556 		/* set the wb address wether it's enabled or not */
4557 		if (i == 0)
4558 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4559 		else
4560 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4561 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4562 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4563 			upper_32_bits(wb_gpu_addr) & 0xffff;
4564 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4565 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4566 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4567 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4568 
4569 		/* enable the doorbell if requested */
4570 		if (use_doorbell) {
4571 			mqd->queue_state.cp_hqd_pq_doorbell_control =
4572 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4573 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4574 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4575 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4576 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4577 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4578 				~(DOORBELL_SOURCE | DOORBELL_HIT);
4579 
4580 		} else {
4581 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4582 		}
4583 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4584 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4585 
4586 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4587 		rdev->ring[idx].wptr = 0;
4588 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4589 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4590 		rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
4591 		mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
4592 
4593 		/* set the vmid for the queue */
4594 		mqd->queue_state.cp_hqd_vmid = 0;
4595 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4596 
4597 		/* activate the queue */
4598 		mqd->queue_state.cp_hqd_active = 1;
4599 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4600 
4601 		cik_srbm_select(rdev, 0, 0, 0, 0);
4602 		mutex_unlock(&rdev->srbm_mutex);
4603 
4604 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4605 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4606 
4607 		rdev->ring[idx].ready = true;
4608 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4609 		if (r)
4610 			rdev->ring[idx].ready = false;
4611 	}
4612 
4613 	return 0;
4614 }
4615 
/**
 * cik_cp_enable - enable/disable both the gfx and compute CPs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the CPs
 */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4621 
/* Load the gfx CP ucode, then the compute CP ucode.  Stops at the
 * first failure and returns its error code; 0 when both succeed.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (!r)
		r = cik_cp_compute_load_microcode(rdev);

	return r;
}
4635 
/**
 * cik_cp_fini - tear down both the gfx and compute CPs
 *
 * @rdev: radeon_device pointer
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4641 
/**
 * cik_cp_resume - bring up the gfx and compute CPs
 *
 * @rdev: radeon_device pointer
 *
 * Loads the CP ucode and resumes the gfx and compute rings, with the
 * GUI idle interrupt masked during the bring-up sequence.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_resume(struct radeon_device *rdev)
{
	int r;

	cik_enable_gui_idle_interrupt(rdev, false);

	r = cik_cp_load_microcode(rdev);
	if (r)
		return r;

	r = cik_cp_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_cp_compute_resume(rdev);
	if (r)
		return r;

	cik_enable_gui_idle_interrupt(rdev, true);

	return 0;
}
4663 
/**
 * cik_print_gpu_status_regs - dump the GPU status registers
 *
 * @rdev: radeon_device pointer
 *
 * Dumps the GRBM, SRBM, SDMA and CP status registers to the kernel
 * log for reset/hang debugging (CIK).
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4703 
4704 /**
4705  * cik_gpu_check_soft_reset - check which blocks are busy
4706  *
4707  * @rdev: radeon_device pointer
4708  *
4709  * Check which blocks are busy and return the relevant reset
4710  * mask to be used by cik_gpu_soft_reset().
4711  * Returns a mask of the blocks to be reset.
4712  */
4713 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4714 {
4715 	u32 reset_mask = 0;
4716 	u32 tmp;
4717 
4718 	/* GRBM_STATUS */
4719 	tmp = RREG32(GRBM_STATUS);
4720 	if (tmp & (PA_BUSY | SC_BUSY |
4721 		   BCI_BUSY | SX_BUSY |
4722 		   TA_BUSY | VGT_BUSY |
4723 		   DB_BUSY | CB_BUSY |
4724 		   GDS_BUSY | SPI_BUSY |
4725 		   IA_BUSY | IA_BUSY_NO_DMA))
4726 		reset_mask |= RADEON_RESET_GFX;
4727 
4728 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4729 		reset_mask |= RADEON_RESET_CP;
4730 
4731 	/* GRBM_STATUS2 */
4732 	tmp = RREG32(GRBM_STATUS2);
4733 	if (tmp & RLC_BUSY)
4734 		reset_mask |= RADEON_RESET_RLC;
4735 
4736 	/* SDMA0_STATUS_REG */
4737 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4738 	if (!(tmp & SDMA_IDLE))
4739 		reset_mask |= RADEON_RESET_DMA;
4740 
4741 	/* SDMA1_STATUS_REG */
4742 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4743 	if (!(tmp & SDMA_IDLE))
4744 		reset_mask |= RADEON_RESET_DMA1;
4745 
4746 	/* SRBM_STATUS2 */
4747 	tmp = RREG32(SRBM_STATUS2);
4748 	if (tmp & SDMA_BUSY)
4749 		reset_mask |= RADEON_RESET_DMA;
4750 
4751 	if (tmp & SDMA1_BUSY)
4752 		reset_mask |= RADEON_RESET_DMA1;
4753 
4754 	/* SRBM_STATUS */
4755 	tmp = RREG32(SRBM_STATUS);
4756 
4757 	if (tmp & IH_BUSY)
4758 		reset_mask |= RADEON_RESET_IH;
4759 
4760 	if (tmp & SEM_BUSY)
4761 		reset_mask |= RADEON_RESET_SEM;
4762 
4763 	if (tmp & GRBM_RQ_PENDING)
4764 		reset_mask |= RADEON_RESET_GRBM;
4765 
4766 	if (tmp & VMC_BUSY)
4767 		reset_mask |= RADEON_RESET_VMC;
4768 
4769 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4770 		   MCC_BUSY | MCD_BUSY))
4771 		reset_mask |= RADEON_RESET_MC;
4772 
4773 	if (evergreen_is_display_hung(rdev))
4774 		reset_mask |= RADEON_RESET_DISPLAY;
4775 
4776 	/* Skip MC reset as it's mostly likely not hung, just busy */
4777 	if (reset_mask & RADEON_RESET_MC) {
4778 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4779 		reset_mask &= ~RADEON_RESET_MC;
4780 	}
4781 
4782 	return reset_mask;
4783 }
4784 
/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask.
 * The sequence is: dump state, quiesce CG/PG/RLC/CP/SDMA, stop the MC,
 * pulse the GRBM/SRBM soft reset bits, then resume the MC.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* log state for post-mortem debugging before touching anything */
	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* stop memory traffic while the blocks are reset */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the requested blocks into GRBM/SRBM reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* MC reset is only valid on discrete parts */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	/* assert the reset bits, let them settle, then deassert;
	 * the read-back after each write posts it to the hardware */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	/* log the post-reset state */
	cik_print_gpu_status_regs(rdev);
}
4915 
/* GMCON register state saved across a KV (IGP) pci config reset,
 * captured by kv_save_regs_for_reset() and written back by
 * kv_restore_regs_for_reset(). */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
4921 
/**
 * kv_save_regs_for_reset - save GMCON registers before a pci config reset
 *
 * @rdev: radeon_device pointer
 * @save: storage for the saved register values
 *
 * Saves the GMCON RENG/MISC/MISC3 registers into @save and disables
 * the REN engine auto-execute and stutter features so they do not
 * run while the asic is being reset.
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
4933 
4934 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
4935 				      struct kv_reset_save_regs *save)
4936 {
4937 	int i;
4938 
4939 	WREG32(GMCON_PGFSM_WRITE, 0);
4940 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
4941 
4942 	for (i = 0; i < 5; i++)
4943 		WREG32(GMCON_PGFSM_WRITE, 0);
4944 
4945 	WREG32(GMCON_PGFSM_WRITE, 0);
4946 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
4947 
4948 	for (i = 0; i < 5; i++)
4949 		WREG32(GMCON_PGFSM_WRITE, 0);
4950 
4951 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
4952 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
4953 
4954 	for (i = 0; i < 5; i++)
4955 		WREG32(GMCON_PGFSM_WRITE, 0);
4956 
4957 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
4958 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
4959 
4960 	for (i = 0; i < 5; i++)
4961 		WREG32(GMCON_PGFSM_WRITE, 0);
4962 
4963 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
4964 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
4965 
4966 	for (i = 0; i < 5; i++)
4967 		WREG32(GMCON_PGFSM_WRITE, 0);
4968 
4969 	WREG32(GMCON_PGFSM_WRITE, 0);
4970 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
4971 
4972 	for (i = 0; i < 5; i++)
4973 		WREG32(GMCON_PGFSM_WRITE, 0);
4974 
4975 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
4976 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
4977 
4978 	for (i = 0; i < 5; i++)
4979 		WREG32(GMCON_PGFSM_WRITE, 0);
4980 
4981 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
4982 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
4983 
4984 	for (i = 0; i < 5; i++)
4985 		WREG32(GMCON_PGFSM_WRITE, 0);
4986 
4987 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
4988 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
4989 
4990 	for (i = 0; i < 5; i++)
4991 		WREG32(GMCON_PGFSM_WRITE, 0);
4992 
4993 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
4994 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
4995 
4996 	for (i = 0; i < 5; i++)
4997 		WREG32(GMCON_PGFSM_WRITE, 0);
4998 
4999 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5000 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5001 
5002 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5003 	WREG32(GMCON_MISC, save->gmcon_misc);
5004 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5005 }
5006 
/**
 * cik_gpu_pci_config_reset - reset the asic via the pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Quiesces the CP, MEC, SDMA engines, RLC and MC, then performs a
 * full asic reset through pci config space and waits for the asic
 * to come back.  On IGP parts the GMCON registers are saved and
 * restored around the reset.  Used as a heavier fallback when the
 * soft reset did not recover the GPU.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset;
	 * CONFIG_MEMSIZE reads all-ones while the asic is in reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5069 
5070 /**
5071  * cik_asic_reset - soft reset GPU
5072  *
5073  * @rdev: radeon_device pointer
5074  *
5075  * Look up which blocks are hung and attempt
5076  * to reset them.
5077  * Returns 0 for success.
5078  */
5079 int cik_asic_reset(struct radeon_device *rdev)
5080 {
5081 	u32 reset_mask;
5082 
5083 	reset_mask = cik_gpu_check_soft_reset(rdev);
5084 
5085 	if (reset_mask)
5086 		r600_set_bios_scratch_engine_hung(rdev, true);
5087 
5088 	/* try soft reset */
5089 	cik_gpu_soft_reset(rdev, reset_mask);
5090 
5091 	reset_mask = cik_gpu_check_soft_reset(rdev);
5092 
5093 	/* try pci config reset */
5094 	if (reset_mask && radeon_hard_reset)
5095 		cik_gpu_pci_config_reset(rdev);
5096 
5097 	reset_mask = cik_gpu_check_soft_reset(rdev);
5098 
5099 	if (!reset_mask)
5100 		r600_set_bios_scratch_engine_hung(rdev, false);
5101 
5102 	return 0;
5103 }
5104 
5105 /**
5106  * cik_gfx_is_lockup - check if the 3D engine is locked up
5107  *
5108  * @rdev: radeon_device pointer
5109  * @ring: radeon_ring structure holding ring information
5110  *
5111  * Check if the 3D engine is locked up (CIK).
5112  * Returns true if the engine is locked, false if not.
5113  */
5114 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5115 {
5116 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5117 
5118 	if (!(reset_mask & (RADEON_RESET_GFX |
5119 			    RADEON_RESET_COMPUTE |
5120 			    RADEON_RESET_CP))) {
5121 		radeon_ring_lockup_update(ring);
5122 		return false;
5123 	}
5124 	/* force CP activities */
5125 	radeon_ring_force_activity(rdev, ring);
5126 	return radeon_ring_test_lockup(rdev, ring);
5127 }
5128 
/* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* stop display/client traffic while reprogramming the aperture */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs the 16MB-aligned vram top in the high
	 * half-word and the base in the low half-word */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* no AGP on CIK: program an empty aperture */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5185 
5186 /**
5187  * cik_mc_init - initialize the memory controller driver params
5188  *
5189  * @rdev: radeon_device pointer
5190  *
5191  * Look up the amount of vram, vram width, and decide how to place
5192  * vram and gart within the GPU's physical address space (CIK).
5193  * Returns 0 for success.
5194  */
5195 static int cik_mc_init(struct radeon_device *rdev)
5196 {
5197 	u32 tmp;
5198 	int chansize, numchan;
5199 
5200 	/* Get VRAM informations */
5201 	rdev->mc.vram_is_ddr = true;
5202 	tmp = RREG32(MC_ARB_RAMCFG);
5203 	if (tmp & CHANSIZE_MASK) {
5204 		chansize = 64;
5205 	} else {
5206 		chansize = 32;
5207 	}
5208 	tmp = RREG32(MC_SHARED_CHMAP);
5209 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5210 	case 0:
5211 	default:
5212 		numchan = 1;
5213 		break;
5214 	case 1:
5215 		numchan = 2;
5216 		break;
5217 	case 2:
5218 		numchan = 4;
5219 		break;
5220 	case 3:
5221 		numchan = 8;
5222 		break;
5223 	case 4:
5224 		numchan = 3;
5225 		break;
5226 	case 5:
5227 		numchan = 6;
5228 		break;
5229 	case 6:
5230 		numchan = 10;
5231 		break;
5232 	case 7:
5233 		numchan = 12;
5234 		break;
5235 	case 8:
5236 		numchan = 16;
5237 		break;
5238 	}
5239 	rdev->mc.vram_width = numchan * chansize;
5240 	/* Could aper size report 0 ? */
5241 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5242 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5243 	/* size in MB on si */
5244 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5245 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5246 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5247 	si_vram_gtt_location(rdev, &rdev->mc);
5248 	radeon_update_bandwidth_info(rdev);
5249 
5250 	return 0;
5251 }
5252 
/*
 * GART
 * VMID 0 is the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the radeon vm/hsa code.
 */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache so pending writes land before invalidation */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15; bit 0 flushes context 0 */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5274 
/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
 */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* setup context0: kernel GART mappings, faults fall back to the
	 * dummy page */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): undocumented registers cleared here — purpose
	 * unknown from this file, keep as-is */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* FIXME start with 4G, once using 2 level pt switch to full
	 * vm size space
	 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
5396 
/**
 * cik_pcie_gart_disable - gart disable
 *
 * @rdev: radeon_device pointer
 *
 * This disables all VM page table (CIK) and unpins the
 * GART table from vram.
 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache; note ENABLE_L2_CACHE is intentionally left
	 * out here, unlike in cik_pcie_gart_enable() */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
5424 
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK): disables the page
 * tables, frees the GART table vram and tears down the gart core.
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5438 
/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
5452 
5453 /*
5454  * vm
5455  * VMID 0 is the physical GPU addresses as used by the kernel.
5456  * VMIDs 1-15 are used for userspace clients and are handled
5457  * by the radeon vm/hsa code.
5458  */
5459 /**
5460  * cik_vm_init - cik vm init callback
5461  *
5462  * @rdev: radeon_device pointer
5463  *
5464  * Inits cik specific vm parameters (number of VMs, base of vram for
5465  * VMIDs 1-15) (CIK).
5466  * Returns 0 for success.
5467  */
5468 int cik_vm_init(struct radeon_device *rdev)
5469 {
5470 	/* number of VMs */
5471 	rdev->vm_manager.nvm = 16;
5472 	/* base offset of vram pages */
5473 	if (rdev->flags & RADEON_IS_IGP) {
5474 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5475 		tmp <<= 22;
5476 		rdev->vm_manager.vram_base_offset = tmp;
5477 	} else
5478 		rdev->vm_manager.vram_base_offset = 0;
5479 
5480 	return 0;
5481 }
5482 
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 * Nothing to do here; the generic vm manager handles cleanup.
 */
void cik_vm_fini(struct radeon_device *rdev)
{
}
5493 
/**
 * cik_vm_decode_fault - print human readable fault info
 *
 * @rdev: radeon_device pointer
 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
 *
 * Print human readable fault information (CIK).
 */
static void cik_vm_decode_fault(struct radeon_device *rdev,
				u32 status, u32 addr, u32 mc_client)
{
	u32 mc_id;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	/* mc_client is four packed ASCII characters, big-endian */
	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };

	/* Hawaii uses a wider client-id field than the other CIK parts */
	if (rdev->family == CHIP_HAWAII)
		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	else
		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;

	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_client, mc_id);
}
5522 
/**
 * cik_vm_flush - cik vm flush using the CP
 *
 * @rdev: radeon_device pointer
 * @ridx: index of the ring to emit on
 * @vm: vm to flush; a no-op if NULL
 *
 * Update the page table base and flush the VM TLB
 * using the CP (CIK).
 */
void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* update the page directory base for this vm's context;
	 * contexts 0-7 and 8-15 live in separate register banks */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs */
	/* select this vm's id in SRBM_GFX_CNTL so the SH_MEM writes
	 * below land in the right per-vm register bank */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm->id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch SRBM_GFX_CNTL back to vmid 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, ridx);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* compute doesn't have PFP */
	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
5595 
5596 /*
5597  * RLC
5598  * The RLC is a multi-purpose microengine that handles a
5599  * variety of functions, the most important of which is
5600  * the interrupt controller.
5601  */
5602 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5603 					  bool enable)
5604 {
5605 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5606 
5607 	if (enable)
5608 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5609 	else
5610 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5611 	WREG32(CP_INT_CNTL_RING0, tmp);
5612 }
5613 
5614 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5615 {
5616 	u32 tmp;
5617 
5618 	tmp = RREG32(RLC_LB_CNTL);
5619 	if (enable)
5620 		tmp |= LOAD_BALANCE_ENABLE;
5621 	else
5622 		tmp &= ~LOAD_BALANCE_ENABLE;
5623 	WREG32(RLC_LB_CNTL, tmp);
5624 }
5625 
/**
 * cik_wait_for_rlc_serdes - wait for the RLC serdes masters to go idle
 *
 * @rdev: radeon_device pointer
 *
 * Polls the per-CU serdes master busy register for every shader
 * engine / shader array pair, then polls the non-CU masters, each
 * bounded by rdev->usec_timeout.  Timeouts are silently ignored.
 */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast (all SE/SH) selection */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
5650 
5651 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5652 {
5653 	u32 tmp;
5654 
5655 	tmp = RREG32(RLC_CNTL);
5656 	if (tmp != rlc)
5657 		WREG32(RLC_CNTL, rlc);
5658 }
5659 
/* Halt the RLC if it is currently enabled and wait for it to go idle.
 * Returns the original RLC_CNTL value so the caller can restore it
 * later with cik_update_rlc().
 */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* wait for the RLC GPM to report idle (bounded poll) */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
5683 
/* Request RLC safe mode and wait (best effort, usec-bounded) for the
 * GFX power/clock status bits to assert and for the RLC to ack the
 * request by clearing REQ.  Timeouts are silently ignored.
 */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	/* post the enter-safe-mode message to the RLC mailbox */
	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to clear REQ, acknowledging the request */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
5704 
5705 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5706 {
5707 	u32 tmp;
5708 
5709 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5710 	WREG32(RLC_GPR_REG2, tmp);
5711 }
5712 
5713 /**
5714  * cik_rlc_stop - stop the RLC ME
5715  *
5716  * @rdev: radeon_device pointer
5717  *
5718  * Halt the RLC ME (MicroEngine) (CIK).
5719  */
5720 static void cik_rlc_stop(struct radeon_device *rdev)
5721 {
5722 	WREG32(RLC_CNTL, 0);
5723 
5724 	cik_enable_gui_idle_interrupt(rdev, false);
5725 
5726 	cik_wait_for_rlc_serdes(rdev);
5727 }
5728 
5729 /**
5730  * cik_rlc_start - start the RLC ME
5731  *
5732  * @rdev: radeon_device pointer
5733  *
5734  * Unhalt the RLC ME (MicroEngine) (CIK).
5735  */
5736 static void cik_rlc_start(struct radeon_device *rdev)
5737 {
5738 	WREG32(RLC_CNTL, RLC_ENABLE);
5739 
5740 	cik_enable_gui_idle_interrupt(rdev, true);
5741 
5742 	udelay(50);
5743 }
5744 
5745 /**
5746  * cik_rlc_resume - setup the RLC hw
5747  *
5748  * @rdev: radeon_device pointer
5749  *
5750  * Initialize the RLC registers, load the ucode,
5751  * and start the RLC (CIK).
5752  * Returns 0 for success, -EINVAL if the ucode is not available.
5753  */
5754 static int cik_rlc_resume(struct radeon_device *rdev)
5755 {
5756 	u32 i, size, tmp;
5757 	const __be32 *fw_data;
5758 
5759 	if (!rdev->rlc_fw)
5760 		return -EINVAL;
5761 
5762 	switch (rdev->family) {
5763 	case CHIP_BONAIRE:
5764 	case CHIP_HAWAII:
5765 	default:
5766 		size = BONAIRE_RLC_UCODE_SIZE;
5767 		break;
5768 	case CHIP_KAVERI:
5769 		size = KV_RLC_UCODE_SIZE;
5770 		break;
5771 	case CHIP_KABINI:
5772 		size = KB_RLC_UCODE_SIZE;
5773 		break;
5774 	}
5775 
5776 	cik_rlc_stop(rdev);
5777 
5778 	/* disable CG */
5779 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5780 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5781 
5782 	si_rlc_reset(rdev);
5783 
5784 	cik_init_pg(rdev);
5785 
5786 	cik_init_cg(rdev);
5787 
5788 	WREG32(RLC_LB_CNTR_INIT, 0);
5789 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5790 
5791 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5792 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5793 	WREG32(RLC_LB_PARAMS, 0x00600408);
5794 	WREG32(RLC_LB_CNTL, 0x80000004);
5795 
5796 	WREG32(RLC_MC_CNTL, 0);
5797 	WREG32(RLC_UCODE_CNTL, 0);
5798 
5799 	fw_data = (const __be32 *)rdev->rlc_fw->data;
5800 		WREG32(RLC_GPM_UCODE_ADDR, 0);
5801 	for (i = 0; i < size; i++)
5802 		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5803 	WREG32(RLC_GPM_UCODE_ADDR, 0);
5804 
5805 	/* XXX - find out what chips support lbpw */
5806 	cik_enable_lbpw(rdev, false);
5807 
5808 	if (rdev->family == CHIP_BONAIRE)
5809 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
5810 
5811 	cik_rlc_start(rdev);
5812 
5813 	return 0;
5814 }
5815 
/* Enable/disable coarse-grain clock gating (CGCG) and coarse-grain
 * light sleep (CGLS) for the gfx block, honoring cg_flags.  The RLC
 * is halted around the serdes writes and then restored.
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		/* restore the saved RLC_CNTL value */
		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* repeated reads with result discarded — presumably dummy
		 * reads to flush/settle the CB clock gating logic before
		 * disabling CGCG; TODO(review) confirm against register spec */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
5851 
/* Enable/disable medium-grain clock gating (MGCG) and the related
 * memory light-sleep / CGTS features for the gfx block, according to
 * the cg_flags capability bits.  The RLC is halted around the serdes
 * override writes and restored afterwards.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		/* CP memory light sleep, only if both MGLS and CP_LS are supported */
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* clear bit 1 of the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = cik_halt_rlc(rdev);

		/* broadcast the serdes override to all SEs/SHs */
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);

		/* optional CGTS (texture system clock gating) setup */
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* set bit 1 of the MGCG override to force clocks on */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000002;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		/* NOTE(review): the disable path clears CP_MEM_LS_EN without
		 * checking the CP_LS capability flag, unlike the enable path */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}
5930 
/* Memory-controller registers whose MC_LS_ENABLE / MC_CG_ENABLE bits
 * are toggled by cik_enable_mc_ls() and cik_enable_mc_mgcg().
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5943 
5944 static void cik_enable_mc_ls(struct radeon_device *rdev,
5945 			     bool enable)
5946 {
5947 	int i;
5948 	u32 orig, data;
5949 
5950 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5951 		orig = data = RREG32(mc_cg_registers[i]);
5952 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5953 			data |= MC_LS_ENABLE;
5954 		else
5955 			data &= ~MC_LS_ENABLE;
5956 		if (data != orig)
5957 			WREG32(mc_cg_registers[i], data);
5958 	}
5959 }
5960 
5961 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5962 			       bool enable)
5963 {
5964 	int i;
5965 	u32 orig, data;
5966 
5967 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5968 		orig = data = RREG32(mc_cg_registers[i]);
5969 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5970 			data |= MC_CG_ENABLE;
5971 		else
5972 			data &= ~MC_CG_ENABLE;
5973 		if (data != orig)
5974 			WREG32(mc_cg_registers[i], data);
5975 	}
5976 }
5977 
5978 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5979 				 bool enable)
5980 {
5981 	u32 orig, data;
5982 
5983 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5984 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5985 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5986 	} else {
5987 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5988 		data |= 0xff000000;
5989 		if (data != orig)
5990 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5991 
5992 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5993 		data |= 0xff000000;
5994 		if (data != orig)
5995 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5996 	}
5997 }
5998 
5999 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6000 				 bool enable)
6001 {
6002 	u32 orig, data;
6003 
6004 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6005 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6006 		data |= 0x100;
6007 		if (orig != data)
6008 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6009 
6010 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6011 		data |= 0x100;
6012 		if (orig != data)
6013 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6014 	} else {
6015 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6016 		data &= ~0x100;
6017 		if (orig != data)
6018 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6019 
6020 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6021 		data &= ~0x100;
6022 		if (orig != data)
6023 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6024 	}
6025 }
6026 
/* Enable/disable UVD medium-grain clock gating: program the UVD
 * context memory gating bits and the DCM bit in UVD_CGC_CTRL.
 */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		/* NOTE(review): the value read above is discarded and the
		 * register is set to a flat 0xfff.  Either the read is only
		 * needed for its side effect or this was meant to be
		 * "data |= 0xfff" — confirm before changing. */
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6052 
6053 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6054 			       bool enable)
6055 {
6056 	u32 orig, data;
6057 
6058 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6059 
6060 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6061 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6062 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6063 	else
6064 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6065 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6066 
6067 	if (orig != data)
6068 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6069 }
6070 
6071 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6072 				bool enable)
6073 {
6074 	u32 orig, data;
6075 
6076 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6077 
6078 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6079 		data &= ~CLOCK_GATING_DIS;
6080 	else
6081 		data |= CLOCK_GATING_DIS;
6082 
6083 	if (orig != data)
6084 		WREG32(HDP_HOST_PATH_CNTL, data);
6085 }
6086 
6087 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6088 			      bool enable)
6089 {
6090 	u32 orig, data;
6091 
6092 	orig = data = RREG32(HDP_MEM_POWER_LS);
6093 
6094 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6095 		data |= HDP_LS_ENABLE;
6096 	else
6097 		data &= ~HDP_LS_ENABLE;
6098 
6099 	if (orig != data)
6100 		WREG32(HDP_MEM_POWER_LS, data);
6101 }
6102 
/* Enable or disable clock gating for the IP blocks selected by the
 * @block bitmask.  For GFX the MGCG/CGCG ordering differs between
 * enable and disable and must be preserved.
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC clock gating only applies to discrete parts, not IGPs */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6150 
/* Enable clock gating on all supported blocks at init time.
 * GFX goes first; the remaining blocks are enabled together.
 */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6165 
/* Disable clock gating on teardown, in the reverse order of
 * cik_init_cg(): non-GFX blocks first, GFX last.
 */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6176 
6177 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6178 					  bool enable)
6179 {
6180 	u32 data, orig;
6181 
6182 	orig = data = RREG32(RLC_PG_CNTL);
6183 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6184 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6185 	else
6186 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6187 	if (orig != data)
6188 		WREG32(RLC_PG_CNTL, data);
6189 }
6190 
6191 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6192 					  bool enable)
6193 {
6194 	u32 data, orig;
6195 
6196 	orig = data = RREG32(RLC_PG_CNTL);
6197 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6198 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6199 	else
6200 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6201 	if (orig != data)
6202 		WREG32(RLC_PG_CNTL, data);
6203 }
6204 
6205 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6206 {
6207 	u32 data, orig;
6208 
6209 	orig = data = RREG32(RLC_PG_CNTL);
6210 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6211 		data &= ~DISABLE_CP_PG;
6212 	else
6213 		data |= DISABLE_CP_PG;
6214 	if (orig != data)
6215 		WREG32(RLC_PG_CNTL, data);
6216 }
6217 
6218 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6219 {
6220 	u32 data, orig;
6221 
6222 	orig = data = RREG32(RLC_PG_CNTL);
6223 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6224 		data &= ~DISABLE_GDS_PG;
6225 	else
6226 		data |= DISABLE_GDS_PG;
6227 	if (orig != data)
6228 		WREG32(RLC_PG_CNTL, data);
6229 }
6230 
6231 #define CP_ME_TABLE_SIZE    96
6232 #define CP_ME_TABLE_OFFSET  2048
6233 #define CP_MEC_TABLE_OFFSET 4096
6234 
/* Build the CP power-gating table in the RLC cp_table buffer by
 * copying the table section out of each CP microcode image
 * (CE, PFP, ME, and one or two MEC images on Kaveri).
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset;

	/* Kaveri has a second MEC, hence one extra table */
	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		/* pick the firmware image and the offset of its PG table */
		if (me == 0) {
			fw_data = (const __be32 *)rdev->ce_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 1) {
			fw_data = (const __be32 *)rdev->pfp_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 2) {
			fw_data = (const __be32 *)rdev->me_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else {
			fw_data = (const __be32 *)rdev->mec_fw->data;
			table_offset = CP_MEC_TABLE_OFFSET;
		}

		/* firmware words are big-endian; buffer is little-endian */
		for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
			dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
		}
		bo_offset += CP_ME_TABLE_SIZE;
	}
}
6272 
/* Enable/disable coarse-grain gfx power gating plus the RLC
 * auto-power-gate control.
 */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* result discarded — presumably a dummy read to force the
		 * gfx block awake after disabling PG; TODO(review) confirm */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6302 
6303 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6304 {
6305 	u32 mask = 0, tmp, tmp1;
6306 	int i;
6307 
6308 	cik_select_se_sh(rdev, se, sh);
6309 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6310 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6311 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6312 
6313 	tmp &= 0xffff0000;
6314 
6315 	tmp |= tmp1;
6316 	tmp >>= 16;
6317 
6318 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6319 		mask <<= 1;
6320 		mask |= 1;
6321 	}
6322 
6323 	return (~tmp) & mask;
6324 }
6325 
6326 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6327 {
6328 	u32 i, j, k, active_cu_number = 0;
6329 	u32 mask, counter, cu_bitmap;
6330 	u32 tmp = 0;
6331 
6332 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6333 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6334 			mask = 1;
6335 			cu_bitmap = 0;
6336 			counter = 0;
6337 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6338 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6339 					if (counter < 2)
6340 						cu_bitmap |= mask;
6341 					counter ++;
6342 				}
6343 				mask <<= 1;
6344 			}
6345 
6346 			active_cu_number += counter;
6347 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6348 		}
6349 	}
6350 
6351 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6352 
6353 	tmp = RREG32(RLC_MAX_PG_CU);
6354 	tmp &= ~MAX_PU_CU_MASK;
6355 	tmp |= MAX_PU_CU(active_cu_number);
6356 	WREG32(RLC_MAX_PG_CU, tmp);
6357 }
6358 
6359 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6360 				       bool enable)
6361 {
6362 	u32 data, orig;
6363 
6364 	orig = data = RREG32(RLC_PG_CNTL);
6365 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6366 		data |= STATIC_PER_CU_PG_ENABLE;
6367 	else
6368 		data &= ~STATIC_PER_CU_PG_ENABLE;
6369 	if (orig != data)
6370 		WREG32(RLC_PG_CNTL, data);
6371 }
6372 
6373 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6374 					bool enable)
6375 {
6376 	u32 data, orig;
6377 
6378 	orig = data = RREG32(RLC_PG_CNTL);
6379 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6380 		data |= DYN_PER_CU_PG_ENABLE;
6381 	else
6382 		data &= ~DYN_PER_CU_PG_ENABLE;
6383 	if (orig != data)
6384 		WREG32(RLC_PG_CNTL, data);
6385 }
6386 
6387 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6388 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6389 
/* Program the RLC scratch area for gfx power gating: publish the
 * clear-state descriptor and save/restore list, point the RLC at the
 * save/restore and CP tables, and tune the PG delays/thresholds.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* clear-state descriptor: hi addr, lo addr, size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state: zero out the descriptor slots */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* addresses are in units of 256 bytes, hence the >> 8 */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6438 
/* Enable/disable all gfx power-gating features together:
 * coarse-grain plus static and dynamic per-CU medium-grain PG.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6445 
6446 u32 cik_get_csb_size(struct radeon_device *rdev)
6447 {
6448 	u32 count = 0;
6449 	const struct cs_section_def *sect = NULL;
6450 	const struct cs_extent_def *ext = NULL;
6451 
6452 	if (rdev->rlc.cs_data == NULL)
6453 		return 0;
6454 
6455 	/* begin clear state */
6456 	count += 2;
6457 	/* context control state */
6458 	count += 3;
6459 
6460 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6461 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6462 			if (sect->id == SECT_CONTEXT)
6463 				count += 2 + ext->reg_count;
6464 			else
6465 				return 0;
6466 		}
6467 	}
6468 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6469 	count += 4;
6470 	/* end clear state */
6471 	count += 2;
6472 	/* clear state */
6473 	count += 2;
6474 
6475 	return count;
6476 }
6477 
6478 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6479 {
6480 	u32 count = 0, i;
6481 	const struct cs_section_def *sect = NULL;
6482 	const struct cs_extent_def *ext = NULL;
6483 
6484 	if (rdev->rlc.cs_data == NULL)
6485 		return;
6486 	if (buffer == NULL)
6487 		return;
6488 
6489 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6490 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6491 
6492 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6493 	buffer[count++] = cpu_to_le32(0x80000000);
6494 	buffer[count++] = cpu_to_le32(0x80000000);
6495 
6496 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6497 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6498 			if (sect->id == SECT_CONTEXT) {
6499 				buffer[count++] =
6500 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6501 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6502 				for (i = 0; i < ext->reg_count; i++)
6503 					buffer[count++] = cpu_to_le32(ext->extent[i]);
6504 			} else {
6505 				return;
6506 			}
6507 		}
6508 	}
6509 
6510 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6511 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6512 	switch (rdev->family) {
6513 	case CHIP_BONAIRE:
6514 		buffer[count++] = cpu_to_le32(0x16000012);
6515 		buffer[count++] = cpu_to_le32(0x00000000);
6516 		break;
6517 	case CHIP_KAVERI:
6518 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6519 		buffer[count++] = cpu_to_le32(0x00000000);
6520 		break;
6521 	case CHIP_KABINI:
6522 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6523 		buffer[count++] = cpu_to_le32(0x00000000);
6524 		break;
6525 	case CHIP_HAWAII:
6526 		buffer[count++] = 0x3a00161a;
6527 		buffer[count++] = 0x0000002e;
6528 		break;
6529 	default:
6530 		buffer[count++] = cpu_to_le32(0x00000000);
6531 		buffer[count++] = cpu_to_le32(0x00000000);
6532 		break;
6533 	}
6534 
6535 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6536 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6537 
6538 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6539 	buffer[count++] = cpu_to_le32(0);
6540 }
6541 
/* Enable power gating at init time if any PG features are supported:
 * SMU clock slowdown, optional CP/GDS PG plus the gfx CGPG scratch
 * setup, the always-on CU mask, and finally the gfx PG enables.
 */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
6556 
/* Disable power gating on teardown, reversing cik_init_pg():
 * gfx PG first, then CP/GDS PG if they were enabled.
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
6567 
6568 /*
6569  * Interrupts
6570  * Starting with r6xx, interrupts are handled via a ring buffer.
6571  * Ring buffers are areas of GPU accessible memory that the GPU
6572  * writes interrupt vectors into and the host reads vectors out of.
6573  * There is a rptr (read pointer) that determines where the
6574  * host is currently reading, and a wptr (write pointer)
6575  * which determines where the GPU has written.  When the
6576  * pointers are equal, the ring is idle.  When the GPU
6577  * writes vectors to the ring buffer, it increments the
6578  * wptr.  When there is an interrupt, the host then starts
6579  * fetching commands and processing them until the pointers are
6580  * equal again at which point it updates the rptr.
6581  */
6582 
6583 /**
6584  * cik_enable_interrupts - Enable the interrupt ring buffer
6585  *
6586  * @rdev: radeon_device pointer
6587  *
6588  * Enable the interrupt ring buffer (CIK).
6589  */
6590 static void cik_enable_interrupts(struct radeon_device *rdev)
6591 {
6592 	u32 ih_cntl = RREG32(IH_CNTL);
6593 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6594 
6595 	ih_cntl |= ENABLE_INTR;
6596 	ih_rb_cntl |= IH_RB_ENABLE;
6597 	WREG32(IH_CNTL, ih_cntl);
6598 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6599 	rdev->ih.enabled = true;
6600 }
6601 
6602 /**
6603  * cik_disable_interrupts - Disable the interrupt ring buffer
6604  *
6605  * @rdev: radeon_device pointer
6606  *
6607  * Disable the interrupt ring buffer (CIK).
6608  */
6609 static void cik_disable_interrupts(struct radeon_device *rdev)
6610 {
6611 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6612 	u32 ih_cntl = RREG32(IH_CNTL);
6613 
6614 	ih_rb_cntl &= ~IH_RB_ENABLE;
6615 	ih_cntl &= ~ENABLE_INTR;
6616 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6617 	WREG32(IH_CNTL, ih_cntl);
6618 	/* set rptr, wptr to 0 */
6619 	WREG32(IH_RB_RPTR, 0);
6620 	WREG32(IH_RB_WPTR, 0);
6621 	rdev->ih.enabled = false;
6622 	rdev->ih.rptr = 0;
6623 }
6624 
6625 /**
6626  * cik_disable_interrupt_state - Disable all interrupt sources
6627  *
6628  * @rdev: radeon_device pointer
6629  *
6630  * Clear all interrupt enable bits used by the driver (CIK).
6631  */
6632 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6633 {
6634 	u32 tmp;
6635 
6636 	/* gfx ring */
6637 	tmp = RREG32(CP_INT_CNTL_RING0) &
6638 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6639 	WREG32(CP_INT_CNTL_RING0, tmp);
6640 	/* sdma */
6641 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6642 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6643 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6644 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6645 	/* compute queues */
6646 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6647 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6648 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6649 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6650 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6651 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6652 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6653 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6654 	/* grbm */
6655 	WREG32(GRBM_INT_CNTL, 0);
6656 	/* vline/vblank, etc. */
6657 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6658 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6659 	if (rdev->num_crtc >= 4) {
6660 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6661 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6662 	}
6663 	if (rdev->num_crtc >= 6) {
6664 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6665 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6666 	}
6667 
6668 	/* dac hotplug */
6669 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6670 
6671 	/* digital hotplug */
6672 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6673 	WREG32(DC_HPD1_INT_CONTROL, tmp);
6674 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6675 	WREG32(DC_HPD2_INT_CONTROL, tmp);
6676 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6677 	WREG32(DC_HPD3_INT_CONTROL, tmp);
6678 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6679 	WREG32(DC_HPD4_INT_CONTROL, tmp);
6680 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6681 	WREG32(DC_HPD5_INT_CONTROL, tmp);
6682 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6683 	WREG32(DC_HPD6_INT_CONTROL, tmp);
6684 
6685 }
6686 
6687 /**
6688  * cik_irq_init - init and enable the interrupt ring
6689  *
6690  * @rdev: radeon_device pointer
6691  *
6692  * Allocate a ring buffer for the interrupt controller,
6693  * enable the RLC, disable interrupts, enable the IH
6694  * ring buffer and enable it (CIK).
6695  * Called at device load and reume.
6696  * Returns 0 for success, errors for failure.
6697  */
6698 static int cik_irq_init(struct radeon_device *rdev)
6699 {
6700 	int ret = 0;
6701 	int rb_bufsz;
6702 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6703 
6704 	/* allocate ring */
6705 	ret = r600_ih_ring_alloc(rdev);
6706 	if (ret)
6707 		return ret;
6708 
6709 	/* disable irqs */
6710 	cik_disable_interrupts(rdev);
6711 
6712 	/* init rlc */
6713 	ret = cik_rlc_resume(rdev);
6714 	if (ret) {
6715 		r600_ih_ring_fini(rdev);
6716 		return ret;
6717 	}
6718 
6719 	/* setup interrupt control */
6720 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
6721 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6722 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6723 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6724 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6725 	 */
6726 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6727 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6728 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6729 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6730 
6731 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6732 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6733 
6734 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6735 		      IH_WPTR_OVERFLOW_CLEAR |
6736 		      (rb_bufsz << 1));
6737 
6738 	if (rdev->wb.enabled)
6739 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6740 
6741 	/* set the writeback address whether it's enabled or not */
6742 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6743 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6744 
6745 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6746 
6747 	/* set rptr, wptr to 0 */
6748 	WREG32(IH_RB_RPTR, 0);
6749 	WREG32(IH_RB_WPTR, 0);
6750 
6751 	/* Default settings for IH_CNTL (disabled at first) */
6752 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6753 	/* RPTR_REARM only works if msi's are enabled */
6754 	if (rdev->msi_enabled)
6755 		ih_cntl |= RPTR_REARM;
6756 	WREG32(IH_CNTL, ih_cntl);
6757 
6758 	/* force the active interrupt state to all disabled */
6759 	cik_disable_interrupt_state(rdev);
6760 
6761 	pci_set_master(rdev->pdev);
6762 
6763 	/* enable irqs */
6764 	cik_enable_interrupts(rdev);
6765 
6766 	return ret;
6767 }
6768 
6769 /**
6770  * cik_irq_set - enable/disable interrupt sources
6771  *
6772  * @rdev: radeon_device pointer
6773  *
6774  * Enable interrupt sources on the GPU (vblanks, hpd,
6775  * etc.) (CIK).
6776  * Returns 0 for success, errors for failure.
6777  */
6778 int cik_irq_set(struct radeon_device *rdev)
6779 {
6780 	u32 cp_int_cntl;
6781 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6782 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6783 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6784 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6785 	u32 grbm_int_cntl = 0;
6786 	u32 dma_cntl, dma_cntl1;
6787 	u32 thermal_int;
6788 
6789 	if (!rdev->irq.installed) {
6790 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6791 		return -EINVAL;
6792 	}
6793 	/* don't enable anything if the ih is disabled */
6794 	if (!rdev->ih.enabled) {
6795 		cik_disable_interrupts(rdev);
6796 		/* force the active interrupt state to all disabled */
6797 		cik_disable_interrupt_state(rdev);
6798 		return 0;
6799 	}
6800 
6801 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6802 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6803 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6804 
6805 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6806 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6807 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6808 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6809 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6810 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6811 
6812 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6813 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6814 
6815 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6816 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6817 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6818 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6819 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6820 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6821 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6822 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6823 
6824 	if (rdev->flags & RADEON_IS_IGP)
6825 		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6826 			~(THERM_INTH_MASK | THERM_INTL_MASK);
6827 	else
6828 		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6829 			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6830 
6831 	/* enable CP interrupts on all rings */
6832 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6833 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
6834 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6835 	}
6836 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6837 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6838 		DRM_DEBUG("si_irq_set: sw int cp1\n");
6839 		if (ring->me == 1) {
6840 			switch (ring->pipe) {
6841 			case 0:
6842 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6843 				break;
6844 			case 1:
6845 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6846 				break;
6847 			case 2:
6848 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6849 				break;
6850 			case 3:
6851 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6852 				break;
6853 			default:
6854 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6855 				break;
6856 			}
6857 		} else if (ring->me == 2) {
6858 			switch (ring->pipe) {
6859 			case 0:
6860 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6861 				break;
6862 			case 1:
6863 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6864 				break;
6865 			case 2:
6866 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6867 				break;
6868 			case 3:
6869 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6870 				break;
6871 			default:
6872 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6873 				break;
6874 			}
6875 		} else {
6876 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6877 		}
6878 	}
6879 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6880 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6881 		DRM_DEBUG("si_irq_set: sw int cp2\n");
6882 		if (ring->me == 1) {
6883 			switch (ring->pipe) {
6884 			case 0:
6885 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6886 				break;
6887 			case 1:
6888 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6889 				break;
6890 			case 2:
6891 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6892 				break;
6893 			case 3:
6894 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6895 				break;
6896 			default:
6897 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6898 				break;
6899 			}
6900 		} else if (ring->me == 2) {
6901 			switch (ring->pipe) {
6902 			case 0:
6903 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6904 				break;
6905 			case 1:
6906 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6907 				break;
6908 			case 2:
6909 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6910 				break;
6911 			case 3:
6912 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6913 				break;
6914 			default:
6915 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6916 				break;
6917 			}
6918 		} else {
6919 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6920 		}
6921 	}
6922 
6923 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6924 		DRM_DEBUG("cik_irq_set: sw int dma\n");
6925 		dma_cntl |= TRAP_ENABLE;
6926 	}
6927 
6928 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6929 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
6930 		dma_cntl1 |= TRAP_ENABLE;
6931 	}
6932 
6933 	if (rdev->irq.crtc_vblank_int[0] ||
6934 	    atomic_read(&rdev->irq.pflip[0])) {
6935 		DRM_DEBUG("cik_irq_set: vblank 0\n");
6936 		crtc1 |= VBLANK_INTERRUPT_MASK;
6937 	}
6938 	if (rdev->irq.crtc_vblank_int[1] ||
6939 	    atomic_read(&rdev->irq.pflip[1])) {
6940 		DRM_DEBUG("cik_irq_set: vblank 1\n");
6941 		crtc2 |= VBLANK_INTERRUPT_MASK;
6942 	}
6943 	if (rdev->irq.crtc_vblank_int[2] ||
6944 	    atomic_read(&rdev->irq.pflip[2])) {
6945 		DRM_DEBUG("cik_irq_set: vblank 2\n");
6946 		crtc3 |= VBLANK_INTERRUPT_MASK;
6947 	}
6948 	if (rdev->irq.crtc_vblank_int[3] ||
6949 	    atomic_read(&rdev->irq.pflip[3])) {
6950 		DRM_DEBUG("cik_irq_set: vblank 3\n");
6951 		crtc4 |= VBLANK_INTERRUPT_MASK;
6952 	}
6953 	if (rdev->irq.crtc_vblank_int[4] ||
6954 	    atomic_read(&rdev->irq.pflip[4])) {
6955 		DRM_DEBUG("cik_irq_set: vblank 4\n");
6956 		crtc5 |= VBLANK_INTERRUPT_MASK;
6957 	}
6958 	if (rdev->irq.crtc_vblank_int[5] ||
6959 	    atomic_read(&rdev->irq.pflip[5])) {
6960 		DRM_DEBUG("cik_irq_set: vblank 5\n");
6961 		crtc6 |= VBLANK_INTERRUPT_MASK;
6962 	}
6963 	if (rdev->irq.hpd[0]) {
6964 		DRM_DEBUG("cik_irq_set: hpd 1\n");
6965 		hpd1 |= DC_HPDx_INT_EN;
6966 	}
6967 	if (rdev->irq.hpd[1]) {
6968 		DRM_DEBUG("cik_irq_set: hpd 2\n");
6969 		hpd2 |= DC_HPDx_INT_EN;
6970 	}
6971 	if (rdev->irq.hpd[2]) {
6972 		DRM_DEBUG("cik_irq_set: hpd 3\n");
6973 		hpd3 |= DC_HPDx_INT_EN;
6974 	}
6975 	if (rdev->irq.hpd[3]) {
6976 		DRM_DEBUG("cik_irq_set: hpd 4\n");
6977 		hpd4 |= DC_HPDx_INT_EN;
6978 	}
6979 	if (rdev->irq.hpd[4]) {
6980 		DRM_DEBUG("cik_irq_set: hpd 5\n");
6981 		hpd5 |= DC_HPDx_INT_EN;
6982 	}
6983 	if (rdev->irq.hpd[5]) {
6984 		DRM_DEBUG("cik_irq_set: hpd 6\n");
6985 		hpd6 |= DC_HPDx_INT_EN;
6986 	}
6987 
6988 	if (rdev->irq.dpm_thermal) {
6989 		DRM_DEBUG("dpm thermal\n");
6990 		if (rdev->flags & RADEON_IS_IGP)
6991 			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6992 		else
6993 			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6994 	}
6995 
6996 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6997 
6998 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6999 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7000 
7001 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7002 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7003 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7004 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7005 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7006 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7007 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7008 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7009 
7010 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7011 
7012 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7013 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7014 	if (rdev->num_crtc >= 4) {
7015 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7016 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7017 	}
7018 	if (rdev->num_crtc >= 6) {
7019 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7020 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7021 	}
7022 
7023 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7024 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7025 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7026 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7027 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7028 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7029 
7030 	if (rdev->flags & RADEON_IS_IGP)
7031 		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7032 	else
7033 		WREG32_SMC(CG_THERMAL_INT, thermal_int);
7034 
7035 	return 0;
7036 }
7037 
7038 /**
7039  * cik_irq_ack - ack interrupt sources
7040  *
7041  * @rdev: radeon_device pointer
7042  *
7043  * Ack interrupt sources on the GPU (vblanks, hpd,
7044  * etc.) (CIK).  Certain interrupts sources are sw
7045  * generated and do not require an explicit ack.
7046  */
7047 static inline void cik_irq_ack(struct radeon_device *rdev)
7048 {
7049 	u32 tmp;
7050 
7051 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7052 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7053 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7054 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7055 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7056 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7057 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7058 
7059 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7060 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7061 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7062 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7063 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7064 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7065 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7066 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7067 
7068 	if (rdev->num_crtc >= 4) {
7069 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7070 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7071 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7072 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7073 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7074 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7075 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7076 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7077 	}
7078 
7079 	if (rdev->num_crtc >= 6) {
7080 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7081 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7082 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7083 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7084 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7085 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7086 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7087 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7088 	}
7089 
7090 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7091 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7092 		tmp |= DC_HPDx_INT_ACK;
7093 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7094 	}
7095 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7096 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7097 		tmp |= DC_HPDx_INT_ACK;
7098 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7099 	}
7100 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7101 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7102 		tmp |= DC_HPDx_INT_ACK;
7103 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7104 	}
7105 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7106 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7107 		tmp |= DC_HPDx_INT_ACK;
7108 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7109 	}
7110 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7111 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7112 		tmp |= DC_HPDx_INT_ACK;
7113 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7114 	}
7115 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7116 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7117 		tmp |= DC_HPDx_INT_ACK;
7118 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7119 	}
7120 }
7121 
7122 /**
7123  * cik_irq_disable - disable interrupts
7124  *
7125  * @rdev: radeon_device pointer
7126  *
7127  * Disable interrupts on the hw (CIK).
7128  */
static void cik_irq_disable(struct radeon_device *rdev)
{
	/* stop interrupt delivery first */
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	/* then mask every source so nothing re-fires */
	cik_disable_interrupt_state(rdev);
}
7137 
7138 /**
 * cik_irq_suspend - disable interrupts for suspend
7140  *
7141  * @rdev: radeon_device pointer
7142  *
7143  * Disable interrupts and stop the RLC (CIK).
7144  * Used for suspend.
7145  */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* disable and ack all interrupt sources */
	cik_irq_disable(rdev);
	/* stop the RLC as well for suspend */
	cik_rlc_stop(rdev);
}
7151 
7152 /**
7153  * cik_irq_fini - tear down interrupt support
7154  *
7155  * @rdev: radeon_device pointer
7156  *
7157  * Disable interrupts on the hw and free the IH ring
7158  * buffer (CIK).
7159  * Used for driver unload.
7160  */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* quiesce the hardware first, then release the IH ring buffer */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7166 
7167 /**
7168  * cik_get_ih_wptr - get the IH ring buffer wptr
7169  *
7170  * @rdev: radeon_device pointer
7171  *
7172  * Get the IH ring buffer wptr from either the register
7173  * or the writeback memory buffer (CIK).  Also check for
7174  * ring buffer overflow and deal with it.
7175  * Used by cik_irq_process().
7176  * Returns the value of the wptr.
7177  */
7178 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7179 {
7180 	u32 wptr, tmp;
7181 
7182 	if (rdev->wb.enabled)
7183 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7184 	else
7185 		wptr = RREG32(IH_RB_WPTR);
7186 
7187 	if (wptr & RB_OVERFLOW) {
7188 		/* When a ring buffer overflow happen start parsing interrupt
7189 		 * from the last not overwritten vector (wptr + 16). Hopefully
7190 		 * this should allow us to catchup.
7191 		 */
7192 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
7193 			wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
7194 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7195 		tmp = RREG32(IH_RB_CNTL);
7196 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7197 		WREG32(IH_RB_CNTL, tmp);
7198 	}
7199 	return (wptr & rdev->ih.ptr_mask);
7200 }
7201 
7202 /*        CIK IV Ring
7203  * Each IV ring entry is 128 bits:
7204  * [7:0]    - interrupt source id
7205  * [31:8]   - reserved
7206  * [59:32]  - interrupt source data
7207  * [63:60]  - reserved
7208  * [71:64]  - RINGID
7209  *            CP:
7210  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7211  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7212  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7213  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7214  *            PIPE_ID - ME0 0=3D
7215  *                    - ME1&2 compute dispatcher (4 pipes each)
7216  *            SDMA:
7217  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7218  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7219  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7220  * [79:72]  - VMID
7221  * [95:80]  - PASID
7222  * [127:96] - reserved
7223  */
7224 /**
7225  * cik_irq_process - interrupt handler
7226  *
7227  * @rdev: radeon_device pointer
7228  *
 * Interrupt handler (CIK).  Walk the IH ring,
7230  * ack interrupts and schedule work to handle
7231  * interrupt events.
7232  * Returns irq process return code.
7233  */
7234 int cik_irq_process(struct radeon_device *rdev)
7235 {
7236 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7237 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7238 	u32 wptr;
7239 	u32 rptr;
7240 	u32 src_id, src_data, ring_id;
7241 	u8 me_id, pipe_id, queue_id;
7242 	u32 ring_index;
7243 	bool queue_hotplug = false;
7244 	bool queue_reset = false;
7245 	u32 addr, status, mc_client;
7246 	bool queue_thermal = false;
7247 
7248 	if (!rdev->ih.enabled || rdev->shutdown)
7249 		return IRQ_NONE;
7250 
7251 	wptr = cik_get_ih_wptr(rdev);
7252 
7253 restart_ih:
7254 	/* is somebody else already processing irqs? */
7255 	if (atomic_xchg(&rdev->ih.lock, 1))
7256 		return IRQ_NONE;
7257 
7258 	rptr = rdev->ih.rptr;
7259 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7260 
7261 	/* Order reading of wptr vs. reading of IH ring data */
7262 	rmb();
7263 
7264 	/* display interrupts */
7265 	cik_irq_ack(rdev);
7266 
7267 	while (rptr != wptr) {
7268 		/* wptr/rptr are in bytes! */
7269 		ring_index = rptr / 4;
7270 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7271 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7272 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7273 
7274 		switch (src_id) {
7275 		case 1: /* D1 vblank/vline */
7276 			switch (src_data) {
7277 			case 0: /* D1 vblank */
7278 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7279 					if (rdev->irq.crtc_vblank_int[0]) {
7280 						drm_handle_vblank(rdev->ddev, 0);
7281 						rdev->pm.vblank_sync = true;
7282 						wake_up(&rdev->irq.vblank_queue);
7283 					}
7284 					if (atomic_read(&rdev->irq.pflip[0]))
7285 						radeon_crtc_handle_flip(rdev, 0);
7286 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7287 					DRM_DEBUG("IH: D1 vblank\n");
7288 				}
7289 				break;
7290 			case 1: /* D1 vline */
7291 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7292 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7293 					DRM_DEBUG("IH: D1 vline\n");
7294 				}
7295 				break;
7296 			default:
7297 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7298 				break;
7299 			}
7300 			break;
7301 		case 2: /* D2 vblank/vline */
7302 			switch (src_data) {
7303 			case 0: /* D2 vblank */
7304 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7305 					if (rdev->irq.crtc_vblank_int[1]) {
7306 						drm_handle_vblank(rdev->ddev, 1);
7307 						rdev->pm.vblank_sync = true;
7308 						wake_up(&rdev->irq.vblank_queue);
7309 					}
7310 					if (atomic_read(&rdev->irq.pflip[1]))
7311 						radeon_crtc_handle_flip(rdev, 1);
7312 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7313 					DRM_DEBUG("IH: D2 vblank\n");
7314 				}
7315 				break;
7316 			case 1: /* D2 vline */
7317 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7318 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7319 					DRM_DEBUG("IH: D2 vline\n");
7320 				}
7321 				break;
7322 			default:
7323 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7324 				break;
7325 			}
7326 			break;
7327 		case 3: /* D3 vblank/vline */
7328 			switch (src_data) {
7329 			case 0: /* D3 vblank */
7330 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7331 					if (rdev->irq.crtc_vblank_int[2]) {
7332 						drm_handle_vblank(rdev->ddev, 2);
7333 						rdev->pm.vblank_sync = true;
7334 						wake_up(&rdev->irq.vblank_queue);
7335 					}
7336 					if (atomic_read(&rdev->irq.pflip[2]))
7337 						radeon_crtc_handle_flip(rdev, 2);
7338 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7339 					DRM_DEBUG("IH: D3 vblank\n");
7340 				}
7341 				break;
7342 			case 1: /* D3 vline */
7343 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7344 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7345 					DRM_DEBUG("IH: D3 vline\n");
7346 				}
7347 				break;
7348 			default:
7349 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7350 				break;
7351 			}
7352 			break;
7353 		case 4: /* D4 vblank/vline */
7354 			switch (src_data) {
7355 			case 0: /* D4 vblank */
7356 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7357 					if (rdev->irq.crtc_vblank_int[3]) {
7358 						drm_handle_vblank(rdev->ddev, 3);
7359 						rdev->pm.vblank_sync = true;
7360 						wake_up(&rdev->irq.vblank_queue);
7361 					}
7362 					if (atomic_read(&rdev->irq.pflip[3]))
7363 						radeon_crtc_handle_flip(rdev, 3);
7364 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7365 					DRM_DEBUG("IH: D4 vblank\n");
7366 				}
7367 				break;
7368 			case 1: /* D4 vline */
7369 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7370 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7371 					DRM_DEBUG("IH: D4 vline\n");
7372 				}
7373 				break;
7374 			default:
7375 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7376 				break;
7377 			}
7378 			break;
7379 		case 5: /* D5 vblank/vline */
7380 			switch (src_data) {
7381 			case 0: /* D5 vblank */
7382 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7383 					if (rdev->irq.crtc_vblank_int[4]) {
7384 						drm_handle_vblank(rdev->ddev, 4);
7385 						rdev->pm.vblank_sync = true;
7386 						wake_up(&rdev->irq.vblank_queue);
7387 					}
7388 					if (atomic_read(&rdev->irq.pflip[4]))
7389 						radeon_crtc_handle_flip(rdev, 4);
7390 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7391 					DRM_DEBUG("IH: D5 vblank\n");
7392 				}
7393 				break;
7394 			case 1: /* D5 vline */
7395 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7396 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7397 					DRM_DEBUG("IH: D5 vline\n");
7398 				}
7399 				break;
7400 			default:
7401 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7402 				break;
7403 			}
7404 			break;
7405 		case 6: /* D6 vblank/vline */
7406 			switch (src_data) {
7407 			case 0: /* D6 vblank */
7408 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7409 					if (rdev->irq.crtc_vblank_int[5]) {
7410 						drm_handle_vblank(rdev->ddev, 5);
7411 						rdev->pm.vblank_sync = true;
7412 						wake_up(&rdev->irq.vblank_queue);
7413 					}
7414 					if (atomic_read(&rdev->irq.pflip[5]))
7415 						radeon_crtc_handle_flip(rdev, 5);
7416 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7417 					DRM_DEBUG("IH: D6 vblank\n");
7418 				}
7419 				break;
7420 			case 1: /* D6 vline */
7421 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7422 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7423 					DRM_DEBUG("IH: D6 vline\n");
7424 				}
7425 				break;
7426 			default:
7427 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7428 				break;
7429 			}
7430 			break;
7431 		case 42: /* HPD hotplug */
7432 			switch (src_data) {
7433 			case 0:
7434 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7435 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7436 					queue_hotplug = true;
7437 					DRM_DEBUG("IH: HPD1\n");
7438 				}
7439 				break;
7440 			case 1:
7441 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7442 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7443 					queue_hotplug = true;
7444 					DRM_DEBUG("IH: HPD2\n");
7445 				}
7446 				break;
7447 			case 2:
7448 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7449 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7450 					queue_hotplug = true;
7451 					DRM_DEBUG("IH: HPD3\n");
7452 				}
7453 				break;
7454 			case 3:
7455 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7456 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7457 					queue_hotplug = true;
7458 					DRM_DEBUG("IH: HPD4\n");
7459 				}
7460 				break;
7461 			case 4:
7462 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7463 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7464 					queue_hotplug = true;
7465 					DRM_DEBUG("IH: HPD5\n");
7466 				}
7467 				break;
7468 			case 5:
7469 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7470 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7471 					queue_hotplug = true;
7472 					DRM_DEBUG("IH: HPD6\n");
7473 				}
7474 				break;
7475 			default:
7476 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7477 				break;
7478 			}
7479 			break;
7480 		case 124: /* UVD */
7481 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7482 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7483 			break;
7484 		case 146:
7485 		case 147:
7486 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7487 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7488 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7489 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7490 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7491 				addr);
7492 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7493 				status);
7494 			cik_vm_decode_fault(rdev, status, addr, mc_client);
7495 			/* reset addr and status */
7496 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7497 			break;
7498 		case 167: /* VCE */
7499 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7500 			switch (src_data) {
7501 			case 0:
7502 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7503 				break;
7504 			case 1:
7505 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7506 				break;
7507 			default:
7508 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7509 				break;
7510 			}
7511 			break;
7512 		case 176: /* GFX RB CP_INT */
7513 		case 177: /* GFX IB CP_INT */
7514 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7515 			break;
7516 		case 181: /* CP EOP event */
7517 			DRM_DEBUG("IH: CP EOP\n");
7518 			/* XXX check the bitfield order! */
7519 			me_id = (ring_id & 0x60) >> 5;
7520 			pipe_id = (ring_id & 0x18) >> 3;
7521 			queue_id = (ring_id & 0x7) >> 0;
7522 			switch (me_id) {
7523 			case 0:
7524 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7525 				break;
7526 			case 1:
7527 			case 2:
7528 				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7529 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7530 				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7531 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7532 				break;
7533 			}
7534 			break;
7535 		case 184: /* CP Privileged reg access */
7536 			DRM_ERROR("Illegal register access in command stream\n");
7537 			/* XXX check the bitfield order! */
7538 			me_id = (ring_id & 0x60) >> 5;
7539 			pipe_id = (ring_id & 0x18) >> 3;
7540 			queue_id = (ring_id & 0x7) >> 0;
7541 			switch (me_id) {
7542 			case 0:
7543 				/* This results in a full GPU reset, but all we need to do is soft
7544 				 * reset the CP for gfx
7545 				 */
7546 				queue_reset = true;
7547 				break;
7548 			case 1:
7549 				/* XXX compute */
7550 				queue_reset = true;
7551 				break;
7552 			case 2:
7553 				/* XXX compute */
7554 				queue_reset = true;
7555 				break;
7556 			}
7557 			break;
7558 		case 185: /* CP Privileged inst */
7559 			DRM_ERROR("Illegal instruction in command stream\n");
7560 			/* XXX check the bitfield order! */
7561 			me_id = (ring_id & 0x60) >> 5;
7562 			pipe_id = (ring_id & 0x18) >> 3;
7563 			queue_id = (ring_id & 0x7) >> 0;
7564 			switch (me_id) {
7565 			case 0:
7566 				/* This results in a full GPU reset, but all we need to do is soft
7567 				 * reset the CP for gfx
7568 				 */
7569 				queue_reset = true;
7570 				break;
7571 			case 1:
7572 				/* XXX compute */
7573 				queue_reset = true;
7574 				break;
7575 			case 2:
7576 				/* XXX compute */
7577 				queue_reset = true;
7578 				break;
7579 			}
7580 			break;
7581 		case 224: /* SDMA trap event */
7582 			/* XXX check the bitfield order! */
7583 			me_id = (ring_id & 0x3) >> 0;
7584 			queue_id = (ring_id & 0xc) >> 2;
7585 			DRM_DEBUG("IH: SDMA trap\n");
7586 			switch (me_id) {
7587 			case 0:
7588 				switch (queue_id) {
7589 				case 0:
7590 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7591 					break;
7592 				case 1:
7593 					/* XXX compute */
7594 					break;
7595 				case 2:
7596 					/* XXX compute */
7597 					break;
7598 				}
7599 				break;
7600 			case 1:
7601 				switch (queue_id) {
7602 				case 0:
7603 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7604 					break;
7605 				case 1:
7606 					/* XXX compute */
7607 					break;
7608 				case 2:
7609 					/* XXX compute */
7610 					break;
7611 				}
7612 				break;
7613 			}
7614 			break;
7615 		case 230: /* thermal low to high */
7616 			DRM_DEBUG("IH: thermal low to high\n");
7617 			rdev->pm.dpm.thermal.high_to_low = false;
7618 			queue_thermal = true;
7619 			break;
7620 		case 231: /* thermal high to low */
7621 			DRM_DEBUG("IH: thermal high to low\n");
7622 			rdev->pm.dpm.thermal.high_to_low = true;
7623 			queue_thermal = true;
7624 			break;
7625 		case 233: /* GUI IDLE */
7626 			DRM_DEBUG("IH: GUI idle\n");
7627 			break;
7628 		case 241: /* SDMA Privileged inst */
7629 		case 247: /* SDMA Privileged inst */
7630 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
7631 			/* XXX check the bitfield order! */
7632 			me_id = (ring_id & 0x3) >> 0;
7633 			queue_id = (ring_id & 0xc) >> 2;
7634 			switch (me_id) {
7635 			case 0:
7636 				switch (queue_id) {
7637 				case 0:
7638 					queue_reset = true;
7639 					break;
7640 				case 1:
7641 					/* XXX compute */
7642 					queue_reset = true;
7643 					break;
7644 				case 2:
7645 					/* XXX compute */
7646 					queue_reset = true;
7647 					break;
7648 				}
7649 				break;
7650 			case 1:
7651 				switch (queue_id) {
7652 				case 0:
7653 					queue_reset = true;
7654 					break;
7655 				case 1:
7656 					/* XXX compute */
7657 					queue_reset = true;
7658 					break;
7659 				case 2:
7660 					/* XXX compute */
7661 					queue_reset = true;
7662 					break;
7663 				}
7664 				break;
7665 			}
7666 			break;
7667 		default:
7668 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7669 			break;
7670 		}
7671 
7672 		/* wptr/rptr are in bytes! */
7673 		rptr += 16;
7674 		rptr &= rdev->ih.ptr_mask;
7675 	}
7676 	if (queue_hotplug)
7677 		schedule_work(&rdev->hotplug_work);
7678 	if (queue_reset)
7679 		schedule_work(&rdev->reset_work);
7680 	if (queue_thermal)
7681 		schedule_work(&rdev->pm.dpm.thermal.work);
7682 	rdev->ih.rptr = rptr;
7683 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
7684 	atomic_set(&rdev->ih.lock, 0);
7685 
7686 	/* make sure wptr hasn't changed while processing */
7687 	wptr = cik_get_ih_wptr(rdev);
7688 	if (wptr != rptr)
7689 		goto restart_ih;
7690 
7691 	return IRQ_HANDLED;
7692 }
7693 
7694 /*
7695  * startup/shutdown callbacks
7696  */
7697 /**
7698  * cik_startup - program the asic to a functional state
7699  *
7700  * @rdev: radeon_device pointer
7701  *
7702  * Programs the asic to a functional state (CIK).
7703  * Called by cik_init() and cik_resume().
7704  * Returns 0 for success, error for failure.
7705  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* dGPUs only: load the MC ucode here unless dpm already took care of it */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* pick the RLC save/restore list for the APU variant */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start the fence driver on every ring we will use:
	 * gfx, two compute rings, two SDMA rings, then UVD/VCE below.
	 */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD is optional: on failure just disable its ring instead of aborting */
	r = radeon_uvd_resume(rdev);
	if (!r) {
		r = uvd_v4_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	/* VCE is optional as well: disable both VCE rings on failure */
	r = radeon_vce_resume(rdev);
	if (!r) {
		r = vce_v2_0_resume(rdev);
		if (!r)
			r = radeon_fence_driver_start_ring(rdev,
							   TN_RING_TYPE_VCE1_INDEX);
		if (!r)
			r = radeon_fence_driver_start_ring(rdev,
							   TN_RING_TYPE_VCE2_INDEX);
	}
	if (r) {
		dev_err(rdev->dev, "VCE init error (%d).\n", r);
		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* UVD ring_size is 0 if UVD resume failed above; skip init in that case */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     RADEON_CP_PACKET2);
		if (!r)
			r = uvd_v1_0_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	/* -ENOENT means "no VCE rings enabled" and is deliberately not reported */
	r = -ENOENT;

	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
	if (ring->ring_size)
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     VCE_CMD_NO_OP);

	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
	if (ring->ring_size)
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     VCE_CMD_NO_OP);

	if (!r)
		r = vce_v1_0_init(rdev);
	else if (r != -ENOENT)
		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
7937 
7938 /**
7939  * cik_resume - resume the asic to a functional state
7940  *
7941  * @rdev: radeon_device pointer
7942  *
7943  * Programs the asic to a functional state (CIK).
7944  * Called at resume.
7945  * Returns 0 for success, error for failure.
7946  */
7947 int cik_resume(struct radeon_device *rdev)
7948 {
7949 	int r;
7950 
7951 	/* post card */
7952 	atom_asic_init(rdev->mode_info.atom_context);
7953 
7954 	/* init golden registers */
7955 	cik_init_golden_registers(rdev);
7956 
7957 	radeon_pm_resume(rdev);
7958 
7959 	rdev->accel_working = true;
7960 	r = cik_startup(rdev);
7961 	if (r) {
7962 		DRM_ERROR("cik startup failed on resume\n");
7963 		rdev->accel_working = false;
7964 		return r;
7965 	}
7966 
7967 	return r;
7968 
7969 }
7970 
7971 /**
7972  * cik_suspend - suspend the asic
7973  *
7974  * @rdev: radeon_device pointer
7975  *
7976  * Bring the chip into a state suitable for suspend (CIK).
7977  * Called at suspend.
7978  * Returns 0 for success.
7979  */
int cik_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the CP and SDMA engines */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	/* tear down the UVD/VCE blocks */
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	radeon_vce_suspend(rdev);
	/* disable powergating and clockgating before stopping interrupts */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	/* writeback and GART go last, after all users above are quiesced */
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
7997 
7998 /* Plan is to move initialization in that function and use
7999  * helper function so that radeon_device_init pretty much
8000  * do nothing more than calling asic specific function. This
8001  * should also allow to remove a bunch of callback function
8002  * like vram_info.
8003  */
8004 /**
8005  * cik_init - asic specific driver and hw init
8006  *
8007  * @rdev: radeon_device pointer
8008  *
8009  * Setup asic specific driver variables and program the hw
8010  * to a functional state (CIK).
8011  * Called at driver startup.
8012  * Returns 0 for success, errors for failure.
8013  */
8014 int cik_init(struct radeon_device *rdev)
8015 {
8016 	struct radeon_ring *ring;
8017 	int r;
8018 
8019 	/* Read BIOS */
8020 	if (!radeon_get_bios(rdev)) {
8021 		if (ASIC_IS_AVIVO(rdev))
8022 			return -EINVAL;
8023 	}
8024 	/* Must be an ATOMBIOS */
8025 	if (!rdev->is_atom_bios) {
8026 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8027 		return -EINVAL;
8028 	}
8029 	r = radeon_atombios_init(rdev);
8030 	if (r)
8031 		return r;
8032 
8033 	/* Post card if necessary */
8034 	if (!radeon_card_posted(rdev)) {
8035 		if (!rdev->bios) {
8036 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8037 			return -EINVAL;
8038 		}
8039 		DRM_INFO("GPU not posted. posting now...\n");
8040 		atom_asic_init(rdev->mode_info.atom_context);
8041 	}
8042 	/* init golden registers */
8043 	cik_init_golden_registers(rdev);
8044 	/* Initialize scratch registers */
8045 	cik_scratch_init(rdev);
8046 	/* Initialize surface registers */
8047 	radeon_surface_init(rdev);
8048 	/* Initialize clocks */
8049 	radeon_get_clock_info(rdev->ddev);
8050 
8051 	/* Fence driver */
8052 	r = radeon_fence_driver_init(rdev);
8053 	if (r)
8054 		return r;
8055 
8056 	/* initialize memory controller */
8057 	r = cik_mc_init(rdev);
8058 	if (r)
8059 		return r;
8060 	/* Memory manager */
8061 	r = radeon_bo_init(rdev);
8062 	if (r)
8063 		return r;
8064 
8065 	if (rdev->flags & RADEON_IS_IGP) {
8066 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8067 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8068 			r = cik_init_microcode(rdev);
8069 			if (r) {
8070 				DRM_ERROR("Failed to load firmware!\n");
8071 				return r;
8072 			}
8073 		}
8074 	} else {
8075 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8076 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8077 		    !rdev->mc_fw) {
8078 			r = cik_init_microcode(rdev);
8079 			if (r) {
8080 				DRM_ERROR("Failed to load firmware!\n");
8081 				return r;
8082 			}
8083 		}
8084 	}
8085 
8086 	/* Initialize power management */
8087 	radeon_pm_init(rdev);
8088 
8089 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8090 	ring->ring_obj = NULL;
8091 	r600_ring_init(rdev, ring, 1024 * 1024);
8092 
8093 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8094 	ring->ring_obj = NULL;
8095 	r600_ring_init(rdev, ring, 1024 * 1024);
8096 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8097 	if (r)
8098 		return r;
8099 
8100 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8101 	ring->ring_obj = NULL;
8102 	r600_ring_init(rdev, ring, 1024 * 1024);
8103 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8104 	if (r)
8105 		return r;
8106 
8107 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8108 	ring->ring_obj = NULL;
8109 	r600_ring_init(rdev, ring, 256 * 1024);
8110 
8111 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8112 	ring->ring_obj = NULL;
8113 	r600_ring_init(rdev, ring, 256 * 1024);
8114 
8115 	r = radeon_uvd_init(rdev);
8116 	if (!r) {
8117 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8118 		ring->ring_obj = NULL;
8119 		r600_ring_init(rdev, ring, 4096);
8120 	}
8121 
8122 	r = radeon_vce_init(rdev);
8123 	if (!r) {
8124 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8125 		ring->ring_obj = NULL;
8126 		r600_ring_init(rdev, ring, 4096);
8127 
8128 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8129 		ring->ring_obj = NULL;
8130 		r600_ring_init(rdev, ring, 4096);
8131 	}
8132 
8133 	rdev->ih.ring_obj = NULL;
8134 	r600_ih_ring_init(rdev, 64 * 1024);
8135 
8136 	r = r600_pcie_gart_init(rdev);
8137 	if (r)
8138 		return r;
8139 
8140 	rdev->accel_working = true;
8141 	r = cik_startup(rdev);
8142 	if (r) {
8143 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8144 		cik_cp_fini(rdev);
8145 		cik_sdma_fini(rdev);
8146 		cik_irq_fini(rdev);
8147 		sumo_rlc_fini(rdev);
8148 		cik_mec_fini(rdev);
8149 		radeon_wb_fini(rdev);
8150 		radeon_ib_pool_fini(rdev);
8151 		radeon_vm_manager_fini(rdev);
8152 		radeon_irq_kms_fini(rdev);
8153 		cik_pcie_gart_fini(rdev);
8154 		rdev->accel_working = false;
8155 	}
8156 
8157 	/* Don't start up if the MC ucode is missing.
8158 	 * The default clocks and voltages before the MC ucode
8159 	 * is loaded are not suffient for advanced operations.
8160 	 */
8161 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8162 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8163 		return -EINVAL;
8164 	}
8165 
8166 	return 0;
8167 }
8168 
8169 /**
8170  * cik_fini - asic specific driver and hw fini
8171  *
8172  * @rdev: radeon_device pointer
8173  *
8174  * Tear down the asic specific driver variables and program the hw
8175  * to an idle state (CIK).
8176  * Called at driver unload.
8177  */
void cik_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	/* stop and free the CP/SDMA engines */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	/* disable powergating/clockgating before interrupt teardown */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	/* tear down the multimedia blocks */
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	/* GART/VRAM scratch last among HW resources */
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
8204 
8205 void dce8_program_fmt(struct drm_encoder *encoder)
8206 {
8207 	struct drm_device *dev = encoder->dev;
8208 	struct radeon_device *rdev = dev->dev_private;
8209 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8210 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8211 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8212 	int bpc = 0;
8213 	u32 tmp = 0;
8214 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8215 
8216 	if (connector) {
8217 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8218 		bpc = radeon_get_monitor_bpc(connector);
8219 		dither = radeon_connector->dither;
8220 	}
8221 
8222 	/* LVDS/eDP FMT is set up by atom */
8223 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8224 		return;
8225 
8226 	/* not needed for analog */
8227 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8228 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8229 		return;
8230 
8231 	if (bpc == 0)
8232 		return;
8233 
8234 	switch (bpc) {
8235 	case 6:
8236 		if (dither == RADEON_FMT_DITHER_ENABLE)
8237 			/* XXX sort out optimal dither settings */
8238 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8239 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8240 		else
8241 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8242 		break;
8243 	case 8:
8244 		if (dither == RADEON_FMT_DITHER_ENABLE)
8245 			/* XXX sort out optimal dither settings */
8246 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8247 				FMT_RGB_RANDOM_ENABLE |
8248 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8249 		else
8250 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8251 		break;
8252 	case 10:
8253 		if (dither == RADEON_FMT_DITHER_ENABLE)
8254 			/* XXX sort out optimal dither settings */
8255 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8256 				FMT_RGB_RANDOM_ENABLE |
8257 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8258 		else
8259 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8260 		break;
8261 	default:
8262 		/* not needed */
8263 		break;
8264 	}
8265 
8266 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8267 }
8268 
8269 /* display watermark setup */
8270 /**
8271  * dce8_line_buffer_adjust - Set up the line buffer
8272  *
8273  * @rdev: radeon_device pointer
8274  * @radeon_crtc: the selected display controller
8275  * @mode: the current display mode on the selected display
8276  * controller
8277  *
8278  * Setup up the line buffer allocation for
8279  * the selected display controller (CIK).
8280  * Returns the line buffer size in pixels.
8281  */
8282 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8283 				   struct radeon_crtc *radeon_crtc,
8284 				   struct drm_display_mode *mode)
8285 {
8286 	u32 tmp, buffer_alloc, i;
8287 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8288 	/*
8289 	 * Line Buffer Setup
8290 	 * There are 6 line buffers, one for each display controllers.
8291 	 * There are 3 partitions per LB. Select the number of partitions
8292 	 * to enable based on the display width.  For display widths larger
8293 	 * than 4096, you need use to use 2 display controllers and combine
8294 	 * them using the stereo blender.
8295 	 */
8296 	if (radeon_crtc->base.enabled && mode) {
8297 		if (mode->crtc_hdisplay < 1920) {
8298 			tmp = 1;
8299 			buffer_alloc = 2;
8300 		} else if (mode->crtc_hdisplay < 2560) {
8301 			tmp = 2;
8302 			buffer_alloc = 2;
8303 		} else if (mode->crtc_hdisplay < 4096) {
8304 			tmp = 0;
8305 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8306 		} else {
8307 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8308 			tmp = 0;
8309 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8310 		}
8311 	} else {
8312 		tmp = 1;
8313 		buffer_alloc = 0;
8314 	}
8315 
8316 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8317 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8318 
8319 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8320 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8321 	for (i = 0; i < rdev->usec_timeout; i++) {
8322 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8323 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8324 			break;
8325 		udelay(1);
8326 	}
8327 
8328 	if (radeon_crtc->base.enabled && mode) {
8329 		switch (tmp) {
8330 		case 0:
8331 		default:
8332 			return 4096 * 2;
8333 		case 1:
8334 			return 1920 * 2;
8335 		case 2:
8336 			return 2560 * 2;
8337 		}
8338 	}
8339 
8340 	/* controller not enabled, so no lb used */
8341 	return 0;
8342 }
8343 
8344 /**
8345  * cik_get_number_of_dram_channels - get the number of dram channels
8346  *
8347  * @rdev: radeon_device pointer
8348  *
8349  * Look up the number of video ram channels (CIK).
8350  * Used for display watermark bandwidth calculations
8351  * Returns the number of dram channels
8352  */
8353 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8354 {
8355 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8356 
8357 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8358 	case 0:
8359 	default:
8360 		return 1;
8361 	case 1:
8362 		return 2;
8363 	case 2:
8364 		return 4;
8365 	case 3:
8366 		return 8;
8367 	case 4:
8368 		return 3;
8369 	case 5:
8370 		return 6;
8371 	case 6:
8372 		return 10;
8373 	case 7:
8374 		return 12;
8375 	case 8:
8376 		return 16;
8377 	}
8378 }
8379 
/* Inputs for the DCE8 display watermark calculations below. */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8395 
8396 /**
8397  * dce8_dram_bandwidth - get the dram bandwidth
8398  *
8399  * @wm: watermark calculation data
8400  *
8401  * Calculate the raw dram bandwidth (CIK).
8402  * Used for display watermark bandwidth calculations
8403  * Returns the dram bandwidth in MBytes/s
8404  */
8405 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8406 {
8407 	/* Calculate raw DRAM Bandwidth */
8408 	fixed20_12 dram_efficiency; /* 0.7 */
8409 	fixed20_12 yclk, dram_channels, bandwidth;
8410 	fixed20_12 a;
8411 
8412 	a.full = dfixed_const(1000);
8413 	yclk.full = dfixed_const(wm->yclk);
8414 	yclk.full = dfixed_div(yclk, a);
8415 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8416 	a.full = dfixed_const(10);
8417 	dram_efficiency.full = dfixed_const(7);
8418 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8419 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8420 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8421 
8422 	return dfixed_trunc(bandwidth);
8423 }
8424 
8425 /**
8426  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8427  *
8428  * @wm: watermark calculation data
8429  *
8430  * Calculate the dram bandwidth used for display (CIK).
8431  * Used for display watermark bandwidth calculations
8432  * Returns the dram bandwidth for display in MBytes/s
8433  */
8434 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8435 {
8436 	/* Calculate DRAM Bandwidth and the part allocated to display. */
8437 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8438 	fixed20_12 yclk, dram_channels, bandwidth;
8439 	fixed20_12 a;
8440 
8441 	a.full = dfixed_const(1000);
8442 	yclk.full = dfixed_const(wm->yclk);
8443 	yclk.full = dfixed_div(yclk, a);
8444 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8445 	a.full = dfixed_const(10);
8446 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8447 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8448 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8449 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8450 
8451 	return dfixed_trunc(bandwidth);
8452 }
8453 
8454 /**
8455  * dce8_data_return_bandwidth - get the data return bandwidth
8456  *
8457  * @wm: watermark calculation data
8458  *
8459  * Calculate the data return bandwidth used for display (CIK).
8460  * Used for display watermark bandwidth calculations
8461  * Returns the data return bandwidth in MBytes/s
8462  */
8463 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8464 {
8465 	/* Calculate the display Data return Bandwidth */
8466 	fixed20_12 return_efficiency; /* 0.8 */
8467 	fixed20_12 sclk, bandwidth;
8468 	fixed20_12 a;
8469 
8470 	a.full = dfixed_const(1000);
8471 	sclk.full = dfixed_const(wm->sclk);
8472 	sclk.full = dfixed_div(sclk, a);
8473 	a.full = dfixed_const(10);
8474 	return_efficiency.full = dfixed_const(8);
8475 	return_efficiency.full = dfixed_div(return_efficiency, a);
8476 	a.full = dfixed_const(32);
8477 	bandwidth.full = dfixed_mul(a, sclk);
8478 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8479 
8480 	return dfixed_trunc(bandwidth);
8481 }
8482 
8483 /**
8484  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8485  *
8486  * @wm: watermark calculation data
8487  *
8488  * Calculate the dmif bandwidth used for display (CIK).
8489  * Used for display watermark bandwidth calculations
8490  * Returns the dmif bandwidth in MBytes/s
8491  */
8492 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8493 {
8494 	/* Calculate the DMIF Request Bandwidth */
8495 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8496 	fixed20_12 disp_clk, bandwidth;
8497 	fixed20_12 a, b;
8498 
8499 	a.full = dfixed_const(1000);
8500 	disp_clk.full = dfixed_const(wm->disp_clk);
8501 	disp_clk.full = dfixed_div(disp_clk, a);
8502 	a.full = dfixed_const(32);
8503 	b.full = dfixed_mul(a, disp_clk);
8504 
8505 	a.full = dfixed_const(10);
8506 	disp_clk_request_efficiency.full = dfixed_const(8);
8507 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8508 
8509 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8510 
8511 	return dfixed_trunc(bandwidth);
8512 }
8513 
8514 /**
8515  * dce8_available_bandwidth - get the min available bandwidth
8516  *
8517  * @wm: watermark calculation data
8518  *
8519  * Calculate the min available bandwidth used for display (CIK).
8520  * Used for display watermark bandwidth calculations
8521  * Returns the min available bandwidth in MBytes/s
8522  */
8523 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8524 {
8525 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
8526 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8527 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8528 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8529 
8530 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8531 }
8532 
8533 /**
8534  * dce8_average_bandwidth - get the average available bandwidth
8535  *
8536  * @wm: watermark calculation data
8537  *
8538  * Calculate the average available bandwidth used for display (CIK).
8539  * Used for display watermark bandwidth calculations
8540  * Returns the average available bandwidth in MBytes/s
8541  */
8542 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8543 {
8544 	/* Calculate the display mode Average Bandwidth
8545 	 * DisplayMode should contain the source and destination dimensions,
8546 	 * timing, etc.
8547 	 */
8548 	fixed20_12 bpp;
8549 	fixed20_12 line_time;
8550 	fixed20_12 src_width;
8551 	fixed20_12 bandwidth;
8552 	fixed20_12 a;
8553 
8554 	a.full = dfixed_const(1000);
8555 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8556 	line_time.full = dfixed_div(line_time, a);
8557 	bpp.full = dfixed_const(wm->bytes_per_pixel);
8558 	src_width.full = dfixed_const(wm->src_width);
8559 	bandwidth.full = dfixed_mul(src_width, bpp);
8560 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8561 	bandwidth.full = dfixed_div(bandwidth, line_time);
8562 
8563 	return dfixed_trunc(bandwidth);
8564 }
8565 
8566 /**
8567  * dce8_latency_watermark - get the latency watermark
8568  *
8569  * @wm: watermark calculation data
8570  *
8571  * Calculate the latency watermark (CIK).
8572  * Used for display watermark bandwidth calculations
8573  * Returns the latency watermark in ns
8574  */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	/* time for a worst-case 512-byte chunk to return, in ns */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* latency added by the other active heads sharing the memory path */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* downscaling/interlacing/tall scaler taps need more source lines
	 * fetched per destination line
	 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / ((mc_latency + 512) / disp_clk) */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = disp_clk(MHz) * bytes_per_pixel = peak LB consumption rate */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* line-buffer fill bandwidth is limited by both figures above */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill one destination line's worth of source data */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the line can't be filled within active time, the extra time
	 * is added to the watermark
	 */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
8637 
8638 /**
8639  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8640  * average and available dram bandwidth
8641  *
8642  * @wm: watermark calculation data
8643  *
8644  * Check if the display average bandwidth fits in the display
8645  * dram bandwidth (CIK).
8646  * Used for display watermark bandwidth calculations
8647  * Returns true if the display fits, false if not.
8648  */
8649 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8650 {
8651 	if (dce8_average_bandwidth(wm) <=
8652 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8653 		return true;
8654 	else
8655 		return false;
8656 }
8657 
8658 /**
8659  * dce8_average_bandwidth_vs_available_bandwidth - check
8660  * average and available bandwidth
8661  *
8662  * @wm: watermark calculation data
8663  *
8664  * Check if the display average bandwidth fits in the display
8665  * available bandwidth (CIK).
8666  * Used for display watermark bandwidth calculations
8667  * Returns true if the display fits, false if not.
8668  */
8669 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8670 {
8671 	if (dce8_average_bandwidth(wm) <=
8672 	    (dce8_available_bandwidth(wm) / wm->num_heads))
8673 		return true;
8674 	else
8675 		return false;
8676 }
8677 
8678 /**
8679  * dce8_check_latency_hiding - check latency hiding
8680  *
8681  * @wm: watermark calculation data
8682  *
8683  * Check latency hiding (CIK).
8684  * Used for display watermark bandwidth calculations
8685  * Returns true if the display fits, false if not.
8686  */
8687 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8688 {
8689 	u32 lb_partitions = wm->lb_size / wm->src_width;
8690 	u32 line_time = wm->active_time + wm->blank_time;
8691 	u32 latency_tolerant_lines;
8692 	u32 latency_hiding;
8693 	fixed20_12 a;
8694 
8695 	a.full = dfixed_const(1);
8696 	if (wm->vsc.full > a.full)
8697 		latency_tolerant_lines = 1;
8698 	else {
8699 		if (lb_partitions <= (wm->vtaps + 1))
8700 			latency_tolerant_lines = 1;
8701 		else
8702 			latency_tolerant_lines = 2;
8703 	}
8704 
8705 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8706 
8707 	if (dce8_latency_watermark(wm) <= latency_hiding)
8708 		return true;
8709 	else
8710 		return false;
8711 }
8712 
8713 /**
8714  * dce8_program_watermarks - program display watermarks
8715  *
8716  * @rdev: radeon_device pointer
8717  * @radeon_crtc: the selected display controller
8718  * @lb_size: line buffer size
8719  * @num_heads: number of display controllers in use
8720  *
8721  * Calculate and program the display watermarks for the
8722  * selected display controller (CIK).
8723  */
8724 static void dce8_program_watermarks(struct radeon_device *rdev,
8725 				    struct radeon_crtc *radeon_crtc,
8726 				    u32 lb_size, u32 num_heads)
8727 {
8728 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
8729 	struct dce8_wm_params wm_low, wm_high;
8730 	u32 pixel_period;
8731 	u32 line_time = 0;
8732 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
8733 	u32 tmp, wm_mask;
8734 
8735 	if (radeon_crtc->base.enabled && num_heads && mode) {
8736 		pixel_period = 1000000 / (u32)mode->clock;
8737 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8738 
8739 		/* watermark for high clocks */
8740 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8741 		    rdev->pm.dpm_enabled) {
8742 			wm_high.yclk =
8743 				radeon_dpm_get_mclk(rdev, false) * 10;
8744 			wm_high.sclk =
8745 				radeon_dpm_get_sclk(rdev, false) * 10;
8746 		} else {
8747 			wm_high.yclk = rdev->pm.current_mclk * 10;
8748 			wm_high.sclk = rdev->pm.current_sclk * 10;
8749 		}
8750 
8751 		wm_high.disp_clk = mode->clock;
8752 		wm_high.src_width = mode->crtc_hdisplay;
8753 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8754 		wm_high.blank_time = line_time - wm_high.active_time;
8755 		wm_high.interlaced = false;
8756 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8757 			wm_high.interlaced = true;
8758 		wm_high.vsc = radeon_crtc->vsc;
8759 		wm_high.vtaps = 1;
8760 		if (radeon_crtc->rmx_type != RMX_OFF)
8761 			wm_high.vtaps = 2;
8762 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8763 		wm_high.lb_size = lb_size;
8764 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8765 		wm_high.num_heads = num_heads;
8766 
8767 		/* set for high clocks */
8768 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8769 
8770 		/* possibly force display priority to high */
8771 		/* should really do this at mode validation time... */
8772 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8773 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8774 		    !dce8_check_latency_hiding(&wm_high) ||
8775 		    (rdev->disp_priority == 2)) {
8776 			DRM_DEBUG_KMS("force priority to high\n");
8777 		}
8778 
8779 		/* watermark for low clocks */
8780 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8781 		    rdev->pm.dpm_enabled) {
8782 			wm_low.yclk =
8783 				radeon_dpm_get_mclk(rdev, true) * 10;
8784 			wm_low.sclk =
8785 				radeon_dpm_get_sclk(rdev, true) * 10;
8786 		} else {
8787 			wm_low.yclk = rdev->pm.current_mclk * 10;
8788 			wm_low.sclk = rdev->pm.current_sclk * 10;
8789 		}
8790 
8791 		wm_low.disp_clk = mode->clock;
8792 		wm_low.src_width = mode->crtc_hdisplay;
8793 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8794 		wm_low.blank_time = line_time - wm_low.active_time;
8795 		wm_low.interlaced = false;
8796 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8797 			wm_low.interlaced = true;
8798 		wm_low.vsc = radeon_crtc->vsc;
8799 		wm_low.vtaps = 1;
8800 		if (radeon_crtc->rmx_type != RMX_OFF)
8801 			wm_low.vtaps = 2;
8802 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8803 		wm_low.lb_size = lb_size;
8804 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8805 		wm_low.num_heads = num_heads;
8806 
8807 		/* set for low clocks */
8808 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
8809 
8810 		/* possibly force display priority to high */
8811 		/* should really do this at mode validation time... */
8812 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
8813 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
8814 		    !dce8_check_latency_hiding(&wm_low) ||
8815 		    (rdev->disp_priority == 2)) {
8816 			DRM_DEBUG_KMS("force priority to high\n");
8817 		}
8818 	}
8819 
8820 	/* select wm A */
8821 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8822 	tmp = wm_mask;
8823 	tmp &= ~LATENCY_WATERMARK_MASK(3);
8824 	tmp |= LATENCY_WATERMARK_MASK(1);
8825 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8826 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8827 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
8828 		LATENCY_HIGH_WATERMARK(line_time)));
8829 	/* select wm B */
8830 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8831 	tmp &= ~LATENCY_WATERMARK_MASK(3);
8832 	tmp |= LATENCY_WATERMARK_MASK(2);
8833 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8834 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8835 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
8836 		LATENCY_HIGH_WATERMARK(line_time)));
8837 	/* restore original selection */
8838 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
8839 
8840 	/* save values for DPM */
8841 	radeon_crtc->line_time = line_time;
8842 	radeon_crtc->wm_high = latency_watermark_a;
8843 	radeon_crtc->wm_low = latency_watermark_b;
8844 }
8845 
8846 /**
8847  * dce8_bandwidth_update - program display watermarks
8848  *
8849  * @rdev: radeon_device pointer
8850  *
8851  * Calculate and program the display watermarks and line
8852  * buffer allocation (CIK).
8853  */
8854 void dce8_bandwidth_update(struct radeon_device *rdev)
8855 {
8856 	struct drm_display_mode *mode = NULL;
8857 	u32 num_heads = 0, lb_size;
8858 	int i;
8859 
8860 	radeon_update_display_priority(rdev);
8861 
8862 	for (i = 0; i < rdev->num_crtc; i++) {
8863 		if (rdev->mode_info.crtcs[i]->base.enabled)
8864 			num_heads++;
8865 	}
8866 	for (i = 0; i < rdev->num_crtc; i++) {
8867 		mode = &rdev->mode_info.crtcs[i]->base.mode;
8868 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8869 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8870 	}
8871 }
8872 
8873 /**
8874  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8875  *
8876  * @rdev: radeon_device pointer
8877  *
8878  * Fetches a GPU clock counter snapshot (SI).
8879  * Returns the 64 bit clock counter snapshot.
8880  */
8881 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8882 {
8883 	uint64_t clock;
8884 
8885 	mutex_lock(&rdev->gpu_clock_mutex);
8886 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8887 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8888 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8889 	mutex_unlock(&rdev->gpu_clock_mutex);
8890 	return clock;
8891 }
8892 
8893 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8894                               u32 cntl_reg, u32 status_reg)
8895 {
8896 	int r, i;
8897 	struct atom_clock_dividers dividers;
8898 	uint32_t tmp;
8899 
8900 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8901 					   clock, false, &dividers);
8902 	if (r)
8903 		return r;
8904 
8905 	tmp = RREG32_SMC(cntl_reg);
8906 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8907 	tmp |= dividers.post_divider;
8908 	WREG32_SMC(cntl_reg, tmp);
8909 
8910 	for (i = 0; i < 100; i++) {
8911 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
8912 			break;
8913 		mdelay(10);
8914 	}
8915 	if (i == 100)
8916 		return -ETIMEDOUT;
8917 
8918 	return 0;
8919 }
8920 
8921 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8922 {
8923 	int r = 0;
8924 
8925 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8926 	if (r)
8927 		return r;
8928 
8929 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8930 	return r;
8931 }
8932 
8933 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
8934 {
8935 	int r, i;
8936 	struct atom_clock_dividers dividers;
8937 	u32 tmp;
8938 
8939 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8940 					   ecclk, false, &dividers);
8941 	if (r)
8942 		return r;
8943 
8944 	for (i = 0; i < 100; i++) {
8945 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
8946 			break;
8947 		mdelay(10);
8948 	}
8949 	if (i == 100)
8950 		return -ETIMEDOUT;
8951 
8952 	tmp = RREG32_SMC(CG_ECLK_CNTL);
8953 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
8954 	tmp |= dividers.post_divider;
8955 	WREG32_SMC(CG_ECLK_CNTL, tmp);
8956 
8957 	for (i = 0; i < 100; i++) {
8958 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
8959 			break;
8960 		mdelay(10);
8961 	}
8962 	if (i == 100)
8963 		return -ETIMEDOUT;
8964 
8965 	return 0;
8966 }
8967 
/*
 * cik_pcie_gen3_enable - bring the PCIE link up to gen2/gen3 speed
 *
 * Reads the speed capabilities of the bridge/GPU link and, if a faster
 * rate is supported than is currently in use, runs the gen3
 * equalization retry sequence and requests a link speed change.
 * No-op on IGPs, non-PCIE parts, or when disabled via radeon.pcie_gen2=0.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* module parameter gate (radeon.pcie_gen2=0 disables this) */
	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do if neither gen2 nor gen3 is supported */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	/* 0 = gen1, 1 = gen2, 2 = gen3 */
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* need the PCIe capability on both ends of the link */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save current link control on both ends so the
			 * HAWD bit can be restored after each retry */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* widen the link back up to max before retraining */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization redo attempts */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				/* NOTE(review): these raw masks presumably target
				 * the Enter Compliance / margin fields of LNKCTL2 -
				 * they don't line up exactly with the pci_regs.h
				 * field definitions; confirm against the PCIe spec */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed (low 4 bits of LNKCTL2) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change and wait for the hw to ack it */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9124 
/*
 * cik_program_aspm - configure PCIE Active State Power Management
 *
 * Enables L0s/L1 inactivity timers, PLL power-down in L1, dynamic lane
 * power states, and (when the root port supports clock PM) CLKREQ-based
 * clocking.  The disable_* locals are hard-coded false, so all of those
 * paths are taken unconditionally.  No-op on IGPs, non-PCIE parts, or
 * when disabled via the radeon_aspm module parameter.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the number of fast training sequences transmitted */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* program L0s/L1 inactivity timers (7 = max) */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PIF PLLs to power down in L1/TXS2 */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				/* CLKREQ is only usable if the upstream port
				 * advertises clock power management
				 * NOTE(review): root is bus->self - presumably
				 * non-NULL for a discrete GPU behind a root
				 * port; would be NULL on a root bus - confirm */
				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* switch misc clocks off the refclk so it can gate */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: still commit the L0s/PMI settings above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* memory light sleep for the BIF */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* if N_FTS is maxed out and the link is reversed in both
		 * directions, turn the L0s inactivity timer back off */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
9272