xref: /openbmc/linux/drivers/gpu/drm/radeon/cik.c (revision 92a2c6b2)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36 #include "radeon_kfd.h"
37 
38 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
45 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
46 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
47 
48 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
49 MODULE_FIRMWARE("radeon/bonaire_me.bin");
50 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
51 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
52 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
53 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
54 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
55 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
56 
57 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
58 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
59 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
60 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
61 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
63 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
64 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
65 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
66 
67 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
68 MODULE_FIRMWARE("radeon/hawaii_me.bin");
69 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
70 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
71 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
72 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
73 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
74 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
75 
76 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
77 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
78 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
79 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
80 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
81 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
82 
83 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
84 MODULE_FIRMWARE("radeon/kaveri_me.bin");
85 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
86 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
87 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
88 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
89 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
90 
91 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
92 MODULE_FIRMWARE("radeon/KABINI_me.bin");
93 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
94 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
95 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
96 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
97 
98 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
99 MODULE_FIRMWARE("radeon/kabini_me.bin");
100 MODULE_FIRMWARE("radeon/kabini_ce.bin");
101 MODULE_FIRMWARE("radeon/kabini_mec.bin");
102 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
103 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
104 
105 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
106 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
107 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
108 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
109 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
110 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
111 
112 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
113 MODULE_FIRMWARE("radeon/mullins_me.bin");
114 MODULE_FIRMWARE("radeon/mullins_ce.bin");
115 MODULE_FIRMWARE("radeon/mullins_mec.bin");
116 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
117 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
118 
119 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
120 extern void r600_ih_ring_fini(struct radeon_device *rdev);
121 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
122 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
123 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
124 extern void sumo_rlc_fini(struct radeon_device *rdev);
125 extern int sumo_rlc_init(struct radeon_device *rdev);
126 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
127 extern void si_rlc_reset(struct radeon_device *rdev);
128 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
129 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
130 extern int cik_sdma_resume(struct radeon_device *rdev);
131 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
132 extern void cik_sdma_fini(struct radeon_device *rdev);
133 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
134 static void cik_rlc_stop(struct radeon_device *rdev);
135 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
136 static void cik_program_aspm(struct radeon_device *rdev);
137 static void cik_init_pg(struct radeon_device *rdev);
138 static void cik_init_cg(struct radeon_device *rdev);
139 static void cik_fini_pg(struct radeon_device *rdev);
140 static void cik_fini_cg(struct radeon_device *rdev);
141 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
142 					  bool enable);
143 
144 /* get temperature in millidegrees */
145 int ci_get_temp(struct radeon_device *rdev)
146 {
147 	u32 temp;
148 	int actual_temp = 0;
149 
150 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
151 		CTF_TEMP_SHIFT;
152 
153 	if (temp & 0x200)
154 		actual_temp = 255;
155 	else
156 		actual_temp = temp & 0x1ff;
157 
158 	actual_temp = actual_temp * 1000;
159 
160 	return actual_temp;
161 }
162 
163 /* get temperature in millidegrees */
164 int kv_get_temp(struct radeon_device *rdev)
165 {
166 	u32 temp;
167 	int actual_temp = 0;
168 
169 	temp = RREG32_SMC(0xC0300E0C);
170 
171 	if (temp)
172 		actual_temp = (temp / 8) - 49;
173 	else
174 		actual_temp = 0;
175 
176 	actual_temp = actual_temp * 1000;
177 
178 	return actual_temp;
179 }
180 
/*
 * Indirect registers accessor
 *
 * PCIE port registers are reached through an index/data pair:
 * write the register offset to PCIE_INDEX, then access PCIE_DATA.
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	/* the index/data pair is shared hardware state; serialize users */
	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* read back to post the index write */
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}
196 
/*
 * cik_pciep_wreg - write a PCIE port register via the indirect
 * PCIE_INDEX/PCIE_DATA pair, under the same lock as cik_pciep_rreg().
 */
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* read back to post the index write */
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);	/* read back to post the data write */
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
208 
/*
 * RLC save/restore register list for Spectre (Kaveri) GFX parts.
 *
 * Each table entry packs a selector into the high 16 bits and a
 * register dword offset (byte offset >> 2) into the low 16 bits;
 * most entries are followed by a 0x00000000 placeholder word.
 * NOTE(review): the meaning of the selector values (0x0e00, 0x4e00,
 * ...) and of the bare count words (0x3, 0x5) is defined by the RLC
 * setup code that consumes this table -- confirm against that code.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,	/* bare count word -- section marker, see header comment */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,	/* bare count word -- final section has no placeholder words */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
655 
/*
 * RLC save/restore register list for Kalindi (Kabini) GFX parts.
 *
 * Same encoding as spectre_rlc_save_restore_register_list: each entry
 * packs a selector into the high 16 bits and a register dword offset
 * (byte offset >> 2) into the low 16 bits; most entries are followed
 * by a 0x00000000 placeholder word.  NOTE(review): the selector values
 * and the bare count words (0x3, 0x5) are interpreted by the RLC setup
 * code that consumes this table -- confirm against that code.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,	/* bare count word -- section marker, see header comment */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,	/* bare count word -- final section has no placeholder words */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
980 
/*
 * Golden (power-on default override) SPM register settings for Bonaire.
 * Entries are triplets laid out as { register offset, mask, value };
 * NOTE(review): presumably applied by a register-sequence programming
 * helper elsewhere -- confirm against the consumer of this table.
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
985 
/*
 * Golden common register settings for Bonaire.  Entries are triplets
 * laid out as { register offset, mask, value }; NOTE(review): presumably
 * applied by a register-sequence programming helper elsewhere -- confirm
 * against the consumer of this table.
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
993 
/*
 * Golden register settings for Bonaire.  Entries are triplets laid out
 * as { register offset, mask, value }; NOTE(review): presumably applied
 * by a register-sequence programming helper elsewhere -- confirm against
 * the consumer of this table.
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
1038 
/*
 * Bonaire MGCG/CGCG (clockgating) init sequence.  Flat array of dword
 * triplets — apparently {register offset, mask, value}, consumed by
 * radeon_program_register_sequence() from cik_init_golden_registers();
 * confirm exact mask semantics against that helper.
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1124 
/*
 * Spectre (Kaveri) golden SPM register: {offset, mask, value} triplet,
 * programmed via radeon_program_register_sequence().
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1129 
/*
 * Spectre (Kaveri) common golden registers: {offset, mask, value}
 * triplets, programmed via radeon_program_register_sequence().
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1137 
/*
 * Spectre (Kaveri) golden register settings: {offset, mask, value}
 * triplets, programmed via radeon_program_register_sequence().
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1166 
/*
 * Spectre (Kaveri) MGCG/CGCG (clockgating) init sequence: {offset, mask,
 * value} triplets, programmed via radeon_program_register_sequence().
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1257 
/*
 * Kalindi (Kabini; also reused for Mullins) golden SPM register:
 * {offset, mask, value} triplet for radeon_program_register_sequence().
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1262 
/*
 * Kalindi (Kabini; also reused for Mullins) common golden registers:
 * {offset, mask, value} triplets for radeon_program_register_sequence().
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1270 
/*
 * Kalindi (Kabini) golden register settings: {offset, mask, value}
 * triplets, programmed via radeon_program_register_sequence().
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1304 
/*
 * Kalindi (Kabini; also reused for Mullins) MGCG/CGCG (clockgating) init
 * sequence: {offset, mask, value} triplets for
 * radeon_program_register_sequence().
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1363 
/*
 * Hawaii golden SPM register: {offset, mask, value} triplet for
 * radeon_program_register_sequence().
 */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1368 
/*
 * Hawaii common golden registers: {offset, mask, value} triplets for
 * radeon_program_register_sequence().
 */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1377 
/*
 * Hawaii golden register settings: {offset, mask, value} triplets,
 * programmed via radeon_program_register_sequence().
 */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1417 
/*
 * Hawaii MGCG/CGCG (clockgating) init sequence: {offset, mask, value}
 * triplets, programmed via radeon_program_register_sequence().
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1528 
/*
 * Godavari (Mullins) golden register settings: {offset, mask, value}
 * triplets, programmed via radeon_program_register_sequence().
 */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400, /* NOTE(review): offset looks like a typo for 0x9834 (cf. kalindi table) — confirm against hardware docs before changing */
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1564 
1565 
1566 static void cik_init_golden_registers(struct radeon_device *rdev)
1567 {
1568 	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
1569 	mutex_lock(&rdev->grbm_idx_mutex);
1570 	switch (rdev->family) {
1571 	case CHIP_BONAIRE:
1572 		radeon_program_register_sequence(rdev,
1573 						 bonaire_mgcg_cgcg_init,
1574 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1575 		radeon_program_register_sequence(rdev,
1576 						 bonaire_golden_registers,
1577 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1578 		radeon_program_register_sequence(rdev,
1579 						 bonaire_golden_common_registers,
1580 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1581 		radeon_program_register_sequence(rdev,
1582 						 bonaire_golden_spm_registers,
1583 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1584 		break;
1585 	case CHIP_KABINI:
1586 		radeon_program_register_sequence(rdev,
1587 						 kalindi_mgcg_cgcg_init,
1588 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1589 		radeon_program_register_sequence(rdev,
1590 						 kalindi_golden_registers,
1591 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1592 		radeon_program_register_sequence(rdev,
1593 						 kalindi_golden_common_registers,
1594 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1595 		radeon_program_register_sequence(rdev,
1596 						 kalindi_golden_spm_registers,
1597 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1598 		break;
1599 	case CHIP_MULLINS:
1600 		radeon_program_register_sequence(rdev,
1601 						 kalindi_mgcg_cgcg_init,
1602 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1603 		radeon_program_register_sequence(rdev,
1604 						 godavari_golden_registers,
1605 						 (const u32)ARRAY_SIZE(godavari_golden_registers));
1606 		radeon_program_register_sequence(rdev,
1607 						 kalindi_golden_common_registers,
1608 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1609 		radeon_program_register_sequence(rdev,
1610 						 kalindi_golden_spm_registers,
1611 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1612 		break;
1613 	case CHIP_KAVERI:
1614 		radeon_program_register_sequence(rdev,
1615 						 spectre_mgcg_cgcg_init,
1616 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1617 		radeon_program_register_sequence(rdev,
1618 						 spectre_golden_registers,
1619 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1620 		radeon_program_register_sequence(rdev,
1621 						 spectre_golden_common_registers,
1622 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1623 		radeon_program_register_sequence(rdev,
1624 						 spectre_golden_spm_registers,
1625 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1626 		break;
1627 	case CHIP_HAWAII:
1628 		radeon_program_register_sequence(rdev,
1629 						 hawaii_mgcg_cgcg_init,
1630 						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1631 		radeon_program_register_sequence(rdev,
1632 						 hawaii_golden_registers,
1633 						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1634 		radeon_program_register_sequence(rdev,
1635 						 hawaii_golden_common_registers,
1636 						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1637 		radeon_program_register_sequence(rdev,
1638 						 hawaii_golden_spm_registers,
1639 						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1640 		break;
1641 	default:
1642 		break;
1643 	}
1644 	mutex_unlock(&rdev->grbm_idx_mutex);
1645 }
1646 
1647 /**
1648  * cik_get_xclk - get the xclk
1649  *
1650  * @rdev: radeon_device pointer
1651  *
1652  * Returns the reference clock used by the gfx engine
1653  * (CIK).
1654  */
1655 u32 cik_get_xclk(struct radeon_device *rdev)
1656 {
1657         u32 reference_clock = rdev->clock.spll.reference_freq;
1658 
1659 	if (rdev->flags & RADEON_IS_IGP) {
1660 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1661 			return reference_clock / 2;
1662 	} else {
1663 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1664 			return reference_clock / 4;
1665 	}
1666 	return reference_clock;
1667 }
1668 
1669 /**
1670  * cik_mm_rdoorbell - read a doorbell dword
1671  *
1672  * @rdev: radeon_device pointer
1673  * @index: doorbell index
1674  *
1675  * Returns the value in the doorbell aperture at the
1676  * requested doorbell index (CIK).
1677  */
1678 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1679 {
1680 	if (index < rdev->doorbell.num_doorbells) {
1681 		return readl(rdev->doorbell.ptr + index);
1682 	} else {
1683 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1684 		return 0;
1685 	}
1686 }
1687 
1688 /**
1689  * cik_mm_wdoorbell - write a doorbell dword
1690  *
1691  * @rdev: radeon_device pointer
1692  * @index: doorbell index
1693  * @v: value to write
1694  *
1695  * Writes @v to the doorbell aperture at the
1696  * requested doorbell index (CIK).
1697  */
1698 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1699 {
1700 	if (index < rdev->doorbell.num_doorbells) {
1701 		writel(v, rdev->doorbell.ptr + index);
1702 	} else {
1703 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1704 	}
1705 }
1706 
1707 #define BONAIRE_IO_MC_REGS_SIZE 36
1708 
/*
 * Bonaire MC io debug register pairs: {MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA} values, written by ci_mc_load_microcode()
 * when the legacy (non-header) MC firmware image is in use.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1748 
1749 #define HAWAII_IO_MC_REGS_SIZE 22
1750 
/*
 * Hawaii MC io debug register pairs: {MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA} values, written by ci_mc_load_microcode()
 * when the legacy (non-header) MC firmware image is in use.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1776 
1777 
1778 /**
1779  * cik_srbm_select - select specific register instances
1780  *
1781  * @rdev: radeon_device pointer
1782  * @me: selected ME (micro engine)
1783  * @pipe: pipe
1784  * @queue: queue
1785  * @vmid: VMID
1786  *
1787  * Switches the currently active registers instances.  Some
1788  * registers are instanced per VMID, others are instanced per
1789  * me/pipe/queue combination.
1790  */
1791 static void cik_srbm_select(struct radeon_device *rdev,
1792 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1793 {
1794 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1795 			     MEID(me & 0x3) |
1796 			     VMID(vmid & 0xf) |
1797 			     QUEUEID(queue & 0x7));
1798 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1799 }
1800 
1801 /* ucode loading */
1802 /**
1803  * ci_mc_load_microcode - load MC ucode into the hw
1804  *
1805  * @rdev: radeon_device pointer
1806  *
1807  * Load the GDDR MC ucode into the hw (CIK).
1808  * Returns 0 on success, error on failure.
1809  */
1810 int ci_mc_load_microcode(struct radeon_device *rdev)
1811 {
1812 	const __be32 *fw_data = NULL;
1813 	const __le32 *new_fw_data = NULL;
1814 	u32 running, blackout = 0, tmp;
1815 	u32 *io_mc_regs = NULL;
1816 	const __le32 *new_io_mc_regs = NULL;
1817 	int i, regs_size, ucode_size;
1818 
1819 	if (!rdev->mc_fw)
1820 		return -EINVAL;
1821 
1822 	if (rdev->new_fw) {
1823 		const struct mc_firmware_header_v1_0 *hdr =
1824 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1825 
1826 		radeon_ucode_print_mc_hdr(&hdr->header);
1827 
1828 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1829 		new_io_mc_regs = (const __le32 *)
1830 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1831 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1832 		new_fw_data = (const __le32 *)
1833 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1834 	} else {
1835 		ucode_size = rdev->mc_fw->size / 4;
1836 
1837 		switch (rdev->family) {
1838 		case CHIP_BONAIRE:
1839 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1840 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1841 			break;
1842 		case CHIP_HAWAII:
1843 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1844 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1845 			break;
1846 		default:
1847 			return -EINVAL;
1848 		}
1849 		fw_data = (const __be32 *)rdev->mc_fw->data;
1850 	}
1851 
1852 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1853 
1854 	if (running == 0) {
1855 		if (running) {
1856 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1857 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1858 		}
1859 
1860 		/* reset the engine and set to writable */
1861 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1862 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1863 
1864 		/* load mc io regs */
1865 		for (i = 0; i < regs_size; i++) {
1866 			if (rdev->new_fw) {
1867 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1868 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1869 			} else {
1870 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1871 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1872 			}
1873 		}
1874 
1875 		tmp = RREG32(MC_SEQ_MISC0);
1876 		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1877 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1878 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1879 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1880 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1881 		}
1882 
1883 		/* load the MC ucode */
1884 		for (i = 0; i < ucode_size; i++) {
1885 			if (rdev->new_fw)
1886 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1887 			else
1888 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1889 		}
1890 
1891 		/* put the engine back into the active state */
1892 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1893 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1894 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1895 
1896 		/* wait for training to complete */
1897 		for (i = 0; i < rdev->usec_timeout; i++) {
1898 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1899 				break;
1900 			udelay(1);
1901 		}
1902 		for (i = 0; i < rdev->usec_timeout; i++) {
1903 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1904 				break;
1905 			udelay(1);
1906 		}
1907 
1908 		if (running)
1909 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1910 	}
1911 
1912 	return 0;
1913 }
1914 
1915 /**
1916  * cik_init_microcode - load ucode images from disk
1917  *
1918  * @rdev: radeon_device pointer
1919  *
1920  * Use the firmware interface to load the ucode images into
1921  * the driver (not loaded into hw).
1922  * Returns 0 on success, error on failure.
1923  */
1924 static int cik_init_microcode(struct radeon_device *rdev)
1925 {
1926 	const char *chip_name;
1927 	const char *new_chip_name;
1928 	size_t pfp_req_size, me_req_size, ce_req_size,
1929 		mec_req_size, rlc_req_size, mc_req_size = 0,
1930 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1931 	char fw_name[30];
1932 	int new_fw = 0;
1933 	int err;
1934 	int num_fw;
1935 
1936 	DRM_DEBUG("\n");
1937 
1938 	switch (rdev->family) {
1939 	case CHIP_BONAIRE:
1940 		chip_name = "BONAIRE";
1941 		new_chip_name = "bonaire";
1942 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1943 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1944 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1945 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1946 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1947 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1948 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1949 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1950 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1951 		num_fw = 8;
1952 		break;
1953 	case CHIP_HAWAII:
1954 		chip_name = "HAWAII";
1955 		new_chip_name = "hawaii";
1956 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1957 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1958 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1959 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1960 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1961 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1962 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1963 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1964 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1965 		num_fw = 8;
1966 		break;
1967 	case CHIP_KAVERI:
1968 		chip_name = "KAVERI";
1969 		new_chip_name = "kaveri";
1970 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1971 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1972 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1973 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1974 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1975 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1976 		num_fw = 7;
1977 		break;
1978 	case CHIP_KABINI:
1979 		chip_name = "KABINI";
1980 		new_chip_name = "kabini";
1981 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1982 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1983 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1984 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1985 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1986 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1987 		num_fw = 6;
1988 		break;
1989 	case CHIP_MULLINS:
1990 		chip_name = "MULLINS";
1991 		new_chip_name = "mullins";
1992 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1993 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1994 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1995 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1996 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
1997 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1998 		num_fw = 6;
1999 		break;
2000 	default: BUG();
2001 	}
2002 
2003 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2004 
2005 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2006 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2007 	if (err) {
2008 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2009 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2010 		if (err)
2011 			goto out;
2012 		if (rdev->pfp_fw->size != pfp_req_size) {
2013 			printk(KERN_ERR
2014 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2015 			       rdev->pfp_fw->size, fw_name);
2016 			err = -EINVAL;
2017 			goto out;
2018 		}
2019 	} else {
2020 		err = radeon_ucode_validate(rdev->pfp_fw);
2021 		if (err) {
2022 			printk(KERN_ERR
2023 			       "cik_fw: validation failed for firmware \"%s\"\n",
2024 			       fw_name);
2025 			goto out;
2026 		} else {
2027 			new_fw++;
2028 		}
2029 	}
2030 
2031 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2032 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2033 	if (err) {
2034 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2035 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2036 		if (err)
2037 			goto out;
2038 		if (rdev->me_fw->size != me_req_size) {
2039 			printk(KERN_ERR
2040 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2041 			       rdev->me_fw->size, fw_name);
2042 			err = -EINVAL;
2043 		}
2044 	} else {
2045 		err = radeon_ucode_validate(rdev->me_fw);
2046 		if (err) {
2047 			printk(KERN_ERR
2048 			       "cik_fw: validation failed for firmware \"%s\"\n",
2049 			       fw_name);
2050 			goto out;
2051 		} else {
2052 			new_fw++;
2053 		}
2054 	}
2055 
2056 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2057 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2058 	if (err) {
2059 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2060 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2061 		if (err)
2062 			goto out;
2063 		if (rdev->ce_fw->size != ce_req_size) {
2064 			printk(KERN_ERR
2065 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2066 			       rdev->ce_fw->size, fw_name);
2067 			err = -EINVAL;
2068 		}
2069 	} else {
2070 		err = radeon_ucode_validate(rdev->ce_fw);
2071 		if (err) {
2072 			printk(KERN_ERR
2073 			       "cik_fw: validation failed for firmware \"%s\"\n",
2074 			       fw_name);
2075 			goto out;
2076 		} else {
2077 			new_fw++;
2078 		}
2079 	}
2080 
2081 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2082 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2083 	if (err) {
2084 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2085 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2086 		if (err)
2087 			goto out;
2088 		if (rdev->mec_fw->size != mec_req_size) {
2089 			printk(KERN_ERR
2090 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2091 			       rdev->mec_fw->size, fw_name);
2092 			err = -EINVAL;
2093 		}
2094 	} else {
2095 		err = radeon_ucode_validate(rdev->mec_fw);
2096 		if (err) {
2097 			printk(KERN_ERR
2098 			       "cik_fw: validation failed for firmware \"%s\"\n",
2099 			       fw_name);
2100 			goto out;
2101 		} else {
2102 			new_fw++;
2103 		}
2104 	}
2105 
2106 	if (rdev->family == CHIP_KAVERI) {
2107 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2108 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2109 		if (err) {
2110 			goto out;
2111 		} else {
2112 			err = radeon_ucode_validate(rdev->mec2_fw);
2113 			if (err) {
2114 				goto out;
2115 			} else {
2116 				new_fw++;
2117 			}
2118 		}
2119 	}
2120 
2121 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2122 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2123 	if (err) {
2124 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2125 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2126 		if (err)
2127 			goto out;
2128 		if (rdev->rlc_fw->size != rlc_req_size) {
2129 			printk(KERN_ERR
2130 			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2131 			       rdev->rlc_fw->size, fw_name);
2132 			err = -EINVAL;
2133 		}
2134 	} else {
2135 		err = radeon_ucode_validate(rdev->rlc_fw);
2136 		if (err) {
2137 			printk(KERN_ERR
2138 			       "cik_fw: validation failed for firmware \"%s\"\n",
2139 			       fw_name);
2140 			goto out;
2141 		} else {
2142 			new_fw++;
2143 		}
2144 	}
2145 
2146 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2147 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2148 	if (err) {
2149 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2150 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2151 		if (err)
2152 			goto out;
2153 		if (rdev->sdma_fw->size != sdma_req_size) {
2154 			printk(KERN_ERR
2155 			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2156 			       rdev->sdma_fw->size, fw_name);
2157 			err = -EINVAL;
2158 		}
2159 	} else {
2160 		err = radeon_ucode_validate(rdev->sdma_fw);
2161 		if (err) {
2162 			printk(KERN_ERR
2163 			       "cik_fw: validation failed for firmware \"%s\"\n",
2164 			       fw_name);
2165 			goto out;
2166 		} else {
2167 			new_fw++;
2168 		}
2169 	}
2170 
2171 	/* No SMC, MC ucode on APUs */
2172 	if (!(rdev->flags & RADEON_IS_IGP)) {
2173 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2174 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2175 		if (err) {
2176 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2177 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2178 			if (err) {
2179 				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2180 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2181 				if (err)
2182 					goto out;
2183 			}
2184 			if ((rdev->mc_fw->size != mc_req_size) &&
2185 			    (rdev->mc_fw->size != mc2_req_size)){
2186 				printk(KERN_ERR
2187 				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2188 				       rdev->mc_fw->size, fw_name);
2189 				err = -EINVAL;
2190 			}
2191 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2192 		} else {
2193 			err = radeon_ucode_validate(rdev->mc_fw);
2194 			if (err) {
2195 				printk(KERN_ERR
2196 				       "cik_fw: validation failed for firmware \"%s\"\n",
2197 				       fw_name);
2198 				goto out;
2199 			} else {
2200 				new_fw++;
2201 			}
2202 		}
2203 
2204 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2205 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2206 		if (err) {
2207 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2208 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2209 			if (err) {
2210 				printk(KERN_ERR
2211 				       "smc: error loading firmware \"%s\"\n",
2212 				       fw_name);
2213 				release_firmware(rdev->smc_fw);
2214 				rdev->smc_fw = NULL;
2215 				err = 0;
2216 			} else if (rdev->smc_fw->size != smc_req_size) {
2217 				printk(KERN_ERR
2218 				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2219 				       rdev->smc_fw->size, fw_name);
2220 				err = -EINVAL;
2221 			}
2222 		} else {
2223 			err = radeon_ucode_validate(rdev->smc_fw);
2224 			if (err) {
2225 				printk(KERN_ERR
2226 				       "cik_fw: validation failed for firmware \"%s\"\n",
2227 				       fw_name);
2228 				goto out;
2229 			} else {
2230 				new_fw++;
2231 			}
2232 		}
2233 	}
2234 
2235 	if (new_fw == 0) {
2236 		rdev->new_fw = false;
2237 	} else if (new_fw < num_fw) {
2238 		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2239 		err = -EINVAL;
2240 	} else {
2241 		rdev->new_fw = true;
2242 	}
2243 
2244 out:
2245 	if (err) {
2246 		if (err != -EINVAL)
2247 			printk(KERN_ERR
2248 			       "cik_cp: Failed to load firmware \"%s\"\n",
2249 			       fw_name);
2250 		release_firmware(rdev->pfp_fw);
2251 		rdev->pfp_fw = NULL;
2252 		release_firmware(rdev->me_fw);
2253 		rdev->me_fw = NULL;
2254 		release_firmware(rdev->ce_fw);
2255 		rdev->ce_fw = NULL;
2256 		release_firmware(rdev->mec_fw);
2257 		rdev->mec_fw = NULL;
2258 		release_firmware(rdev->mec2_fw);
2259 		rdev->mec2_fw = NULL;
2260 		release_firmware(rdev->rlc_fw);
2261 		rdev->rlc_fw = NULL;
2262 		release_firmware(rdev->sdma_fw);
2263 		rdev->sdma_fw = NULL;
2264 		release_firmware(rdev->mc_fw);
2265 		rdev->mc_fw = NULL;
2266 		release_firmware(rdev->smc_fw);
2267 		rdev->smc_fw = NULL;
2268 	}
2269 	return err;
2270 }
2271 
2272 /*
2273  * Core functions
2274  */
2275 /**
2276  * cik_tiling_mode_table_init - init the hw tiling table
2277  *
2278  * @rdev: radeon_device pointer
2279  *
2280  * Starting with SI, the tiling setup is done globally in a
2281  * set of 32 tiling modes.  Rather than selecting each set of
2282  * parameters per surface as on older asics, we just select
2283  * which index in the tiling table we want to use, and the
2284  * surface uses those parameters (CIK).
2285  */
2286 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2287 {
2288 	const u32 num_tile_mode_states = 32;
2289 	const u32 num_secondary_tile_mode_states = 16;
2290 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2291 	u32 num_pipe_configs;
2292 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2293 		rdev->config.cik.max_shader_engines;
2294 
2295 	switch (rdev->config.cik.mem_row_size_in_kb) {
2296 	case 1:
2297 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2298 		break;
2299 	case 2:
2300 	default:
2301 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2302 		break;
2303 	case 4:
2304 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2305 		break;
2306 	}
2307 
2308 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2309 	if (num_pipe_configs > 8)
2310 		num_pipe_configs = 16;
2311 
2312 	if (num_pipe_configs == 16) {
2313 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2314 			switch (reg_offset) {
2315 			case 0:
2316 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2317 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2318 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2319 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2320 				break;
2321 			case 1:
2322 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2323 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2324 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2326 				break;
2327 			case 2:
2328 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2329 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2330 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2331 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2332 				break;
2333 			case 3:
2334 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2335 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2336 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2337 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2338 				break;
2339 			case 4:
2340 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2341 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2342 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2343 						 TILE_SPLIT(split_equal_to_row_size));
2344 				break;
2345 			case 5:
2346 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2347 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2348 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2349 				break;
2350 			case 6:
2351 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2352 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2353 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2355 				break;
2356 			case 7:
2357 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2358 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2359 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2360 						 TILE_SPLIT(split_equal_to_row_size));
2361 				break;
2362 			case 8:
2363 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2364 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2365 				break;
2366 			case 9:
2367 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2368 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2369 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2370 				break;
2371 			case 10:
2372 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2373 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2374 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2375 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2376 				break;
2377 			case 11:
2378 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2379 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2380 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2381 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2382 				break;
2383 			case 12:
2384 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2385 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2386 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2388 				break;
2389 			case 13:
2390 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2391 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2393 				break;
2394 			case 14:
2395 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2396 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2397 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2398 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2399 				break;
2400 			case 16:
2401 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2402 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2403 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2404 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2405 				break;
2406 			case 17:
2407 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2408 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2409 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2410 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2411 				break;
2412 			case 27:
2413 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2414 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2416 				break;
2417 			case 28:
2418 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2419 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2420 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2421 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2422 				break;
2423 			case 29:
2424 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2425 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2426 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2427 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2428 				break;
2429 			case 30:
2430 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2431 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2432 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434 				break;
2435 			default:
2436 				gb_tile_moden = 0;
2437 				break;
2438 			}
2439 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2440 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2441 		}
2442 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2443 			switch (reg_offset) {
2444 			case 0:
2445 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2447 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2448 						 NUM_BANKS(ADDR_SURF_16_BANK));
2449 				break;
2450 			case 1:
2451 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2452 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2453 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2454 						 NUM_BANKS(ADDR_SURF_16_BANK));
2455 				break;
2456 			case 2:
2457 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2459 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2460 						 NUM_BANKS(ADDR_SURF_16_BANK));
2461 				break;
2462 			case 3:
2463 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2464 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2465 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2466 						 NUM_BANKS(ADDR_SURF_16_BANK));
2467 				break;
2468 			case 4:
2469 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2472 						 NUM_BANKS(ADDR_SURF_8_BANK));
2473 				break;
2474 			case 5:
2475 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2476 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2477 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2478 						 NUM_BANKS(ADDR_SURF_4_BANK));
2479 				break;
2480 			case 6:
2481 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2483 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2484 						 NUM_BANKS(ADDR_SURF_2_BANK));
2485 				break;
2486 			case 8:
2487 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2488 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2489 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2490 						 NUM_BANKS(ADDR_SURF_16_BANK));
2491 				break;
2492 			case 9:
2493 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2495 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2496 						 NUM_BANKS(ADDR_SURF_16_BANK));
2497 				break;
2498 			case 10:
2499 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2500 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2501 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2502 						 NUM_BANKS(ADDR_SURF_16_BANK));
2503 				break;
2504 			case 11:
2505 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2506 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2507 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2508 						 NUM_BANKS(ADDR_SURF_8_BANK));
2509 				break;
2510 			case 12:
2511 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2512 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2513 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2514 						 NUM_BANKS(ADDR_SURF_4_BANK));
2515 				break;
2516 			case 13:
2517 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2518 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2519 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2520 						 NUM_BANKS(ADDR_SURF_2_BANK));
2521 				break;
2522 			case 14:
2523 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2525 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2526 						 NUM_BANKS(ADDR_SURF_2_BANK));
2527 				break;
2528 			default:
2529 				gb_tile_moden = 0;
2530 				break;
2531 			}
2532 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2533 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2534 		}
2535 	} else if (num_pipe_configs == 8) {
2536 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2537 			switch (reg_offset) {
2538 			case 0:
2539 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2540 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2541 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2542 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2543 				break;
2544 			case 1:
2545 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2546 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2547 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2549 				break;
2550 			case 2:
2551 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2552 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2553 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2555 				break;
2556 			case 3:
2557 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2558 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2559 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2561 				break;
2562 			case 4:
2563 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2564 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2565 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2566 						 TILE_SPLIT(split_equal_to_row_size));
2567 				break;
2568 			case 5:
2569 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2570 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2572 				break;
2573 			case 6:
2574 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2575 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2576 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2577 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2578 				break;
2579 			case 7:
2580 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2581 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2582 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2583 						 TILE_SPLIT(split_equal_to_row_size));
2584 				break;
2585 			case 8:
2586 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2587 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2588 				break;
2589 			case 9:
2590 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2591 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2592 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2593 				break;
2594 			case 10:
2595 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2596 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2597 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2598 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2599 				break;
2600 			case 11:
2601 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2602 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2603 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2604 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2605 				break;
2606 			case 12:
2607 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2608 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2609 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2610 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2611 				break;
2612 			case 13:
2613 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2614 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2615 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2616 				break;
2617 			case 14:
2618 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2619 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2620 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2621 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2622 				break;
2623 			case 16:
2624 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2625 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2626 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2627 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2628 				break;
2629 			case 17:
2630 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2631 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2632 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2633 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2634 				break;
2635 			case 27:
2636 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2637 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2638 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2639 				break;
2640 			case 28:
2641 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2642 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2643 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2644 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2645 				break;
2646 			case 29:
2647 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2648 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2649 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2650 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2651 				break;
2652 			case 30:
2653 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2654 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2655 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2656 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2657 				break;
2658 			default:
2659 				gb_tile_moden = 0;
2660 				break;
2661 			}
2662 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2663 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2664 		}
2665 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2666 			switch (reg_offset) {
2667 			case 0:
2668 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2669 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2670 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2671 						 NUM_BANKS(ADDR_SURF_16_BANK));
2672 				break;
2673 			case 1:
2674 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2675 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2676 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2677 						 NUM_BANKS(ADDR_SURF_16_BANK));
2678 				break;
2679 			case 2:
2680 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2681 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2682 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2683 						 NUM_BANKS(ADDR_SURF_16_BANK));
2684 				break;
2685 			case 3:
2686 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2687 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2688 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2689 						 NUM_BANKS(ADDR_SURF_16_BANK));
2690 				break;
2691 			case 4:
2692 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2693 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2694 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2695 						 NUM_BANKS(ADDR_SURF_8_BANK));
2696 				break;
2697 			case 5:
2698 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2699 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2700 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2701 						 NUM_BANKS(ADDR_SURF_4_BANK));
2702 				break;
2703 			case 6:
2704 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2705 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2706 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2707 						 NUM_BANKS(ADDR_SURF_2_BANK));
2708 				break;
2709 			case 8:
2710 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2711 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2712 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2713 						 NUM_BANKS(ADDR_SURF_16_BANK));
2714 				break;
2715 			case 9:
2716 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2717 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2718 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2719 						 NUM_BANKS(ADDR_SURF_16_BANK));
2720 				break;
2721 			case 10:
2722 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2723 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2724 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2725 						 NUM_BANKS(ADDR_SURF_16_BANK));
2726 				break;
2727 			case 11:
2728 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2729 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2730 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2731 						 NUM_BANKS(ADDR_SURF_16_BANK));
2732 				break;
2733 			case 12:
2734 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2735 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2736 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2737 						 NUM_BANKS(ADDR_SURF_8_BANK));
2738 				break;
2739 			case 13:
2740 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2741 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2742 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2743 						 NUM_BANKS(ADDR_SURF_4_BANK));
2744 				break;
2745 			case 14:
2746 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2747 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2748 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2749 						 NUM_BANKS(ADDR_SURF_2_BANK));
2750 				break;
2751 			default:
2752 				gb_tile_moden = 0;
2753 				break;
2754 			}
2755 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2756 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2757 		}
2758 	} else if (num_pipe_configs == 4) {
2759 		if (num_rbs == 4) {
2760 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2761 				switch (reg_offset) {
2762 				case 0:
2763 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2764 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2765 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2766 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2767 					break;
2768 				case 1:
2769 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2770 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2771 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2772 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2773 					break;
2774 				case 2:
2775 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2776 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2777 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2778 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2779 					break;
2780 				case 3:
2781 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2782 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2783 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2784 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2785 					break;
2786 				case 4:
2787 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2788 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2789 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2790 							 TILE_SPLIT(split_equal_to_row_size));
2791 					break;
2792 				case 5:
2793 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2794 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2795 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2796 					break;
2797 				case 6:
2798 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2799 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2800 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2802 					break;
2803 				case 7:
2804 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2805 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2806 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2807 							 TILE_SPLIT(split_equal_to_row_size));
2808 					break;
2809 				case 8:
2810 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2811 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2812 					break;
2813 				case 9:
2814 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2815 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2816 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2817 					break;
2818 				case 10:
2819 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2820 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2821 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2822 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2823 					break;
2824 				case 11:
2825 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2826 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2827 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2828 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2829 					break;
2830 				case 12:
2831 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2832 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2833 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2834 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2835 					break;
2836 				case 13:
2837 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2838 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2839 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2840 					break;
2841 				case 14:
2842 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2843 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2844 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2845 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2846 					break;
2847 				case 16:
2848 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2849 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2850 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2851 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2852 					break;
2853 				case 17:
2854 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2855 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2856 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2857 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2858 					break;
2859 				case 27:
2860 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2861 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2862 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2863 					break;
2864 				case 28:
2865 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2866 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2867 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2868 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2869 					break;
2870 				case 29:
2871 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2872 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2873 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2874 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2875 					break;
2876 				case 30:
2877 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2878 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2879 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2880 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2881 					break;
2882 				default:
2883 					gb_tile_moden = 0;
2884 					break;
2885 				}
2886 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2887 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2888 			}
2889 		} else if (num_rbs < 4) {
2890 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2891 				switch (reg_offset) {
2892 				case 0:
2893 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2894 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2895 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2896 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2897 					break;
2898 				case 1:
2899 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2900 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2901 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2902 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2903 					break;
2904 				case 2:
2905 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2906 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2907 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2908 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2909 					break;
2910 				case 3:
2911 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2912 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2913 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2914 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2915 					break;
2916 				case 4:
2917 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2919 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2920 							 TILE_SPLIT(split_equal_to_row_size));
2921 					break;
2922 				case 5:
2923 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2924 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2925 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2926 					break;
2927 				case 6:
2928 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2929 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2930 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2931 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2932 					break;
2933 				case 7:
2934 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2935 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2936 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2937 							 TILE_SPLIT(split_equal_to_row_size));
2938 					break;
2939 				case 8:
2940 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2941 						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2942 					break;
2943 				case 9:
2944 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2945 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2946 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2947 					break;
2948 				case 10:
2949 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2950 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2951 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2952 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2953 					break;
2954 				case 11:
2955 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2956 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2957 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2958 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959 					break;
2960 				case 12:
2961 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2962 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2963 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2964 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2965 					break;
2966 				case 13:
2967 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2968 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2969 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2970 					break;
2971 				case 14:
2972 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2973 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2974 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2975 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2976 					break;
2977 				case 16:
2978 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2979 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2980 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2981 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2982 					break;
2983 				case 17:
2984 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2985 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2986 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2987 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2988 					break;
2989 				case 27:
2990 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2991 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2992 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2993 					break;
2994 				case 28:
2995 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2996 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2997 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2998 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2999 					break;
3000 				case 29:
3001 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3002 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3003 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3004 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3005 					break;
3006 				case 30:
3007 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3008 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3009 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3010 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3011 					break;
3012 				default:
3013 					gb_tile_moden = 0;
3014 					break;
3015 				}
3016 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3017 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3018 			}
3019 		}
3020 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3021 			switch (reg_offset) {
3022 			case 0:
3023 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3024 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3025 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3026 						 NUM_BANKS(ADDR_SURF_16_BANK));
3027 				break;
3028 			case 1:
3029 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3030 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3031 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3032 						 NUM_BANKS(ADDR_SURF_16_BANK));
3033 				break;
3034 			case 2:
3035 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3036 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3037 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3038 						 NUM_BANKS(ADDR_SURF_16_BANK));
3039 				break;
3040 			case 3:
3041 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3042 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3043 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3044 						 NUM_BANKS(ADDR_SURF_16_BANK));
3045 				break;
3046 			case 4:
3047 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3048 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3049 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3050 						 NUM_BANKS(ADDR_SURF_16_BANK));
3051 				break;
3052 			case 5:
3053 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3054 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3055 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3056 						 NUM_BANKS(ADDR_SURF_8_BANK));
3057 				break;
3058 			case 6:
3059 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3060 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3061 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3062 						 NUM_BANKS(ADDR_SURF_4_BANK));
3063 				break;
3064 			case 8:
3065 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3066 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3067 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3068 						 NUM_BANKS(ADDR_SURF_16_BANK));
3069 				break;
3070 			case 9:
3071 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3072 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3073 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3074 						 NUM_BANKS(ADDR_SURF_16_BANK));
3075 				break;
3076 			case 10:
3077 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3078 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3079 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3080 						 NUM_BANKS(ADDR_SURF_16_BANK));
3081 				break;
3082 			case 11:
3083 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3084 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3085 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3086 						 NUM_BANKS(ADDR_SURF_16_BANK));
3087 				break;
3088 			case 12:
3089 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3090 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3091 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3092 						 NUM_BANKS(ADDR_SURF_16_BANK));
3093 				break;
3094 			case 13:
3095 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3096 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3097 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3098 						 NUM_BANKS(ADDR_SURF_8_BANK));
3099 				break;
3100 			case 14:
3101 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3102 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3103 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3104 						 NUM_BANKS(ADDR_SURF_4_BANK));
3105 				break;
3106 			default:
3107 				gb_tile_moden = 0;
3108 				break;
3109 			}
3110 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3111 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3112 		}
3113 	} else if (num_pipe_configs == 2) {
3114 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3115 			switch (reg_offset) {
3116 			case 0:
3117 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3118 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3119 						 PIPE_CONFIG(ADDR_SURF_P2) |
3120 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3121 				break;
3122 			case 1:
3123 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3124 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3125 						 PIPE_CONFIG(ADDR_SURF_P2) |
3126 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3127 				break;
3128 			case 2:
3129 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3130 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3131 						 PIPE_CONFIG(ADDR_SURF_P2) |
3132 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3133 				break;
3134 			case 3:
3135 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3136 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3137 						 PIPE_CONFIG(ADDR_SURF_P2) |
3138 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3139 				break;
3140 			case 4:
3141 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3142 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3143 						 PIPE_CONFIG(ADDR_SURF_P2) |
3144 						 TILE_SPLIT(split_equal_to_row_size));
3145 				break;
3146 			case 5:
3147 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3148 						 PIPE_CONFIG(ADDR_SURF_P2) |
3149 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3150 				break;
3151 			case 6:
3152 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3153 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3154 						 PIPE_CONFIG(ADDR_SURF_P2) |
3155 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3156 				break;
3157 			case 7:
3158 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3159 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3160 						 PIPE_CONFIG(ADDR_SURF_P2) |
3161 						 TILE_SPLIT(split_equal_to_row_size));
3162 				break;
3163 			case 8:
3164 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3165 						PIPE_CONFIG(ADDR_SURF_P2);
3166 				break;
3167 			case 9:
3168 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3169 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3170 						 PIPE_CONFIG(ADDR_SURF_P2));
3171 				break;
3172 			case 10:
3173 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3174 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3175 						 PIPE_CONFIG(ADDR_SURF_P2) |
3176 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3177 				break;
3178 			case 11:
3179 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3180 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3181 						 PIPE_CONFIG(ADDR_SURF_P2) |
3182 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3183 				break;
3184 			case 12:
3185 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3186 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3187 						 PIPE_CONFIG(ADDR_SURF_P2) |
3188 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3189 				break;
3190 			case 13:
3191 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3192 						 PIPE_CONFIG(ADDR_SURF_P2) |
3193 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3194 				break;
3195 			case 14:
3196 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3197 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3198 						 PIPE_CONFIG(ADDR_SURF_P2) |
3199 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3200 				break;
3201 			case 16:
3202 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3203 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3204 						 PIPE_CONFIG(ADDR_SURF_P2) |
3205 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3206 				break;
3207 			case 17:
3208 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3209 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3210 						 PIPE_CONFIG(ADDR_SURF_P2) |
3211 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3212 				break;
3213 			case 27:
3214 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3215 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3216 						 PIPE_CONFIG(ADDR_SURF_P2));
3217 				break;
3218 			case 28:
3219 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3220 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3221 						 PIPE_CONFIG(ADDR_SURF_P2) |
3222 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3223 				break;
3224 			case 29:
3225 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3226 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3227 						 PIPE_CONFIG(ADDR_SURF_P2) |
3228 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3229 				break;
3230 			case 30:
3231 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3232 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3233 						 PIPE_CONFIG(ADDR_SURF_P2) |
3234 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3235 				break;
3236 			default:
3237 				gb_tile_moden = 0;
3238 				break;
3239 			}
3240 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3241 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3242 		}
3243 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3244 			switch (reg_offset) {
3245 			case 0:
3246 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3247 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3248 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3249 						 NUM_BANKS(ADDR_SURF_16_BANK));
3250 				break;
3251 			case 1:
3252 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3253 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3254 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3255 						 NUM_BANKS(ADDR_SURF_16_BANK));
3256 				break;
3257 			case 2:
3258 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3259 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3260 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3261 						 NUM_BANKS(ADDR_SURF_16_BANK));
3262 				break;
3263 			case 3:
3264 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3265 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3266 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3267 						 NUM_BANKS(ADDR_SURF_16_BANK));
3268 				break;
3269 			case 4:
3270 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3271 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3272 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3273 						 NUM_BANKS(ADDR_SURF_16_BANK));
3274 				break;
3275 			case 5:
3276 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3277 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3278 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3279 						 NUM_BANKS(ADDR_SURF_16_BANK));
3280 				break;
3281 			case 6:
3282 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3283 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3284 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3285 						 NUM_BANKS(ADDR_SURF_8_BANK));
3286 				break;
3287 			case 8:
3288 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3289 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3290 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3291 						 NUM_BANKS(ADDR_SURF_16_BANK));
3292 				break;
3293 			case 9:
3294 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3295 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3296 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3297 						 NUM_BANKS(ADDR_SURF_16_BANK));
3298 				break;
3299 			case 10:
3300 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3301 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3302 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3303 						 NUM_BANKS(ADDR_SURF_16_BANK));
3304 				break;
3305 			case 11:
3306 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3307 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3308 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3309 						 NUM_BANKS(ADDR_SURF_16_BANK));
3310 				break;
3311 			case 12:
3312 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3313 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3314 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3315 						 NUM_BANKS(ADDR_SURF_16_BANK));
3316 				break;
3317 			case 13:
3318 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3319 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3320 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3321 						 NUM_BANKS(ADDR_SURF_16_BANK));
3322 				break;
3323 			case 14:
3324 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3325 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3326 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3327 						 NUM_BANKS(ADDR_SURF_8_BANK));
3328 				break;
3329 			default:
3330 				gb_tile_moden = 0;
3331 				break;
3332 			}
3333 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3334 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3335 		}
3336 	} else
3337 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3338 }
3339 
3340 /**
3341  * cik_select_se_sh - select which SE, SH to address
3342  *
3343  * @rdev: radeon_device pointer
3344  * @se_num: shader engine to address
3345  * @sh_num: sh block to address
3346  *
3347  * Select which SE, SH combinations to address. Certain
3348  * registers are instanced per SE or SH.  0xffffffff means
3349  * broadcast to all SEs or SHs (CIK).
3350  */
3351 static void cik_select_se_sh(struct radeon_device *rdev,
3352 			     u32 se_num, u32 sh_num)
3353 {
3354 	u32 data = INSTANCE_BROADCAST_WRITES;
3355 
3356 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3357 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3358 	else if (se_num == 0xffffffff)
3359 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3360 	else if (sh_num == 0xffffffff)
3361 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3362 	else
3363 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3364 	WREG32(GRBM_GFX_INDEX, data);
3365 }
3366 
3367 /**
3368  * cik_create_bitmask - create a bitmask
3369  *
3370  * @bit_width: length of the mask
3371  *
3372  * create a variable length bit mask (CIK).
3373  * Returns the bitmask.
3374  */
3375 static u32 cik_create_bitmask(u32 bit_width)
3376 {
3377 	u32 i, mask = 0;
3378 
3379 	for (i = 0; i < bit_width; i++) {
3380 		mask <<= 1;
3381 		mask |= 1;
3382 	}
3383 	return mask;
3384 }
3385 
3386 /**
3387  * cik_get_rb_disabled - computes the mask of disabled RBs
3388  *
3389  * @rdev: radeon_device pointer
3390  * @max_rb_num: max RBs (render backends) for the asic
3391  * @se_num: number of SEs (shader engines) for the asic
3392  * @sh_per_se: number of SH blocks per SE for the asic
3393  *
3394  * Calculates the bitmask of disabled RBs (CIK).
3395  * Returns the disabled RB bitmask.
3396  */
3397 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3398 			      u32 max_rb_num_per_se,
3399 			      u32 sh_per_se)
3400 {
3401 	u32 data, mask;
3402 
3403 	data = RREG32(CC_RB_BACKEND_DISABLE);
3404 	if (data & 1)
3405 		data &= BACKEND_DISABLE_MASK;
3406 	else
3407 		data = 0;
3408 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3409 
3410 	data >>= BACKEND_DISABLE_SHIFT;
3411 
3412 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3413 
3414 	return data & mask;
3415 }
3416 
3417 /**
3418  * cik_setup_rb - setup the RBs on the asic
3419  *
3420  * @rdev: radeon_device pointer
3421  * @se_num: number of SEs (shader engines) for the asic
3422  * @sh_per_se: number of SH blocks per SE for the asic
3423  * @max_rb_num: max RBs (render backends) for the asic
3424  *
3425  * Configures per-SE/SH RB registers (CIK).
3426  */
3427 static void cik_setup_rb(struct radeon_device *rdev,
3428 			 u32 se_num, u32 sh_per_se,
3429 			 u32 max_rb_num_per_se)
3430 {
3431 	int i, j;
3432 	u32 data, mask;
3433 	u32 disabled_rbs = 0;
3434 	u32 enabled_rbs = 0;
3435 
3436 	mutex_lock(&rdev->grbm_idx_mutex);
3437 	for (i = 0; i < se_num; i++) {
3438 		for (j = 0; j < sh_per_se; j++) {
3439 			cik_select_se_sh(rdev, i, j);
3440 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3441 			if (rdev->family == CHIP_HAWAII)
3442 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3443 			else
3444 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3445 		}
3446 	}
3447 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3448 	mutex_unlock(&rdev->grbm_idx_mutex);
3449 
3450 	mask = 1;
3451 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3452 		if (!(disabled_rbs & mask))
3453 			enabled_rbs |= mask;
3454 		mask <<= 1;
3455 	}
3456 
3457 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3458 
3459 	mutex_lock(&rdev->grbm_idx_mutex);
3460 	for (i = 0; i < se_num; i++) {
3461 		cik_select_se_sh(rdev, i, 0xffffffff);
3462 		data = 0;
3463 		for (j = 0; j < sh_per_se; j++) {
3464 			switch (enabled_rbs & 3) {
3465 			case 0:
3466 				if (j == 0)
3467 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3468 				else
3469 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3470 				break;
3471 			case 1:
3472 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3473 				break;
3474 			case 2:
3475 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3476 				break;
3477 			case 3:
3478 			default:
3479 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3480 				break;
3481 			}
3482 			enabled_rbs >>= 2;
3483 		}
3484 		WREG32(PA_SC_RASTER_CONFIG, data);
3485 	}
3486 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3487 	mutex_unlock(&rdev->grbm_idx_mutex);
3488 }
3489 
3490 /**
3491  * cik_gpu_init - setup the 3D engine
3492  *
3493  * @rdev: radeon_device pointer
3494  *
3495  * Configures the 3D engine and tiling configuration
3496  * registers so that the 3D engine is usable.
3497  */
3498 static void cik_gpu_init(struct radeon_device *rdev)
3499 {
3500 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3501 	u32 mc_shared_chmap, mc_arb_ramcfg;
3502 	u32 hdp_host_path_cntl;
3503 	u32 tmp;
3504 	int i, j;
3505 
3506 	switch (rdev->family) {
3507 	case CHIP_BONAIRE:
3508 		rdev->config.cik.max_shader_engines = 2;
3509 		rdev->config.cik.max_tile_pipes = 4;
3510 		rdev->config.cik.max_cu_per_sh = 7;
3511 		rdev->config.cik.max_sh_per_se = 1;
3512 		rdev->config.cik.max_backends_per_se = 2;
3513 		rdev->config.cik.max_texture_channel_caches = 4;
3514 		rdev->config.cik.max_gprs = 256;
3515 		rdev->config.cik.max_gs_threads = 32;
3516 		rdev->config.cik.max_hw_contexts = 8;
3517 
3518 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3519 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3520 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3521 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3522 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3523 		break;
3524 	case CHIP_HAWAII:
3525 		rdev->config.cik.max_shader_engines = 4;
3526 		rdev->config.cik.max_tile_pipes = 16;
3527 		rdev->config.cik.max_cu_per_sh = 11;
3528 		rdev->config.cik.max_sh_per_se = 1;
3529 		rdev->config.cik.max_backends_per_se = 4;
3530 		rdev->config.cik.max_texture_channel_caches = 16;
3531 		rdev->config.cik.max_gprs = 256;
3532 		rdev->config.cik.max_gs_threads = 32;
3533 		rdev->config.cik.max_hw_contexts = 8;
3534 
3535 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3536 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3537 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3538 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3539 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3540 		break;
3541 	case CHIP_KAVERI:
3542 		rdev->config.cik.max_shader_engines = 1;
3543 		rdev->config.cik.max_tile_pipes = 4;
3544 		if ((rdev->pdev->device == 0x1304) ||
3545 		    (rdev->pdev->device == 0x1305) ||
3546 		    (rdev->pdev->device == 0x130C) ||
3547 		    (rdev->pdev->device == 0x130F) ||
3548 		    (rdev->pdev->device == 0x1310) ||
3549 		    (rdev->pdev->device == 0x1311) ||
3550 		    (rdev->pdev->device == 0x131C)) {
3551 			rdev->config.cik.max_cu_per_sh = 8;
3552 			rdev->config.cik.max_backends_per_se = 2;
3553 		} else if ((rdev->pdev->device == 0x1309) ||
3554 			   (rdev->pdev->device == 0x130A) ||
3555 			   (rdev->pdev->device == 0x130D) ||
3556 			   (rdev->pdev->device == 0x1313) ||
3557 			   (rdev->pdev->device == 0x131D)) {
3558 			rdev->config.cik.max_cu_per_sh = 6;
3559 			rdev->config.cik.max_backends_per_se = 2;
3560 		} else if ((rdev->pdev->device == 0x1306) ||
3561 			   (rdev->pdev->device == 0x1307) ||
3562 			   (rdev->pdev->device == 0x130B) ||
3563 			   (rdev->pdev->device == 0x130E) ||
3564 			   (rdev->pdev->device == 0x1315) ||
3565 			   (rdev->pdev->device == 0x1318) ||
3566 			   (rdev->pdev->device == 0x131B)) {
3567 			rdev->config.cik.max_cu_per_sh = 4;
3568 			rdev->config.cik.max_backends_per_se = 1;
3569 		} else {
3570 			rdev->config.cik.max_cu_per_sh = 3;
3571 			rdev->config.cik.max_backends_per_se = 1;
3572 		}
3573 		rdev->config.cik.max_sh_per_se = 1;
3574 		rdev->config.cik.max_texture_channel_caches = 4;
3575 		rdev->config.cik.max_gprs = 256;
3576 		rdev->config.cik.max_gs_threads = 16;
3577 		rdev->config.cik.max_hw_contexts = 8;
3578 
3579 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3580 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3581 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3582 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3583 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3584 		break;
3585 	case CHIP_KABINI:
3586 	case CHIP_MULLINS:
3587 	default:
3588 		rdev->config.cik.max_shader_engines = 1;
3589 		rdev->config.cik.max_tile_pipes = 2;
3590 		rdev->config.cik.max_cu_per_sh = 2;
3591 		rdev->config.cik.max_sh_per_se = 1;
3592 		rdev->config.cik.max_backends_per_se = 1;
3593 		rdev->config.cik.max_texture_channel_caches = 2;
3594 		rdev->config.cik.max_gprs = 256;
3595 		rdev->config.cik.max_gs_threads = 16;
3596 		rdev->config.cik.max_hw_contexts = 8;
3597 
3598 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3599 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3600 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3601 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3602 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3603 		break;
3604 	}
3605 
3606 	/* Initialize HDP */
3607 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3608 		WREG32((0x2c14 + j), 0x00000000);
3609 		WREG32((0x2c18 + j), 0x00000000);
3610 		WREG32((0x2c1c + j), 0x00000000);
3611 		WREG32((0x2c20 + j), 0x00000000);
3612 		WREG32((0x2c24 + j), 0x00000000);
3613 	}
3614 
3615 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3616 
3617 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3618 
3619 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3620 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3621 
3622 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3623 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3624 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3625 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3626 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3627 		rdev->config.cik.mem_row_size_in_kb = 4;
3628 	/* XXX use MC settings? */
3629 	rdev->config.cik.shader_engine_tile_size = 32;
3630 	rdev->config.cik.num_gpus = 1;
3631 	rdev->config.cik.multi_gpu_tile_size = 64;
3632 
3633 	/* fix up row size */
3634 	gb_addr_config &= ~ROW_SIZE_MASK;
3635 	switch (rdev->config.cik.mem_row_size_in_kb) {
3636 	case 1:
3637 	default:
3638 		gb_addr_config |= ROW_SIZE(0);
3639 		break;
3640 	case 2:
3641 		gb_addr_config |= ROW_SIZE(1);
3642 		break;
3643 	case 4:
3644 		gb_addr_config |= ROW_SIZE(2);
3645 		break;
3646 	}
3647 
3648 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3649 	 * not have bank info, so create a custom tiling dword.
3650 	 * bits 3:0   num_pipes
3651 	 * bits 7:4   num_banks
3652 	 * bits 11:8  group_size
3653 	 * bits 15:12 row_size
3654 	 */
3655 	rdev->config.cik.tile_config = 0;
3656 	switch (rdev->config.cik.num_tile_pipes) {
3657 	case 1:
3658 		rdev->config.cik.tile_config |= (0 << 0);
3659 		break;
3660 	case 2:
3661 		rdev->config.cik.tile_config |= (1 << 0);
3662 		break;
3663 	case 4:
3664 		rdev->config.cik.tile_config |= (2 << 0);
3665 		break;
3666 	case 8:
3667 	default:
3668 		/* XXX what about 12? */
3669 		rdev->config.cik.tile_config |= (3 << 0);
3670 		break;
3671 	}
3672 	rdev->config.cik.tile_config |=
3673 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3674 	rdev->config.cik.tile_config |=
3675 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3676 	rdev->config.cik.tile_config |=
3677 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3678 
3679 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3680 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3681 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3682 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3683 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3684 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3685 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3686 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3687 
3688 	cik_tiling_mode_table_init(rdev);
3689 
3690 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3691 		     rdev->config.cik.max_sh_per_se,
3692 		     rdev->config.cik.max_backends_per_se);
3693 
3694 	rdev->config.cik.active_cus = 0;
3695 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3696 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3697 			rdev->config.cik.active_cus +=
3698 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3699 		}
3700 	}
3701 
3702 	/* set HW defaults for 3D engine */
3703 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3704 
3705 	mutex_lock(&rdev->grbm_idx_mutex);
3706 	/*
3707 	 * making sure that the following register writes will be broadcasted
3708 	 * to all the shaders
3709 	 */
3710 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3711 	WREG32(SX_DEBUG_1, 0x20);
3712 
3713 	WREG32(TA_CNTL_AUX, 0x00010000);
3714 
3715 	tmp = RREG32(SPI_CONFIG_CNTL);
3716 	tmp |= 0x03000000;
3717 	WREG32(SPI_CONFIG_CNTL, tmp);
3718 
3719 	WREG32(SQ_CONFIG, 1);
3720 
3721 	WREG32(DB_DEBUG, 0);
3722 
3723 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3724 	tmp |= 0x00000400;
3725 	WREG32(DB_DEBUG2, tmp);
3726 
3727 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3728 	tmp |= 0x00020200;
3729 	WREG32(DB_DEBUG3, tmp);
3730 
3731 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3732 	tmp |= 0x00018208;
3733 	WREG32(CB_HW_CONTROL, tmp);
3734 
3735 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3736 
3737 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3738 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3739 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3740 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3741 
3742 	WREG32(VGT_NUM_INSTANCES, 1);
3743 
3744 	WREG32(CP_PERFMON_CNTL, 0);
3745 
3746 	WREG32(SQ_CONFIG, 0);
3747 
3748 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3749 					  FORCE_EOV_MAX_REZ_CNT(255)));
3750 
3751 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3752 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3753 
3754 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3755 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3756 
3757 	tmp = RREG32(HDP_MISC_CNTL);
3758 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3759 	WREG32(HDP_MISC_CNTL, tmp);
3760 
3761 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3762 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3763 
3764 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3765 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3766 	mutex_unlock(&rdev->grbm_idx_mutex);
3767 
3768 	udelay(50);
3769 }
3770 
3771 /*
3772  * GPU scratch registers helpers function.
3773  */
3774 /**
3775  * cik_scratch_init - setup driver info for CP scratch regs
3776  *
3777  * @rdev: radeon_device pointer
3778  *
3779  * Set up the number and offset of the CP scratch registers.
3780  * NOTE: use of CP scratch registers is a legacy inferface and
3781  * is not used by default on newer asics (r6xx+).  On newer asics,
3782  * memory buffers are used for fences rather than scratch regs.
3783  */
3784 static void cik_scratch_init(struct radeon_device *rdev)
3785 {
3786 	int i;
3787 
3788 	rdev->scratch.num_reg = 7;
3789 	rdev->scratch.reg_base = SCRATCH_REG0;
3790 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3791 		rdev->scratch.free[i] = true;
3792 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3793 	}
3794 }
3795 
3796 /**
3797  * cik_ring_test - basic gfx ring test
3798  *
3799  * @rdev: radeon_device pointer
3800  * @ring: radeon_ring structure holding ring information
3801  *
3802  * Allocate a scratch register and write to it using the gfx ring (CIK).
3803  * Provides a basic gfx ring test to verify that the ring is working.
3804  * Used by cik_cp_gfx_resume();
3805  * Returns 0 on success, error on failure.
3806  */
3807 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3808 {
3809 	uint32_t scratch;
3810 	uint32_t tmp = 0;
3811 	unsigned i;
3812 	int r;
3813 
3814 	r = radeon_scratch_get(rdev, &scratch);
3815 	if (r) {
3816 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3817 		return r;
3818 	}
3819 	WREG32(scratch, 0xCAFEDEAD);
3820 	r = radeon_ring_lock(rdev, ring, 3);
3821 	if (r) {
3822 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3823 		radeon_scratch_free(rdev, scratch);
3824 		return r;
3825 	}
3826 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3827 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3828 	radeon_ring_write(ring, 0xDEADBEEF);
3829 	radeon_ring_unlock_commit(rdev, ring, false);
3830 
3831 	for (i = 0; i < rdev->usec_timeout; i++) {
3832 		tmp = RREG32(scratch);
3833 		if (tmp == 0xDEADBEEF)
3834 			break;
3835 		DRM_UDELAY(1);
3836 	}
3837 	if (i < rdev->usec_timeout) {
3838 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3839 	} else {
3840 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3841 			  ring->idx, scratch, tmp);
3842 		r = -EINVAL;
3843 	}
3844 	radeon_scratch_free(rdev, scratch);
3845 	return r;
3846 }
3847 
3848 /**
3849  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3850  *
3851  * @rdev: radeon_device pointer
3852  * @ridx: radeon ring index
3853  *
3854  * Emits an hdp flush on the cp.
3855  */
3856 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3857 				       int ridx)
3858 {
3859 	struct radeon_ring *ring = &rdev->ring[ridx];
3860 	u32 ref_and_mask;
3861 
3862 	switch (ring->idx) {
3863 	case CAYMAN_RING_TYPE_CP1_INDEX:
3864 	case CAYMAN_RING_TYPE_CP2_INDEX:
3865 	default:
3866 		switch (ring->me) {
3867 		case 0:
3868 			ref_and_mask = CP2 << ring->pipe;
3869 			break;
3870 		case 1:
3871 			ref_and_mask = CP6 << ring->pipe;
3872 			break;
3873 		default:
3874 			return;
3875 		}
3876 		break;
3877 	case RADEON_RING_TYPE_GFX_INDEX:
3878 		ref_and_mask = CP0;
3879 		break;
3880 	}
3881 
3882 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3883 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3884 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3885 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3886 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3887 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3888 	radeon_ring_write(ring, ref_and_mask);
3889 	radeon_ring_write(ring, ref_and_mask);
3890 	radeon_ring_write(ring, 0x20); /* poll interval */
3891 }
3892 
/**
 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.  Two EVENT_WRITE_EOP packets are emitted: a dummy
 * one (seq - 1, no interrupt) followed by the real one.
 */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address the fence value is written to */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc); /* dword-aligned low bits */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0)); /* INT_SEL(0): no interrupt for the dummy */
	radeon_ring_write(ring, fence->seq - 1);
	radeon_ring_write(ring, 0);

	/* Then send the real EOP event down the pipe. */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* INT_SEL(2) here vs INT_SEL(0) above — the real EOP signals an interrupt */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3933 
/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.  Compute uses a single RELEASE_MEM packet rather
 * than the EVENT_WRITE_EOP pair used on the gfx ring.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address the fence value is written to */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc); /* dword-aligned low bits */
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3961 
3962 /**
3963  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3964  *
3965  * @rdev: radeon_device pointer
3966  * @ring: radeon ring buffer object
3967  * @semaphore: radeon semaphore object
3968  * @emit_wait: Is this a sempahore wait?
3969  *
3970  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3971  * from running ahead of semaphore waits.
3972  */
3973 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3974 			     struct radeon_ring *ring,
3975 			     struct radeon_semaphore *semaphore,
3976 			     bool emit_wait)
3977 {
3978 	uint64_t addr = semaphore->gpu_addr;
3979 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3980 
3981 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3982 	radeon_ring_write(ring, lower_32_bits(addr));
3983 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3984 
3985 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3986 		/* Prevent the PFP from running ahead of the semaphore wait */
3987 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3988 		radeon_ring_write(ring, 0x0);
3989 	}
3990 
3991 	return true;
3992 }
3993 
3994 /**
3995  * cik_copy_cpdma - copy pages using the CP DMA engine
3996  *
3997  * @rdev: radeon_device pointer
3998  * @src_offset: src GPU address
3999  * @dst_offset: dst GPU address
4000  * @num_gpu_pages: number of GPU pages to xfer
4001  * @resv: reservation object to sync to
4002  *
4003  * Copy GPU paging using the CP DMA engine (CIK+).
4004  * Used by the radeon ttm implementation to move pages if
4005  * registered as the asic copy callback.
4006  */
4007 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
4008 				    uint64_t src_offset, uint64_t dst_offset,
4009 				    unsigned num_gpu_pages,
4010 				    struct reservation_object *resv)
4011 {
4012 	struct radeon_fence *fence;
4013 	struct radeon_sync sync;
4014 	int ring_index = rdev->asic->copy.blit_ring_index;
4015 	struct radeon_ring *ring = &rdev->ring[ring_index];
4016 	u32 size_in_bytes, cur_size_in_bytes, control;
4017 	int i, num_loops;
4018 	int r = 0;
4019 
4020 	radeon_sync_create(&sync);
4021 
4022 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4023 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
4024 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
4025 	if (r) {
4026 		DRM_ERROR("radeon: moving bo (%d).\n", r);
4027 		radeon_sync_free(rdev, &sync, NULL);
4028 		return ERR_PTR(r);
4029 	}
4030 
4031 	radeon_sync_resv(rdev, &sync, resv, false);
4032 	radeon_sync_rings(rdev, &sync, ring->idx);
4033 
4034 	for (i = 0; i < num_loops; i++) {
4035 		cur_size_in_bytes = size_in_bytes;
4036 		if (cur_size_in_bytes > 0x1fffff)
4037 			cur_size_in_bytes = 0x1fffff;
4038 		size_in_bytes -= cur_size_in_bytes;
4039 		control = 0;
4040 		if (size_in_bytes == 0)
4041 			control |= PACKET3_DMA_DATA_CP_SYNC;
4042 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4043 		radeon_ring_write(ring, control);
4044 		radeon_ring_write(ring, lower_32_bits(src_offset));
4045 		radeon_ring_write(ring, upper_32_bits(src_offset));
4046 		radeon_ring_write(ring, lower_32_bits(dst_offset));
4047 		radeon_ring_write(ring, upper_32_bits(dst_offset));
4048 		radeon_ring_write(ring, cur_size_in_bytes);
4049 		src_offset += cur_size_in_bytes;
4050 		dst_offset += cur_size_in_bytes;
4051 	}
4052 
4053 	r = radeon_fence_emit(rdev, &fence, ring->idx);
4054 	if (r) {
4055 		radeon_ring_unlock_undo(rdev, ring);
4056 		radeon_sync_free(rdev, &sync, NULL);
4057 		return ERR_PTR(r);
4058 	}
4059 
4060 	radeon_ring_unlock_commit(rdev, ring, false);
4061 	radeon_sync_free(rdev, &sync, fence);
4062 
4063 	return fence;
4064 }
4065 
4066 /*
4067  * IB stuff
4068  */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	/* vm id 0 when the IB is not tied to a specific VM */
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* record the post-IB rptr in the save reg: 3 dwords for
			 * this write plus 4 for the IB packet emitted below */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* record the post-IB rptr in the writeback buffer instead:
			 * 5 dwords for this write plus 4 for the IB packet */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* control carries IB length in dwords plus the VM id in bits 31:24 */
	control |= ib->length_dw | (vm_id << 24);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
4124 
4125 /**
4126  * cik_ib_test - basic gfx ring IB test
4127  *
4128  * @rdev: radeon_device pointer
4129  * @ring: radeon_ring structure holding ring information
4130  *
4131  * Allocate an IB and execute it on the gfx ring (CIK).
4132  * Provides a basic gfx ring test to verify that IBs are working.
4133  * Returns 0 on success, error on failure.
4134  */
4135 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4136 {
4137 	struct radeon_ib ib;
4138 	uint32_t scratch;
4139 	uint32_t tmp = 0;
4140 	unsigned i;
4141 	int r;
4142 
4143 	r = radeon_scratch_get(rdev, &scratch);
4144 	if (r) {
4145 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
4146 		return r;
4147 	}
4148 	WREG32(scratch, 0xCAFEDEAD);
4149 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4150 	if (r) {
4151 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4152 		radeon_scratch_free(rdev, scratch);
4153 		return r;
4154 	}
4155 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
4156 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
4157 	ib.ptr[2] = 0xDEADBEEF;
4158 	ib.length_dw = 3;
4159 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
4160 	if (r) {
4161 		radeon_scratch_free(rdev, scratch);
4162 		radeon_ib_free(rdev, &ib);
4163 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4164 		return r;
4165 	}
4166 	r = radeon_fence_wait(ib.fence, false);
4167 	if (r) {
4168 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4169 		radeon_scratch_free(rdev, scratch);
4170 		radeon_ib_free(rdev, &ib);
4171 		return r;
4172 	}
4173 	for (i = 0; i < rdev->usec_timeout; i++) {
4174 		tmp = RREG32(scratch);
4175 		if (tmp == 0xDEADBEEF)
4176 			break;
4177 		DRM_UDELAY(1);
4178 	}
4179 	if (i < rdev->usec_timeout) {
4180 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4181 	} else {
4182 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
4183 			  scratch, tmp);
4184 		r = -EINVAL;
4185 	}
4186 	radeon_scratch_free(rdev, scratch);
4187 	radeon_ib_free(rdev, &ib);
4188 	return r;
4189 }
4190 
4191 /*
4192  * CP.
 * On CIK, gfx and compute now have independent command processors.
4194  *
4195  * GFX
4196  * Gfx consists of a single ring and can process both gfx jobs and
4197  * compute jobs.  The gfx CP consists of three microengines (ME):
4198  * PFP - Pre-Fetch Parser
4199  * ME - Micro Engine
4200  * CE - Constant Engine
4201  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
4203  * used by the DE so that they can be loaded into cache in parallel
4204  * while the DE is processing state update packets.
4205  *
4206  * Compute
4207  * The compute CP consists of two microengines (ME):
4208  * MEC1 - Compute MicroEngine 1
4209  * MEC2 - Compute MicroEngine 2
4210  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4211  * The queues are exposed to userspace and are programmed directly
4212  * by the compute runtime.
4213  */
4214 /**
4215  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4216  *
4217  * @rdev: radeon_device pointer
4218  * @enable: enable or disable the MEs
4219  *
4220  * Halts or unhalts the gfx MEs.
4221  */
4222 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4223 {
4224 	if (enable)
4225 		WREG32(CP_ME_CNTL, 0);
4226 	else {
4227 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4228 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4229 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4230 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4231 	}
4232 	udelay(50);
4233 }
4234 
4235 /**
4236  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4237  *
4238  * @rdev: radeon_device pointer
4239  *
4240  * Loads the gfx PFP, ME, and CE ucode.
4241  * Returns 0 for success, -EINVAL if the ucode is not available.
4242  */
4243 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4244 {
4245 	int i;
4246 
4247 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4248 		return -EINVAL;
4249 
4250 	cik_cp_gfx_enable(rdev, false);
4251 
4252 	if (rdev->new_fw) {
4253 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
4254 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4255 		const struct gfx_firmware_header_v1_0 *ce_hdr =
4256 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4257 		const struct gfx_firmware_header_v1_0 *me_hdr =
4258 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4259 		const __le32 *fw_data;
4260 		u32 fw_size;
4261 
4262 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4263 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4264 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
4265 
4266 		/* PFP */
4267 		fw_data = (const __le32 *)
4268 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4269 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4270 		WREG32(CP_PFP_UCODE_ADDR, 0);
4271 		for (i = 0; i < fw_size; i++)
4272 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4273 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4274 
4275 		/* CE */
4276 		fw_data = (const __le32 *)
4277 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4278 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4279 		WREG32(CP_CE_UCODE_ADDR, 0);
4280 		for (i = 0; i < fw_size; i++)
4281 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4282 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4283 
4284 		/* ME */
4285 		fw_data = (const __be32 *)
4286 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4287 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4288 		WREG32(CP_ME_RAM_WADDR, 0);
4289 		for (i = 0; i < fw_size; i++)
4290 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4291 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4292 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4293 	} else {
4294 		const __be32 *fw_data;
4295 
4296 		/* PFP */
4297 		fw_data = (const __be32 *)rdev->pfp_fw->data;
4298 		WREG32(CP_PFP_UCODE_ADDR, 0);
4299 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4300 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4301 		WREG32(CP_PFP_UCODE_ADDR, 0);
4302 
4303 		/* CE */
4304 		fw_data = (const __be32 *)rdev->ce_fw->data;
4305 		WREG32(CP_CE_UCODE_ADDR, 0);
4306 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4307 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4308 		WREG32(CP_CE_UCODE_ADDR, 0);
4309 
4310 		/* ME */
4311 		fw_data = (const __be32 *)rdev->me_fw->data;
4312 		WREG32(CP_ME_RAM_WADDR, 0);
4313 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4314 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4315 		WREG32(CP_ME_RAM_WADDR, 0);
4316 	}
4317 
4318 	return 0;
4319 }
4320 
/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* cik_default_size dwords of golden clear state plus the 17 dwords
	 * of fixed packets emitted below */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* stream the golden state table (from clearstate_ci.h) */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	/* two context regs starting at offset 0x316 */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);

	return 0;
}
4381 
4382 /**
4383  * cik_cp_gfx_fini - stop the gfx ring
4384  *
4385  * @rdev: radeon_device pointer
4386  *
4387  * Stop the gfx ring and tear down the driver ring
4388  * info.
4389  */
4390 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4391 {
4392 	cik_cp_gfx_enable(rdev, false);
4393 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4394 }
4395 
/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	/* scratch writeback area, 256-byte units (>> 8) */
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	/* without writeback, the rptr copy must not be updated; the flag
	 * takes effect in the final CP_RB0_CNTL write below */
	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	/* ring base address, 256-byte units (>> 8) */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	/* gfx ring is back up; let ttm use all of VRAM for moves again */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4471 
4472 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4473 		     struct radeon_ring *ring)
4474 {
4475 	u32 rptr;
4476 
4477 	if (rdev->wb.enabled)
4478 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4479 	else
4480 		rptr = RREG32(CP_RB0_RPTR);
4481 
4482 	return rptr;
4483 }
4484 
4485 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4486 		     struct radeon_ring *ring)
4487 {
4488 	u32 wptr;
4489 
4490 	wptr = RREG32(CP_RB0_WPTR);
4491 
4492 	return wptr;
4493 }
4494 
/* Publish a new gfx ring write pointer to the hardware. */
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* read back — presumably flushes the posted write; NOTE(review):
	 * confirm against the other radeon wptr helpers */
	(void)RREG32(CP_RB0_WPTR);
}
4501 
4502 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4503 			 struct radeon_ring *ring)
4504 {
4505 	u32 rptr;
4506 
4507 	if (rdev->wb.enabled) {
4508 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4509 	} else {
4510 		mutex_lock(&rdev->srbm_mutex);
4511 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4512 		rptr = RREG32(CP_HQD_PQ_RPTR);
4513 		cik_srbm_select(rdev, 0, 0, 0, 0);
4514 		mutex_unlock(&rdev->srbm_mutex);
4515 	}
4516 
4517 	return rptr;
4518 }
4519 
4520 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4521 			 struct radeon_ring *ring)
4522 {
4523 	u32 wptr;
4524 
4525 	if (rdev->wb.enabled) {
4526 		/* XXX check if swapping is necessary on BE */
4527 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4528 	} else {
4529 		mutex_lock(&rdev->srbm_mutex);
4530 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4531 		wptr = RREG32(CP_HQD_PQ_WPTR);
4532 		cik_srbm_select(rdev, 0, 0, 0, 0);
4533 		mutex_unlock(&rdev->srbm_mutex);
4534 	}
4535 
4536 	return wptr;
4537 }
4538 
/* Publish a new compute ring write pointer: mirror it into the
 * writeback buffer, then write the ring's doorbell so the hardware
 * presumably picks up the new value.
 */
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4546 
4547 /**
4548  * cik_cp_compute_enable - enable/disable the compute CP MEs
4549  *
4550  * @rdev: radeon_device pointer
4551  * @enable: enable or disable the MEs
4552  *
4553  * Halts or unhalts the compute MEs.
4554  */
4555 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4556 {
4557 	if (enable)
4558 		WREG32(CP_MEC_CNTL, 0);
4559 	else {
4560 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4561 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4562 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4563 	}
4564 	udelay(50);
4565 }
4566 
4567 /**
4568  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4569  *
4570  * @rdev: radeon_device pointer
4571  *
4572  * Loads the compute MEC1&2 ucode.
4573  * Returns 0 for success, -EINVAL if the ucode is not available.
4574  */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	int i;

	/* nothing to load if the MEC firmware was never fetched */
	if (!rdev->mec_fw)
		return -EINVAL;

	/* the MEs must be halted while their ucode is replaced */
	cik_cp_compute_enable(rdev, false);

	if (rdev->new_fw) {
		/* new-style firmware images carry a parsed header */
		const struct gfx_firmware_header_v1_0 *mec_hdr =
			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
		const __le32 *fw_data;
		u32 fw_size;

		radeon_ucode_print_gfx_hdr(&mec_hdr->header);

		/* MEC1 */
		fw_data = (const __le32 *)
			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));

		/* MEC2 - only Kaveri has a second MEC */
		if (rdev->family == CHIP_KAVERI) {
			/* NOTE(review): mec2_fw is dereferenced without a NULL
			 * check here - presumably the microcode init path
			 * guarantees it is present whenever new_fw is set on
			 * KV; verify against cik_init_microcode() */
			const struct gfx_firmware_header_v1_0 *mec2_hdr =
				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;

			fw_data = (const __le32 *)
				(rdev->mec2_fw->data +
				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < fw_size; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
		}
	} else {
		/* legacy firmware images are raw big-endian word streams with
		 * a fixed, known size and no header */
		const __be32 *fw_data;

		/* MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

		if (rdev->family == CHIP_KAVERI) {
			/* MEC2 - the legacy image is shared by both MEs */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		}
	}

	return 0;
}
4637 
4638 /**
4639  * cik_cp_compute_start - start the compute queues
4640  *
4641  * @rdev: radeon_device pointer
4642  *
4643  * Enable the compute queues.
4644  * Returns 0 for success, error for failure.
4645  */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	/* just un-halt the MEs; per-queue setup happens in
	 * cik_cp_compute_resume() */
	cik_cp_compute_enable(rdev, true);

	return 0;
}
4652 
4653 /**
4654  * cik_cp_compute_fini - stop the compute queues
4655  *
4656  * @rdev: radeon_device pointer
4657  *
4658  * Stop the compute queues and tear down the driver queue
4659  * info.
4660  */
4661 static void cik_cp_compute_fini(struct radeon_device *rdev)
4662 {
4663 	int i, idx, r;
4664 
4665 	cik_cp_compute_enable(rdev, false);
4666 
4667 	for (i = 0; i < 2; i++) {
4668 		if (i == 0)
4669 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4670 		else
4671 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4672 
4673 		if (rdev->ring[idx].mqd_obj) {
4674 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4675 			if (unlikely(r != 0))
4676 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4677 
4678 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4679 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4680 
4681 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4682 			rdev->ring[idx].mqd_obj = NULL;
4683 		}
4684 	}
4685 }
4686 
/* Free the MEC HPD/EOP buffer object allocated by cik_mec_init(). */
static void cik_mec_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->mec.hpd_eop_obj) {
		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		/* best-effort teardown: unpin/unref even if reserve failed */
		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
		rdev->mec.hpd_eop_obj = NULL;
	}
}
4702 
4703 #define MEC_HPD_SIZE 2048
4704 
4705 static int cik_mec_init(struct radeon_device *rdev)
4706 {
4707 	int r;
4708 	u32 *hpd;
4709 
4710 	/*
4711 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4712 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4713 	 * Nonetheless, we assign only 1 pipe because all other pipes will
4714 	 * be handled by KFD
4715 	 */
4716 	rdev->mec.num_mec = 1;
4717 	rdev->mec.num_pipe = 1;
4718 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4719 
4720 	if (rdev->mec.hpd_eop_obj == NULL) {
4721 		r = radeon_bo_create(rdev,
4722 				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4723 				     PAGE_SIZE, true,
4724 				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4725 				     &rdev->mec.hpd_eop_obj);
4726 		if (r) {
4727 			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4728 			return r;
4729 		}
4730 	}
4731 
4732 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4733 	if (unlikely(r != 0)) {
4734 		cik_mec_fini(rdev);
4735 		return r;
4736 	}
4737 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4738 			  &rdev->mec.hpd_eop_gpu_addr);
4739 	if (r) {
4740 		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4741 		cik_mec_fini(rdev);
4742 		return r;
4743 	}
4744 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4745 	if (r) {
4746 		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4747 		cik_mec_fini(rdev);
4748 		return r;
4749 	}
4750 
4751 	/* clear memory.  Not sure if this is required or not */
4752 	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4753 
4754 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4755 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4756 
4757 	return 0;
4758 }
4759 
/* Shadow of the per-queue CP HQD / MQD register block, embedded in the
 * MQD below.  Field order mirrors the hardware register layout programmed
 * in cik_cp_compute_resume() - do not reorder. */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4798 
/* Memory queue descriptor (MQD) for CIK compute queues; one backs each
 * compute ring's mqd_obj (see cik_cp_compute_resume()).  The layout is
 * consumed by the CP microcode - do not reorder or resize fields. */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	/* queue register state saved/restored by the CP */
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4826 
4827 /**
4828  * cik_cp_compute_resume - setup the compute queue registers
4829  *
4830  * @rdev: radeon_device pointer
4831  *
4832  * Program the compute queues and test them to make sure they
4833  * are working.
4834  * Returns 0 for success, error for failure.
4835  */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	int r, i, j, idx;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct bonaire_mqd *mqd;

	r = cik_cp_compute_start(rdev);
	if (r)
		return r;

	/* fix up chicken bits */
	tmp = RREG32(CP_CPF_DEBUG);
	tmp |= (1 << 23);
	WREG32(CP_CPF_DEBUG, tmp);

	/* init the pipes */
	mutex_lock(&rdev->srbm_mutex);

	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;

	/* only one pipe is owned by the driver (see cik_mec_init()),
	 * so program the EOP state once with the default SRBM selection */
	cik_srbm_select(rdev, 0, 0, 0, 0);

	/* write the EOP addr */
	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

	/* set the VMID assigned */
	WREG32(CP_HPD_EOP_VMID, 0);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(CP_HPD_EOP_CONTROL);
	tmp &= ~EOP_SIZE_MASK;
	tmp |= order_base_2(MEC_HPD_SIZE / 8);
	WREG32(CP_HPD_EOP_CONTROL, tmp);

	mutex_unlock(&rdev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		/* the MQD bo persists across suspend/resume; allocate once */
		if (rdev->ring[idx].mqd_obj == NULL) {
			r = radeon_bo_create(rdev,
					     sizeof(struct bonaire_mqd),
					     PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &rdev->ring[idx].mqd_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
		if (unlikely(r != 0)) {
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct bonaire_mqd));

		mqd = (struct bonaire_mqd *)buf;
		mqd->header = 0xC0310800;
		/* enable all compute units for this queue */
		mqd->static_thread_mgmt01[0] = 0xffffffff;
		mqd->static_thread_mgmt01[1] = 0xffffffff;
		mqd->static_thread_mgmt23[0] = 0xffffffff;
		mqd->static_thread_mgmt23[1] = 0xffffffff;

		/* the CP_HQD_* registers below are per-queue, reached through
		 * the SRBM selection held for the rest of this iteration */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, rdev->ring[idx].me,
				rdev->ring[idx].pipe,
				rdev->ring[idx].queue, 0);

		/* disable wptr polling */
		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
		tmp &= ~WPTR_POLL_EN;
		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

		/* enable doorbell? */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr= 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			/* wait for the queue to drain before reprogramming it */
			for (j = 0; j < rdev->usec_timeout; j++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

		mqd->queue_state.cp_hqd_pq_control |=
			order_base_2(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address whether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			mqd->queue_state.cp_hqd_pq_doorbell_control =
				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
			mqd->queue_state.cp_hqd_pq_doorbell_control |=
				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &=
				~(DOORBELL_SOURCE | DOORBELL_HIT);

		} else {
			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		rdev->ring[idx].wptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->queue_state.cp_hqd_vmid = 0;
		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

		/* activate the queue */
		mqd->queue_state.cp_hqd_active = 1;
		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);

		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

		/* mark the ring usable, then verify with a ring test */
		rdev->ring[idx].ready = true;
		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
		if (r)
			rdev->ring[idx].ready = false;
	}

	return 0;
}
5067 
/* Enable or halt both the gfx and compute command processors. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
5073 
/* Load the gfx CP ucode, then the compute CP ucode.
 * Returns 0 for success, the first failing error code otherwise. */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r == 0)
		r = cik_cp_compute_load_microcode(rdev);

	return r;
}
5087 
/* Tear down both the gfx and compute command processors. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
5093 
5094 static int cik_cp_resume(struct radeon_device *rdev)
5095 {
5096 	int r;
5097 
5098 	cik_enable_gui_idle_interrupt(rdev, false);
5099 
5100 	r = cik_cp_load_microcode(rdev);
5101 	if (r)
5102 		return r;
5103 
5104 	r = cik_cp_gfx_resume(rdev);
5105 	if (r)
5106 		return r;
5107 	r = cik_cp_compute_resume(rdev);
5108 	if (r)
5109 		return r;
5110 
5111 	cik_enable_gui_idle_interrupt(rdev, true);
5112 
5113 	return 0;
5114 }
5115 
/* Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log; used
 * while diagnosing hangs before/after a soft reset. */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
5155 
5156 /**
5157  * cik_gpu_check_soft_reset - check which blocks are busy
5158  *
5159  * @rdev: radeon_device pointer
5160  *
5161  * Check which blocks are busy and return the relevant reset
5162  * mask to be used by cik_gpu_soft_reset().
5163  * Returns a mask of the blocks to be reset.
5164  */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's most likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
5236 
5237 /**
5238  * cik_gpu_soft_reset - soft reset GPU
5239  *
5240  * @rdev: radeon_device pointer
5241  * @reset_mask: mask of which blocks to reset
5242  *
5243  * Soft reset the blocks specified in @reset_mask.
5244  */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* dump the pre-reset state for debugging */
	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* quiesce memory traffic before touching the reset registers */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	/* pulse the GRBM reset bits: set, post (read back), wait, clear */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* same pulse sequence for the SRBM reset bits */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}
5367 
/* GMCON register state preserved across a KV (IGP) pci config reset;
 * filled by kv_save_regs_for_reset(), restored by
 * kv_restore_regs_for_reset(). */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
5373 
/* Save the GMCON registers and keep the retention engine quiescent while
 * the reset is in progress. */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	/* stop the retention engine from running on power up or register
	 * update, and disable stutter mode, until state is restored */
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5385 
5386 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5387 				      struct kv_reset_save_regs *save)
5388 {
5389 	int i;
5390 
5391 	WREG32(GMCON_PGFSM_WRITE, 0);
5392 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5393 
5394 	for (i = 0; i < 5; i++)
5395 		WREG32(GMCON_PGFSM_WRITE, 0);
5396 
5397 	WREG32(GMCON_PGFSM_WRITE, 0);
5398 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5399 
5400 	for (i = 0; i < 5; i++)
5401 		WREG32(GMCON_PGFSM_WRITE, 0);
5402 
5403 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5404 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5405 
5406 	for (i = 0; i < 5; i++)
5407 		WREG32(GMCON_PGFSM_WRITE, 0);
5408 
5409 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5410 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5411 
5412 	for (i = 0; i < 5; i++)
5413 		WREG32(GMCON_PGFSM_WRITE, 0);
5414 
5415 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5416 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5417 
5418 	for (i = 0; i < 5; i++)
5419 		WREG32(GMCON_PGFSM_WRITE, 0);
5420 
5421 	WREG32(GMCON_PGFSM_WRITE, 0);
5422 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5423 
5424 	for (i = 0; i < 5; i++)
5425 		WREG32(GMCON_PGFSM_WRITE, 0);
5426 
5427 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5428 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5429 
5430 	for (i = 0; i < 5; i++)
5431 		WREG32(GMCON_PGFSM_WRITE, 0);
5432 
5433 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5434 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5435 
5436 	for (i = 0; i < 5; i++)
5437 		WREG32(GMCON_PGFSM_WRITE, 0);
5438 
5439 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5440 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5441 
5442 	for (i = 0; i < 5; i++)
5443 		WREG32(GMCON_PGFSM_WRITE, 0);
5444 
5445 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5446 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5447 
5448 	for (i = 0; i < 5; i++)
5449 		WREG32(GMCON_PGFSM_WRITE, 0);
5450 
5451 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5452 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5453 
5454 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5455 	WREG32(GMCON_MISC, save->gmcon_misc);
5456 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5457 }
5458 
/* Full-device reset via PCI config space; heavier hammer than
 * cik_gpu_soft_reset(), used when soft reset fails to clear a hang. */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* IGPs need their GMCON state preserved across the reset */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset; CONFIG_MEMSIZE reads as all
	 * ones while the chip is still in reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5521 
5522 /**
5523  * cik_asic_reset - soft reset GPU
5524  *
5525  * @rdev: radeon_device pointer
5526  *
5527  * Look up which blocks are hung and attempt
5528  * to reset them.
5529  * Returns 0 for success.
5530  */
int cik_asic_reset(struct radeon_device *rdev)
{
	u32 reset_mask;

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* tell the BIOS the engine is hung before attempting recovery */
	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	/* try soft reset */
	cik_gpu_soft_reset(rdev, reset_mask);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* try pci config reset if soft reset didn't clear everything and the
	 * user allowed hard resets */
	if (reset_mask && radeon_hard_reset)
		cik_gpu_pci_config_reset(rdev);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* clear the hung flag only if everything is idle now */
	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
5556 
5557 /**
5558  * cik_gfx_is_lockup - check if the 3D engine is locked up
5559  *
5560  * @rdev: radeon_device pointer
5561  * @ring: radeon_ring structure holding ring information
5562  *
5563  * Check if the 3D engine is locked up (CIK).
5564  * Returns true if the engine is locked, false if not.
5565  */
5566 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5567 {
5568 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5569 
5570 	if (!(reset_mask & (RADEON_RESET_GFX |
5571 			    RADEON_RESET_COMPUTE |
5572 			    RADEON_RESET_CP))) {
5573 		radeon_ring_lockup_update(rdev, ring);
5574 		return false;
5575 	}
5576 	return radeon_ring_test_lockup(rdev, ring);
5577 }
5578 
5579 /* MC */
5580 /**
5581  * cik_mc_program - program the GPU memory controller
5582  *
5583  * @rdev: radeon_device pointer
5584  *
5585  * Set the location of vram, gart, and AGP in the GPU's
5586  * physical address space (CIK).
5587  */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* memory traffic must be stopped while the aperture is moved */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs the 16-bit top and base (in 16MB units) */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* disable the AGP aperture (base 0, top < bottom) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5635 
5636 /**
5637  * cik_mc_init - initialize the memory controller driver params
5638  *
5639  * @rdev: radeon_device pointer
5640  *
5641  * Look up the amount of vram, vram width, and decide how to place
5642  * vram and gart within the GPU's physical address space (CIK).
5643  * Returns 0 for success.
5644  */
5645 static int cik_mc_init(struct radeon_device *rdev)
5646 {
5647 	u32 tmp;
5648 	int chansize, numchan;
5649 
5650 	/* Get VRAM informations */
5651 	rdev->mc.vram_is_ddr = true;
5652 	tmp = RREG32(MC_ARB_RAMCFG);
5653 	if (tmp & CHANSIZE_MASK) {
5654 		chansize = 64;
5655 	} else {
5656 		chansize = 32;
5657 	}
5658 	tmp = RREG32(MC_SHARED_CHMAP);
5659 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5660 	case 0:
5661 	default:
5662 		numchan = 1;
5663 		break;
5664 	case 1:
5665 		numchan = 2;
5666 		break;
5667 	case 2:
5668 		numchan = 4;
5669 		break;
5670 	case 3:
5671 		numchan = 8;
5672 		break;
5673 	case 4:
5674 		numchan = 3;
5675 		break;
5676 	case 5:
5677 		numchan = 6;
5678 		break;
5679 	case 6:
5680 		numchan = 10;
5681 		break;
5682 	case 7:
5683 		numchan = 12;
5684 		break;
5685 	case 8:
5686 		numchan = 16;
5687 		break;
5688 	}
5689 	rdev->mc.vram_width = numchan * chansize;
5690 	/* Could aper size report 0 ? */
5691 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5692 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5693 	/* size in MB on si */
5694 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5695 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5696 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5697 	si_vram_gtt_location(rdev, &rdev->mc);
5698 	radeon_update_bandwidth_info(rdev);
5699 
5700 	return 0;
5701 }
5702 
/*
 * GART
 * VMID 0 is the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the radeon vm/hsa code.
 */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15 */
	/* only bit 0 is set: invalidate just the VMID 0 (kernel) context */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5724 
/**
 * cik_pcie_init_compute_vmid - set up SH_MEM state for the compute VMIDs
 *
 * @rdev: radeon_device pointer
 *
 * Program the SH_MEM aperture registers of VMIDs 8-15 (the VMIDs
 * reserved for amdkfd, see cik_vm_init) under the srbm mutex, then
 * restore the SRBM selection to VMID 0.
 */
static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
{
	int i;
	uint32_t sh_mem_bases, sh_mem_config;

	/* same 16-bit base for the private and shared apertures —
	 * NOTE(review): exact aperture encoding not visible here, confirm
	 * against the SH_MEM_BASES register definition */
	sh_mem_bases = 0x6000 | 0x6000 << 16;
	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);

	mutex_lock(&rdev->srbm_mutex);
	for (i = 8; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, sh_mem_config);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);	/* LIMIT < BASE: APE1 disabled */
		WREG32(SH_MEM_BASES, sh_mem_bases);
	}
	/* switch back to the kernel VMID */
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);
}
5746 
/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
 */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	/* pin the GART table in VRAM so its GPU address is stable */
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0 */
	/* context0 maps the GTT range through the pinned page table */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faults in context0 are redirected to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): magic register offsets, purpose not documented here */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* restore context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* contexts 1-7 and 8-15 live in two separate register banks */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* Kaveri APUs must not bypass the VM in the hub */
	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	/* reprogram VMIDs 8-15 for amdkfd */
	cik_pcie_init_compute_vmid(rdev);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
5869 
/**
 * cik_pcie_gart_disable - gart disable
 *
 * @rdev: radeon_device pointer
 *
 * This disables all VM page table (CIK).
 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* save the per-VMID page table bases so cik_pcie_gart_enable()
	 * can restore them after a reset/resume */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	/* note: ENABLE_L2_CACHE deliberately left out here */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
5908 
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK).
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* disable the hw first, then release the table memory and state */
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5922 
/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* nothing to validate in software; always succeeds */
	return 0;
}
5936 
5937 /*
5938  * vm
5939  * VMID 0 is the physical GPU addresses as used by the kernel.
5940  * VMIDs 1-15 are used for userspace clients and are handled
5941  * by the radeon vm/hsa code.
5942  */
5943 /**
5944  * cik_vm_init - cik vm init callback
5945  *
5946  * @rdev: radeon_device pointer
5947  *
5948  * Inits cik specific vm parameters (number of VMs, base of vram for
5949  * VMIDs 1-15) (CIK).
5950  * Returns 0 for success.
5951  */
5952 int cik_vm_init(struct radeon_device *rdev)
5953 {
5954 	/*
5955 	 * number of VMs
5956 	 * VMID 0 is reserved for System
5957 	 * radeon graphics/compute will use VMIDs 1-7
5958 	 * amdkfd will use VMIDs 8-15
5959 	 */
5960 	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5961 	/* base offset of vram pages */
5962 	if (rdev->flags & RADEON_IS_IGP) {
5963 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5964 		tmp <<= 22;
5965 		rdev->vm_manager.vram_base_offset = tmp;
5966 	} else
5967 		rdev->vm_manager.vram_base_offset = 0;
5968 
5969 	return 0;
5970 }
5971 
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* nothing to do: cik_vm_init() acquires no resources */
}
5982 
5983 /**
5984  * cik_vm_decode_fault - print human readable fault info
5985  *
5986  * @rdev: radeon_device pointer
5987  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5988  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5989  *
5990  * Print human readable fault information (CIK).
5991  */
5992 static void cik_vm_decode_fault(struct radeon_device *rdev,
5993 				u32 status, u32 addr, u32 mc_client)
5994 {
5995 	u32 mc_id;
5996 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5997 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5998 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5999 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
6000 
6001 	if (rdev->family == CHIP_HAWAII)
6002 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6003 	else
6004 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6005 
6006 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
6007 	       protections, vmid, addr,
6008 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
6009 	       block, mc_client, mc_id);
6010 }
6011 
/**
 * cik_vm_flush - cik vm flush using the CP
 *
 * @rdev: radeon_device pointer
 * @ring: ring to emit the flush packets on
 * @vm_id: VMID (1-15) whose page table changed
 * @pd_addr: new page directory base address
 *
 * Update the page table base and flush the VM TLB
 * using the CP (CIK).
 */
void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		  unsigned vm_id, uint64_t pd_addr)
{
	/* only the gfx ring has a PFP engine; compute rings write via ME */
	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);

	/* update the page table base for this VMID */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	/* VMIDs 1-7 and 8-15 use two separate register banks */
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* update SH_MEM_* regs */
	/* select the target VMID in SRBM_GFX_CNTL first */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm_id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch SRBM back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
6093 
6094 /*
6095  * RLC
6096  * The RLC is a multi-purpose microengine that handles a
6097  * variety of functions, the most important of which is
6098  * the interrupt controller.
6099  */
6100 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6101 					  bool enable)
6102 {
6103 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
6104 
6105 	if (enable)
6106 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6107 	else
6108 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6109 	WREG32(CP_INT_CNTL_RING0, tmp);
6110 }
6111 
6112 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6113 {
6114 	u32 tmp;
6115 
6116 	tmp = RREG32(RLC_LB_CNTL);
6117 	if (enable)
6118 		tmp |= LOAD_BALANCE_ENABLE;
6119 	else
6120 		tmp &= ~LOAD_BALANCE_ENABLE;
6121 	WREG32(RLC_LB_CNTL, tmp);
6122 }
6123 
/* wait (bounded by usec_timeout) for all RLC serdes masters to go idle */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	/* poll each SE/SH pair until its CU master reports idle */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast (all SE/SH) selection */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* then wait for the non-CU masters (SE/GC/TC) as well */
	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
6150 
6151 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6152 {
6153 	u32 tmp;
6154 
6155 	tmp = RREG32(RLC_CNTL);
6156 	if (tmp != rlc)
6157 		WREG32(RLC_CNTL, rlc);
6158 }
6159 
/* halt the RLC if running; returns the previous RLC_CNTL value so the
 * caller can restore it later via cik_update_rlc() */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* wait for the RLC GPM to drain (bounded by usec_timeout) */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
6183 
/* request RLC safe mode and wait for the RLC to acknowledge it */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	/* wait until gfx power and clock status both report on */
	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to clear the request bit (ack) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
6204 
6205 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6206 {
6207 	u32 tmp;
6208 
6209 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6210 	WREG32(RLC_GPR_REG2, tmp);
6211 }
6212 
/**
 * cik_rlc_stop - stop the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Halt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	/* clearing RLC_CNTL (including RLC_ENABLE) halts the engine */
	WREG32(RLC_CNTL, 0);

	cik_enable_gui_idle_interrupt(rdev, false);

	/* make sure all serdes masters have drained before returning */
	cik_wait_for_rlc_serdes(rdev);
}
6228 
/**
 * cik_rlc_start - start the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Unhalt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	cik_enable_gui_idle_interrupt(rdev, true);

	/* give the RLC time to come up before touching it again */
	udelay(50);
}
6244 
/**
 * cik_rlc_resume - setup the RLC hw
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the RLC registers, load the ucode,
 * and start the RLC (CIK).
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_rlc_resume(struct radeon_device *rdev)
{
	u32 i, size, tmp;

	if (!rdev->rlc_fw)
		return -EINVAL;

	cik_rlc_stop(rdev);

	/* disable CG */
	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(RLC_CGCG_CGLS_CTRL, tmp);

	si_rlc_reset(rdev);

	cik_init_pg(rdev);

	cik_init_cg(rdev);

	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_CNTR_MAX, 0x00008000);

	/* program load-balancing params with all SE/SH selected */
	mutex_lock(&rdev->grbm_idx_mutex);
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(RLC_LB_PARAMS, 0x00600408);
	WREG32(RLC_LB_CNTL, 0x80000004);
	mutex_unlock(&rdev->grbm_idx_mutex);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* new-style firmware: little-endian payload with a header */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		const __le32 *fw_data = (const __le32 *)
			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
	} else {
		/* legacy firmware: raw big-endian words, size by family */
		const __be32 *fw_data;

		switch (rdev->family) {
		case CHIP_BONAIRE:
		case CHIP_HAWAII:
		default:
			size = BONAIRE_RLC_UCODE_SIZE;
			break;
		case CHIP_KAVERI:
			size = KV_RLC_UCODE_SIZE;
			break;
		case CHIP_KABINI:
			size = KB_RLC_UCODE_SIZE;
			break;
		case CHIP_MULLINS:
			size = ML_RLC_UCODE_SIZE;
			break;
		}

		fw_data = (const __be32 *)rdev->rlc_fw->data;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, 0);
	}

	/* XXX - find out what chips support lbpw */
	cik_enable_lbpw(rdev, false);

	if (rdev->family == CHIP_BONAIRE)
		WREG32(RLC_DRIVER_DMA_STATUS, 0);

	cik_rlc_start(rdev);

	return 0;
}
6336 
/* enable/disable coarse-grain clock gating (CGCG) and clock/light
 * sleep (CGLS) in the gfx block */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* halt the RLC while poking the serdes, restore afterwards */
		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* dummy reads — presumably to flush pending CB clock-gating
		 * state before disabling (hw workaround, TODO confirm) */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	/* only touch the register if the value actually changed */
	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6374 
/* enable/disable medium-grain clock gating (MGCG) and the related
 * mem/light-sleep features in the gfx block */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* set bit 0, clear bit 1 of the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* halt the RLC while programming the serdes, restore after */
		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* force the MGCG override on */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* turn off RLC memory light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		/* turn off CP memory light sleep */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);
	}
}
6458 
/* MC client registers that each carry the per-block MC_CG_ENABLE /
 * MC_LS_ENABLE bits; iterated by cik_enable_mc_mgcg()/cik_enable_mc_ls() */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6471 
6472 static void cik_enable_mc_ls(struct radeon_device *rdev,
6473 			     bool enable)
6474 {
6475 	int i;
6476 	u32 orig, data;
6477 
6478 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6479 		orig = data = RREG32(mc_cg_registers[i]);
6480 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6481 			data |= MC_LS_ENABLE;
6482 		else
6483 			data &= ~MC_LS_ENABLE;
6484 		if (data != orig)
6485 			WREG32(mc_cg_registers[i], data);
6486 	}
6487 }
6488 
6489 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6490 			       bool enable)
6491 {
6492 	int i;
6493 	u32 orig, data;
6494 
6495 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6496 		orig = data = RREG32(mc_cg_registers[i]);
6497 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6498 			data |= MC_CG_ENABLE;
6499 		else
6500 			data &= ~MC_CG_ENABLE;
6501 		if (data != orig)
6502 			WREG32(mc_cg_registers[i], data);
6503 	}
6504 }
6505 
/* toggle medium-grain clock gating on both SDMA engines */
static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
				 bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
		/* enable: unconditionally program the gating value */
		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
	} else {
		/* disable: set the clock override bits (top byte) */
		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
		data |= 0xff000000;
		if (data != orig)
			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);

		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
		data |= 0xff000000;
		if (data != orig)
			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
	}
}
6526 
6527 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6528 				 bool enable)
6529 {
6530 	u32 orig, data;
6531 
6532 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6533 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6534 		data |= 0x100;
6535 		if (orig != data)
6536 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6537 
6538 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6539 		data |= 0x100;
6540 		if (orig != data)
6541 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6542 	} else {
6543 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6544 		data &= ~0x100;
6545 		if (orig != data)
6546 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6547 
6548 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6549 		data &= ~0x100;
6550 		if (orig != data)
6551 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6552 	}
6553 }
6554 
/* toggle medium-grain clock gating in the UVD block */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		/* NOTE(review): this read's result is immediately overwritten
		 * by the 0xfff assignment below — dead store, possibly the
		 * read itself matters to the hw; confirm before removing */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6580 
6581 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6582 			       bool enable)
6583 {
6584 	u32 orig, data;
6585 
6586 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6587 
6588 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6589 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6590 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6591 	else
6592 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6593 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6594 
6595 	if (orig != data)
6596 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6597 }
6598 
6599 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6600 				bool enable)
6601 {
6602 	u32 orig, data;
6603 
6604 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6605 
6606 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6607 		data &= ~CLOCK_GATING_DIS;
6608 	else
6609 		data |= CLOCK_GATING_DIS;
6610 
6611 	if (orig != data)
6612 		WREG32(HDP_HOST_PATH_CNTL, data);
6613 }
6614 
6615 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6616 			      bool enable)
6617 {
6618 	u32 orig, data;
6619 
6620 	orig = data = RREG32(HDP_MEM_POWER_LS);
6621 
6622 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6623 		data |= HDP_LS_ENABLE;
6624 	else
6625 		data &= ~HDP_LS_ENABLE;
6626 
6627 	if (orig != data)
6628 		WREG32(HDP_MEM_POWER_LS, data);
6629 }
6630 
6631 void cik_update_cg(struct radeon_device *rdev,
6632 		   u32 block, bool enable)
6633 {
6634 
6635 	if (block & RADEON_CG_BLOCK_GFX) {
6636 		cik_enable_gui_idle_interrupt(rdev, false);
6637 		/* order matters! */
6638 		if (enable) {
6639 			cik_enable_mgcg(rdev, true);
6640 			cik_enable_cgcg(rdev, true);
6641 		} else {
6642 			cik_enable_cgcg(rdev, false);
6643 			cik_enable_mgcg(rdev, false);
6644 		}
6645 		cik_enable_gui_idle_interrupt(rdev, true);
6646 	}
6647 
6648 	if (block & RADEON_CG_BLOCK_MC) {
6649 		if (!(rdev->flags & RADEON_IS_IGP)) {
6650 			cik_enable_mc_mgcg(rdev, enable);
6651 			cik_enable_mc_ls(rdev, enable);
6652 		}
6653 	}
6654 
6655 	if (block & RADEON_CG_BLOCK_SDMA) {
6656 		cik_enable_sdma_mgcg(rdev, enable);
6657 		cik_enable_sdma_mgls(rdev, enable);
6658 	}
6659 
6660 	if (block & RADEON_CG_BLOCK_BIF) {
6661 		cik_enable_bif_mgls(rdev, enable);
6662 	}
6663 
6664 	if (block & RADEON_CG_BLOCK_UVD) {
6665 		if (rdev->has_uvd)
6666 			cik_enable_uvd_mgcg(rdev, enable);
6667 	}
6668 
6669 	if (block & RADEON_CG_BLOCK_HDP) {
6670 		cik_enable_hdp_mgcg(rdev, enable);
6671 		cik_enable_hdp_ls(rdev, enable);
6672 	}
6673 
6674 	if (block & RADEON_CG_BLOCK_VCE) {
6675 		vce_v2_0_enable_mgcg(rdev, enable);
6676 	}
6677 }
6678 
6679 static void cik_init_cg(struct radeon_device *rdev)
6680 {
6681 
6682 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6683 
6684 	if (rdev->has_uvd)
6685 		si_init_uvd_internal_cg(rdev);
6686 
6687 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6688 			     RADEON_CG_BLOCK_SDMA |
6689 			     RADEON_CG_BLOCK_BIF |
6690 			     RADEON_CG_BLOCK_UVD |
6691 			     RADEON_CG_BLOCK_HDP), true);
6692 }
6693 
6694 static void cik_fini_cg(struct radeon_device *rdev)
6695 {
6696 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6697 			     RADEON_CG_BLOCK_SDMA |
6698 			     RADEON_CG_BLOCK_BIF |
6699 			     RADEON_CG_BLOCK_UVD |
6700 			     RADEON_CG_BLOCK_HDP), false);
6701 
6702 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6703 }
6704 
6705 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6706 					  bool enable)
6707 {
6708 	u32 data, orig;
6709 
6710 	orig = data = RREG32(RLC_PG_CNTL);
6711 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6712 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6713 	else
6714 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6715 	if (orig != data)
6716 		WREG32(RLC_PG_CNTL, data);
6717 }
6718 
6719 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6720 					  bool enable)
6721 {
6722 	u32 data, orig;
6723 
6724 	orig = data = RREG32(RLC_PG_CNTL);
6725 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6726 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6727 	else
6728 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6729 	if (orig != data)
6730 		WREG32(RLC_PG_CNTL, data);
6731 }
6732 
6733 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6734 {
6735 	u32 data, orig;
6736 
6737 	orig = data = RREG32(RLC_PG_CNTL);
6738 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6739 		data &= ~DISABLE_CP_PG;
6740 	else
6741 		data |= DISABLE_CP_PG;
6742 	if (orig != data)
6743 		WREG32(RLC_PG_CNTL, data);
6744 }
6745 
6746 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6747 {
6748 	u32 data, orig;
6749 
6750 	orig = data = RREG32(RLC_PG_CNTL);
6751 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6752 		data &= ~DISABLE_GDS_PG;
6753 	else
6754 		data |= DISABLE_GDS_PG;
6755 	if (orig != data)
6756 		WREG32(RLC_PG_CNTL, data);
6757 }
6758 
/* legacy (pre-header) firmware: fixed jump-table size and offsets */
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096

/**
 * cik_init_cp_pg_table - copy the CP jump tables into the RLC buffer
 *
 * @rdev: radeon_device pointer
 *
 * Packs the jump table of each CP microengine (CE, PFP, ME, MEC and,
 * on Kaveri, MEC2) back to back into rdev->rlc.cp_table_ptr.  With
 * new-style firmware the per-engine table offset and size come from
 * the gfx firmware header; with legacy firmware fixed offsets and a
 * fixed size are used.  Silently returns if the cp table buffer was
 * never mapped.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	/* Kaveri has a fifth microengine (MEC2) */
	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (rdev->new_fw) {
			const __le32 *fw_data;
			const struct gfx_firmware_header_v1_0 *hdr;

			/* select the firmware image for this engine and
			 * pull the jump-table location from its header */
			if (me == 0) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
				fw_data = (const __le32 *)
					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 1) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
				fw_data = (const __le32 *)
					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 2) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
				fw_data = (const __le32 *)
					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 3) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			}

			/* fw words are already little endian, so the
			 * le32 round-trip below is value-preserving */
			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		} else {
			const __be32 *fw_data;
			table_size = CP_ME_TABLE_SIZE;

			/* legacy fw images are big endian at fixed offsets */
			if (me == 0) {
				fw_data = (const __be32 *)rdev->ce_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 1) {
				fw_data = (const __be32 *)rdev->pfp_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 2) {
				fw_data = (const __be32 *)rdev->me_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else {
				fw_data = (const __be32 *)rdev->mec_fw->data;
				table_offset = CP_MEC_TABLE_OFFSET;
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		}
	}
}
6846 
6847 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6848 				bool enable)
6849 {
6850 	u32 data, orig;
6851 
6852 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6853 		orig = data = RREG32(RLC_PG_CNTL);
6854 		data |= GFX_PG_ENABLE;
6855 		if (orig != data)
6856 			WREG32(RLC_PG_CNTL, data);
6857 
6858 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6859 		data |= AUTO_PG_EN;
6860 		if (orig != data)
6861 			WREG32(RLC_AUTO_PG_CTRL, data);
6862 	} else {
6863 		orig = data = RREG32(RLC_PG_CNTL);
6864 		data &= ~GFX_PG_ENABLE;
6865 		if (orig != data)
6866 			WREG32(RLC_PG_CNTL, data);
6867 
6868 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6869 		data &= ~AUTO_PG_EN;
6870 		if (orig != data)
6871 			WREG32(RLC_AUTO_PG_CTRL, data);
6872 
6873 		data = RREG32(DB_RENDER_CONTROL);
6874 	}
6875 }
6876 
6877 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6878 {
6879 	u32 mask = 0, tmp, tmp1;
6880 	int i;
6881 
6882 	mutex_lock(&rdev->grbm_idx_mutex);
6883 	cik_select_se_sh(rdev, se, sh);
6884 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6885 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6886 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6887 	mutex_unlock(&rdev->grbm_idx_mutex);
6888 
6889 	tmp &= 0xffff0000;
6890 
6891 	tmp |= tmp1;
6892 	tmp >>= 16;
6893 
6894 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6895 		mask <<= 1;
6896 		mask |= 1;
6897 	}
6898 
6899 	return (~tmp) & mask;
6900 }
6901 
6902 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6903 {
6904 	u32 i, j, k, active_cu_number = 0;
6905 	u32 mask, counter, cu_bitmap;
6906 	u32 tmp = 0;
6907 
6908 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6909 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6910 			mask = 1;
6911 			cu_bitmap = 0;
6912 			counter = 0;
6913 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6914 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6915 					if (counter < 2)
6916 						cu_bitmap |= mask;
6917 					counter ++;
6918 				}
6919 				mask <<= 1;
6920 			}
6921 
6922 			active_cu_number += counter;
6923 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6924 		}
6925 	}
6926 
6927 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6928 
6929 	tmp = RREG32(RLC_MAX_PG_CU);
6930 	tmp &= ~MAX_PU_CU_MASK;
6931 	tmp |= MAX_PU_CU(active_cu_number);
6932 	WREG32(RLC_MAX_PG_CU, tmp);
6933 }
6934 
6935 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6936 				       bool enable)
6937 {
6938 	u32 data, orig;
6939 
6940 	orig = data = RREG32(RLC_PG_CNTL);
6941 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6942 		data |= STATIC_PER_CU_PG_ENABLE;
6943 	else
6944 		data &= ~STATIC_PER_CU_PG_ENABLE;
6945 	if (orig != data)
6946 		WREG32(RLC_PG_CNTL, data);
6947 }
6948 
6949 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6950 					bool enable)
6951 {
6952 	u32 data, orig;
6953 
6954 	orig = data = RREG32(RLC_PG_CNTL);
6955 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6956 		data |= DYN_PER_CU_PG_ENABLE;
6957 	else
6958 		data &= ~DYN_PER_CU_PG_ENABLE;
6959 	if (orig != data)
6960 		WREG32(RLC_PG_CNTL, data);
6961 }
6962 
/* offsets into the RLC GPM scratch area */
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D

/**
 * cik_init_gfx_cgpg - set up the RLC for gfx powergating
 *
 * @rdev: radeon_device pointer
 *
 * Writes the clear-state descriptor (address + size, or zeros when no
 * cs_data is present) and the save/restore register list into the RLC
 * GPM scratch area, points the RLC at the save/restore and cp table
 * buffers, and programs the powergating delay/idle-poll parameters.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* clear-state descriptor: hi addr, lo addr, size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state: zero out the three descriptor words */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* save/restore register list for PG transitions */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* buffers are 256-byte aligned, registers take addr >> 8 */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
7014 
/**
 * cik_update_gfx_pg - enable/disable all gfx powergating modes
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable
 *
 * Toggles coarse-grain, static per-CU and dynamic per-CU gfx
 * powergating; each helper checks its own pg_flags support bit.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
7021 
7022 u32 cik_get_csb_size(struct radeon_device *rdev)
7023 {
7024 	u32 count = 0;
7025 	const struct cs_section_def *sect = NULL;
7026 	const struct cs_extent_def *ext = NULL;
7027 
7028 	if (rdev->rlc.cs_data == NULL)
7029 		return 0;
7030 
7031 	/* begin clear state */
7032 	count += 2;
7033 	/* context control state */
7034 	count += 3;
7035 
7036 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7037 		for (ext = sect->section; ext->extent != NULL; ++ext) {
7038 			if (sect->id == SECT_CONTEXT)
7039 				count += 2 + ext->reg_count;
7040 			else
7041 				return 0;
7042 		}
7043 	}
7044 	/* pa_sc_raster_config/pa_sc_raster_config1 */
7045 	count += 4;
7046 	/* end clear state */
7047 	count += 2;
7048 	/* clear state */
7049 	count += 2;
7050 
7051 	return count;
7052 }
7053 
/**
 * cik_get_csb_buffer - emit the clear-state indirect buffer
 *
 * @rdev: radeon_device pointer
 * @buffer: destination buffer (little-endian dwords)
 *
 * Writes the PM4 clear-state sequence used by the RLC: preamble,
 * context control, every SECT_CONTEXT register extent from cs_data,
 * the per-asic pa_sc_raster_config pair, then the end-of-clear-state
 * and CLEAR_STATE packets.  Must stay in sync with cik_get_csb_size().
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* only context-register sections are supported; bail on others */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* asic-specific raster config values */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_HAWAII:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
7118 
7119 static void cik_init_pg(struct radeon_device *rdev)
7120 {
7121 	if (rdev->pg_flags) {
7122 		cik_enable_sck_slowdown_on_pu(rdev, true);
7123 		cik_enable_sck_slowdown_on_pd(rdev, true);
7124 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7125 			cik_init_gfx_cgpg(rdev);
7126 			cik_enable_cp_pg(rdev, true);
7127 			cik_enable_gds_pg(rdev, true);
7128 		}
7129 		cik_init_ao_cu_mask(rdev);
7130 		cik_update_gfx_pg(rdev, true);
7131 	}
7132 }
7133 
7134 static void cik_fini_pg(struct radeon_device *rdev)
7135 {
7136 	if (rdev->pg_flags) {
7137 		cik_update_gfx_pg(rdev, false);
7138 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7139 			cik_enable_cp_pg(rdev, false);
7140 			cik_enable_gds_pg(rdev, false);
7141 		}
7142 	}
7143 }
7144 
7145 /*
7146  * Interrupts
7147  * Starting with r6xx, interrupts are handled via a ring buffer.
7148  * Ring buffers are areas of GPU accessible memory that the GPU
7149  * writes interrupt vectors into and the host reads vectors out of.
7150  * There is a rptr (read pointer) that determines where the
7151  * host is currently reading, and a wptr (write pointer)
7152  * which determines where the GPU has written.  When the
7153  * pointers are equal, the ring is idle.  When the GPU
7154  * writes vectors to the ring buffer, it increments the
7155  * wptr.  When there is an interrupt, the host then starts
7156  * fetching commands and processing them until the pointers are
7157  * equal again at which point it updates the rptr.
7158  */
7159 
7160 /**
7161  * cik_enable_interrupts - Enable the interrupt ring buffer
7162  *
7163  * @rdev: radeon_device pointer
7164  *
7165  * Enable the interrupt ring buffer (CIK).
7166  */
7167 static void cik_enable_interrupts(struct radeon_device *rdev)
7168 {
7169 	u32 ih_cntl = RREG32(IH_CNTL);
7170 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7171 
7172 	ih_cntl |= ENABLE_INTR;
7173 	ih_rb_cntl |= IH_RB_ENABLE;
7174 	WREG32(IH_CNTL, ih_cntl);
7175 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7176 	rdev->ih.enabled = true;
7177 }
7178 
7179 /**
7180  * cik_disable_interrupts - Disable the interrupt ring buffer
7181  *
7182  * @rdev: radeon_device pointer
7183  *
7184  * Disable the interrupt ring buffer (CIK).
7185  */
7186 static void cik_disable_interrupts(struct radeon_device *rdev)
7187 {
7188 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7189 	u32 ih_cntl = RREG32(IH_CNTL);
7190 
7191 	ih_rb_cntl &= ~IH_RB_ENABLE;
7192 	ih_cntl &= ~ENABLE_INTR;
7193 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7194 	WREG32(IH_CNTL, ih_cntl);
7195 	/* set rptr, wptr to 0 */
7196 	WREG32(IH_RB_RPTR, 0);
7197 	WREG32(IH_RB_WPTR, 0);
7198 	rdev->ih.enabled = false;
7199 	rdev->ih.rptr = 0;
7200 }
7201 
7202 /**
7203  * cik_disable_interrupt_state - Disable all interrupt sources
7204  *
7205  * @rdev: radeon_device pointer
7206  *
7207  * Clear all interrupt enable bits used by the driver (CIK).
7208  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring: keep only the context busy/empty bits */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma: clear the trap enable on both engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues: all pipes of both MEs fully masked */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* vline/vblank, etc. — only touch crtcs the asic has */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: clear enables but keep the polarity bit */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
7276 
7277 /**
7278  * cik_irq_init - init and enable the interrupt ring
7279  *
7280  * @rdev: radeon_device pointer
7281  *
7282  * Allocate a ring buffer for the interrupt controller,
7283  * enable the RLC, disable interrupts, enable the IH
7284  * ring buffer and enable it (CIK).
 * Called at device load and resume.
7286  * Returns 0 for success, errors for failure.
7287  */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		/* undo the ring allocation on failure */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	/* ring base is 256-byte aligned */
	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
7358 
7359 /**
7360  * cik_irq_set - enable/disable interrupt sources
7361  *
7362  * @rdev: radeon_device pointer
7363  *
7364  * Enable interrupt sources on the GPU (vblanks, hpd,
7365  * etc.) (CIK).
7366  * Returns 0 for success, errors for failure.
7367  */
int cik_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl;
	u32 cp_m1p0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		cik_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		cik_disable_interrupt_state(rdev);
		return 0;
	}

	/* build baseline values with all source enables cleared; the
	 * requested sources are OR'd back in below */
	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;

	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;

	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	/* compute rings: only ME1 pipe0 is supported here */
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
		}
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
		}
	}

	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}

	/* vblank: needed for both vblank waiters and pending page flips */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("cik_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("cik_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("cik_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("cik_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("cik_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("cik_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("cik_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("cik_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("cik_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("cik_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("cik_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("cik_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	/* commit all accumulated enables to the hardware */
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);

	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	/* pflip interrupts are always left enabled per crtc */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}

	WREG32(DC_HPD1_INT_CONTROL, hpd1);
	WREG32(DC_HPD2_INT_CONTROL, hpd2);
	WREG32(DC_HPD3_INT_CONTROL, hpd3);
	WREG32(DC_HPD4_INT_CONTROL, hpd4);
	WREG32(DC_HPD5_INT_CONTROL, hpd5);
	WREG32(DC_HPD6_INT_CONTROL, hpd6);

	return 0;
}
7556 
7557 /**
7558  * cik_irq_ack - ack interrupt sources
7559  *
7560  * @rdev: radeon_device pointer
7561  *
7562  * Ack interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).  Certain interrupt sources are sw
7564  * generated and do not require an explicit ack.
7565  */
7566 static inline void cik_irq_ack(struct radeon_device *rdev)
7567 {
7568 	u32 tmp;
7569 
7570 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7571 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7572 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7573 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7574 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7575 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7576 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7577 
7578 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7579 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7580 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7581 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7582 	if (rdev->num_crtc >= 4) {
7583 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7584 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7585 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7586 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7587 	}
7588 	if (rdev->num_crtc >= 6) {
7589 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7590 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7591 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7592 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7593 	}
7594 
7595 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7596 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7597 		       GRPH_PFLIP_INT_CLEAR);
7598 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7599 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7600 		       GRPH_PFLIP_INT_CLEAR);
7601 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7602 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7603 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7604 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7605 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7606 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7607 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7608 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7609 
7610 	if (rdev->num_crtc >= 4) {
7611 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7612 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7613 			       GRPH_PFLIP_INT_CLEAR);
7614 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7615 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7616 			       GRPH_PFLIP_INT_CLEAR);
7617 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7618 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7619 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7620 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7621 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7622 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7623 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7624 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7625 	}
7626 
7627 	if (rdev->num_crtc >= 6) {
7628 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7629 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7630 			       GRPH_PFLIP_INT_CLEAR);
7631 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7632 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7633 			       GRPH_PFLIP_INT_CLEAR);
7634 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7635 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7636 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7637 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7638 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7639 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7640 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7641 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7642 	}
7643 
7644 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7645 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7646 		tmp |= DC_HPDx_INT_ACK;
7647 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7648 	}
7649 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7650 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7651 		tmp |= DC_HPDx_INT_ACK;
7652 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7653 	}
7654 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7655 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7656 		tmp |= DC_HPDx_INT_ACK;
7657 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7658 	}
7659 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7660 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7661 		tmp |= DC_HPDx_INT_ACK;
7662 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7663 	}
7664 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7665 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7666 		tmp |= DC_HPDx_INT_ACK;
7667 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7668 	}
7669 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7670 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7671 		tmp |= DC_HPDx_INT_ACK;
7672 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7673 	}
7674 }
7675 
7676 /**
7677  * cik_irq_disable - disable interrupts
7678  *
7679  * @rdev: radeon_device pointer
7680  *
7681  * Disable interrupts on the hw (CIK).
7682  */
static void cik_irq_disable(struct radeon_device *rdev)
{
	/* mask everything off at the IH first */
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	/* then clear the per-source enable state so nothing re-fires */
	cik_disable_interrupt_state(rdev);
}
7691 
7692 /**
 * cik_irq_suspend - disable interrupts for suspend
7694  *
7695  * @rdev: radeon_device pointer
7696  *
7697  * Disable interrupts and stop the RLC (CIK).
7698  * Used for suspend.
7699  */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* quiesce interrupts before stopping the RLC */
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7705 
7706 /**
7707  * cik_irq_fini - tear down interrupt support
7708  *
7709  * @rdev: radeon_device pointer
7710  *
7711  * Disable interrupts on the hw and free the IH ring
7712  * buffer (CIK).
7713  * Used for driver unload.
7714  */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* disable irqs/RLC first, then tear down the IH ring buffer */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7720 
7721 /**
7722  * cik_get_ih_wptr - get the IH ring buffer wptr
7723  *
7724  * @rdev: radeon_device pointer
7725  *
7726  * Get the IH ring buffer wptr from either the register
7727  * or the writeback memory buffer (CIK).  Also check for
7728  * ring buffer overflow and deal with it.
7729  * Used by cik_irq_process().
7730  * Returns the value of the wptr.
7731  */
static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
{
	u32 wptr, tmp;

	/* prefer the writeback copy (no MMIO read) when writeback is enabled */
	if (rdev->wb.enabled)
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
	else
		wptr = RREG32(IH_RB_WPTR);

	if (wptr & RB_OVERFLOW) {
		/* strip the overflow flag before using wptr as an offset */
		wptr &= ~RB_OVERFLOW;
		/* When a ring buffer overflow happen start parsing interrupt
		 * from the last not overwritten vector (wptr + 16). Hopefully
		 * this should allow us to catchup.
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
		/* ack the overflow condition in hardware */
		tmp = RREG32(IH_RB_CNTL);
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
		WREG32(IH_RB_CNTL, tmp);
	}
	/* wptr wraps at the ring size */
	return (wptr & rdev->ih.ptr_mask);
}
7756 
7757 /*        CIK IV Ring
7758  * Each IV ring entry is 128 bits:
7759  * [7:0]    - interrupt source id
7760  * [31:8]   - reserved
7761  * [59:32]  - interrupt source data
7762  * [63:60]  - reserved
7763  * [71:64]  - RINGID
7764  *            CP:
7765  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7766  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7767  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7768  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7769  *            PIPE_ID - ME0 0=3D
7770  *                    - ME1&2 compute dispatcher (4 pipes each)
7771  *            SDMA:
7772  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7773  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7774  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7775  * [79:72]  - VMID
7776  * [95:80]  - PASID
7777  * [127:96] - reserved
7778  */
7779 /**
7780  * cik_irq_process - interrupt handler
7781  *
7782  * @rdev: radeon_device pointer
7783  *
7784  * Interrupt hander (CIK).  Walk the IH ring,
7785  * ack interrupts and schedule work to handle
7786  * interrupt events.
7787  * Returns irq process return code.
7788  */
7789 int cik_irq_process(struct radeon_device *rdev)
7790 {
7791 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7792 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7793 	u32 wptr;
7794 	u32 rptr;
7795 	u32 src_id, src_data, ring_id;
7796 	u8 me_id, pipe_id, queue_id;
7797 	u32 ring_index;
7798 	bool queue_hotplug = false;
7799 	bool queue_reset = false;
7800 	u32 addr, status, mc_client;
7801 	bool queue_thermal = false;
7802 
7803 	if (!rdev->ih.enabled || rdev->shutdown)
7804 		return IRQ_NONE;
7805 
7806 	wptr = cik_get_ih_wptr(rdev);
7807 
7808 restart_ih:
7809 	/* is somebody else already processing irqs? */
7810 	if (atomic_xchg(&rdev->ih.lock, 1))
7811 		return IRQ_NONE;
7812 
7813 	rptr = rdev->ih.rptr;
7814 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7815 
7816 	/* Order reading of wptr vs. reading of IH ring data */
7817 	rmb();
7818 
7819 	/* display interrupts */
7820 	cik_irq_ack(rdev);
7821 
7822 	while (rptr != wptr) {
7823 		/* wptr/rptr are in bytes! */
7824 		ring_index = rptr / 4;
7825 
7826 		radeon_kfd_interrupt(rdev,
7827 				(const void *) &rdev->ih.ring[ring_index]);
7828 
7829 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7830 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7831 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7832 
7833 		switch (src_id) {
7834 		case 1: /* D1 vblank/vline */
7835 			switch (src_data) {
7836 			case 0: /* D1 vblank */
7837 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7838 					if (rdev->irq.crtc_vblank_int[0]) {
7839 						drm_handle_vblank(rdev->ddev, 0);
7840 						rdev->pm.vblank_sync = true;
7841 						wake_up(&rdev->irq.vblank_queue);
7842 					}
7843 					if (atomic_read(&rdev->irq.pflip[0]))
7844 						radeon_crtc_handle_vblank(rdev, 0);
7845 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7846 					DRM_DEBUG("IH: D1 vblank\n");
7847 				}
7848 				break;
7849 			case 1: /* D1 vline */
7850 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7851 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7852 					DRM_DEBUG("IH: D1 vline\n");
7853 				}
7854 				break;
7855 			default:
7856 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7857 				break;
7858 			}
7859 			break;
7860 		case 2: /* D2 vblank/vline */
7861 			switch (src_data) {
7862 			case 0: /* D2 vblank */
7863 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7864 					if (rdev->irq.crtc_vblank_int[1]) {
7865 						drm_handle_vblank(rdev->ddev, 1);
7866 						rdev->pm.vblank_sync = true;
7867 						wake_up(&rdev->irq.vblank_queue);
7868 					}
7869 					if (atomic_read(&rdev->irq.pflip[1]))
7870 						radeon_crtc_handle_vblank(rdev, 1);
7871 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7872 					DRM_DEBUG("IH: D2 vblank\n");
7873 				}
7874 				break;
7875 			case 1: /* D2 vline */
7876 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7877 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7878 					DRM_DEBUG("IH: D2 vline\n");
7879 				}
7880 				break;
7881 			default:
7882 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7883 				break;
7884 			}
7885 			break;
7886 		case 3: /* D3 vblank/vline */
7887 			switch (src_data) {
7888 			case 0: /* D3 vblank */
7889 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7890 					if (rdev->irq.crtc_vblank_int[2]) {
7891 						drm_handle_vblank(rdev->ddev, 2);
7892 						rdev->pm.vblank_sync = true;
7893 						wake_up(&rdev->irq.vblank_queue);
7894 					}
7895 					if (atomic_read(&rdev->irq.pflip[2]))
7896 						radeon_crtc_handle_vblank(rdev, 2);
7897 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7898 					DRM_DEBUG("IH: D3 vblank\n");
7899 				}
7900 				break;
7901 			case 1: /* D3 vline */
7902 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7903 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7904 					DRM_DEBUG("IH: D3 vline\n");
7905 				}
7906 				break;
7907 			default:
7908 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7909 				break;
7910 			}
7911 			break;
7912 		case 4: /* D4 vblank/vline */
7913 			switch (src_data) {
7914 			case 0: /* D4 vblank */
7915 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7916 					if (rdev->irq.crtc_vblank_int[3]) {
7917 						drm_handle_vblank(rdev->ddev, 3);
7918 						rdev->pm.vblank_sync = true;
7919 						wake_up(&rdev->irq.vblank_queue);
7920 					}
7921 					if (atomic_read(&rdev->irq.pflip[3]))
7922 						radeon_crtc_handle_vblank(rdev, 3);
7923 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7924 					DRM_DEBUG("IH: D4 vblank\n");
7925 				}
7926 				break;
7927 			case 1: /* D4 vline */
7928 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7929 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7930 					DRM_DEBUG("IH: D4 vline\n");
7931 				}
7932 				break;
7933 			default:
7934 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7935 				break;
7936 			}
7937 			break;
7938 		case 5: /* D5 vblank/vline */
7939 			switch (src_data) {
7940 			case 0: /* D5 vblank */
7941 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7942 					if (rdev->irq.crtc_vblank_int[4]) {
7943 						drm_handle_vblank(rdev->ddev, 4);
7944 						rdev->pm.vblank_sync = true;
7945 						wake_up(&rdev->irq.vblank_queue);
7946 					}
7947 					if (atomic_read(&rdev->irq.pflip[4]))
7948 						radeon_crtc_handle_vblank(rdev, 4);
7949 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7950 					DRM_DEBUG("IH: D5 vblank\n");
7951 				}
7952 				break;
7953 			case 1: /* D5 vline */
7954 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7955 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7956 					DRM_DEBUG("IH: D5 vline\n");
7957 				}
7958 				break;
7959 			default:
7960 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7961 				break;
7962 			}
7963 			break;
7964 		case 6: /* D6 vblank/vline */
7965 			switch (src_data) {
7966 			case 0: /* D6 vblank */
7967 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7968 					if (rdev->irq.crtc_vblank_int[5]) {
7969 						drm_handle_vblank(rdev->ddev, 5);
7970 						rdev->pm.vblank_sync = true;
7971 						wake_up(&rdev->irq.vblank_queue);
7972 					}
7973 					if (atomic_read(&rdev->irq.pflip[5]))
7974 						radeon_crtc_handle_vblank(rdev, 5);
7975 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7976 					DRM_DEBUG("IH: D6 vblank\n");
7977 				}
7978 				break;
7979 			case 1: /* D6 vline */
7980 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7981 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7982 					DRM_DEBUG("IH: D6 vline\n");
7983 				}
7984 				break;
7985 			default:
7986 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7987 				break;
7988 			}
7989 			break;
7990 		case 8: /* D1 page flip */
7991 		case 10: /* D2 page flip */
7992 		case 12: /* D3 page flip */
7993 		case 14: /* D4 page flip */
7994 		case 16: /* D5 page flip */
7995 		case 18: /* D6 page flip */
7996 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7997 			if (radeon_use_pflipirq > 0)
7998 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7999 			break;
8000 		case 42: /* HPD hotplug */
8001 			switch (src_data) {
8002 			case 0:
8003 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
8004 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
8005 					queue_hotplug = true;
8006 					DRM_DEBUG("IH: HPD1\n");
8007 				}
8008 				break;
8009 			case 1:
8010 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
8011 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8012 					queue_hotplug = true;
8013 					DRM_DEBUG("IH: HPD2\n");
8014 				}
8015 				break;
8016 			case 2:
8017 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
8018 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8019 					queue_hotplug = true;
8020 					DRM_DEBUG("IH: HPD3\n");
8021 				}
8022 				break;
8023 			case 3:
8024 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
8025 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8026 					queue_hotplug = true;
8027 					DRM_DEBUG("IH: HPD4\n");
8028 				}
8029 				break;
8030 			case 4:
8031 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
8032 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8033 					queue_hotplug = true;
8034 					DRM_DEBUG("IH: HPD5\n");
8035 				}
8036 				break;
8037 			case 5:
8038 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
8039 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8040 					queue_hotplug = true;
8041 					DRM_DEBUG("IH: HPD6\n");
8042 				}
8043 				break;
8044 			default:
8045 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8046 				break;
8047 			}
8048 			break;
8049 		case 124: /* UVD */
8050 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8051 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8052 			break;
8053 		case 146:
8054 		case 147:
8055 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8056 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8057 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8058 			/* reset addr and status */
8059 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8060 			if (addr == 0x0 && status == 0x0)
8061 				break;
8062 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8063 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8064 				addr);
8065 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8066 				status);
8067 			cik_vm_decode_fault(rdev, status, addr, mc_client);
8068 			break;
8069 		case 167: /* VCE */
8070 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8071 			switch (src_data) {
8072 			case 0:
8073 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8074 				break;
8075 			case 1:
8076 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8077 				break;
8078 			default:
8079 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8080 				break;
8081 			}
8082 			break;
8083 		case 176: /* GFX RB CP_INT */
8084 		case 177: /* GFX IB CP_INT */
8085 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8086 			break;
8087 		case 181: /* CP EOP event */
8088 			DRM_DEBUG("IH: CP EOP\n");
8089 			/* XXX check the bitfield order! */
8090 			me_id = (ring_id & 0x60) >> 5;
8091 			pipe_id = (ring_id & 0x18) >> 3;
8092 			queue_id = (ring_id & 0x7) >> 0;
8093 			switch (me_id) {
8094 			case 0:
8095 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8096 				break;
8097 			case 1:
8098 			case 2:
8099 				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
8100 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8101 				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
8102 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8103 				break;
8104 			}
8105 			break;
8106 		case 184: /* CP Privileged reg access */
8107 			DRM_ERROR("Illegal register access in command stream\n");
8108 			/* XXX check the bitfield order! */
8109 			me_id = (ring_id & 0x60) >> 5;
8110 			pipe_id = (ring_id & 0x18) >> 3;
8111 			queue_id = (ring_id & 0x7) >> 0;
8112 			switch (me_id) {
8113 			case 0:
8114 				/* This results in a full GPU reset, but all we need to do is soft
8115 				 * reset the CP for gfx
8116 				 */
8117 				queue_reset = true;
8118 				break;
8119 			case 1:
8120 				/* XXX compute */
8121 				queue_reset = true;
8122 				break;
8123 			case 2:
8124 				/* XXX compute */
8125 				queue_reset = true;
8126 				break;
8127 			}
8128 			break;
8129 		case 185: /* CP Privileged inst */
8130 			DRM_ERROR("Illegal instruction in command stream\n");
8131 			/* XXX check the bitfield order! */
8132 			me_id = (ring_id & 0x60) >> 5;
8133 			pipe_id = (ring_id & 0x18) >> 3;
8134 			queue_id = (ring_id & 0x7) >> 0;
8135 			switch (me_id) {
8136 			case 0:
8137 				/* This results in a full GPU reset, but all we need to do is soft
8138 				 * reset the CP for gfx
8139 				 */
8140 				queue_reset = true;
8141 				break;
8142 			case 1:
8143 				/* XXX compute */
8144 				queue_reset = true;
8145 				break;
8146 			case 2:
8147 				/* XXX compute */
8148 				queue_reset = true;
8149 				break;
8150 			}
8151 			break;
8152 		case 224: /* SDMA trap event */
8153 			/* XXX check the bitfield order! */
8154 			me_id = (ring_id & 0x3) >> 0;
8155 			queue_id = (ring_id & 0xc) >> 2;
8156 			DRM_DEBUG("IH: SDMA trap\n");
8157 			switch (me_id) {
8158 			case 0:
8159 				switch (queue_id) {
8160 				case 0:
8161 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8162 					break;
8163 				case 1:
8164 					/* XXX compute */
8165 					break;
8166 				case 2:
8167 					/* XXX compute */
8168 					break;
8169 				}
8170 				break;
8171 			case 1:
8172 				switch (queue_id) {
8173 				case 0:
8174 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8175 					break;
8176 				case 1:
8177 					/* XXX compute */
8178 					break;
8179 				case 2:
8180 					/* XXX compute */
8181 					break;
8182 				}
8183 				break;
8184 			}
8185 			break;
8186 		case 230: /* thermal low to high */
8187 			DRM_DEBUG("IH: thermal low to high\n");
8188 			rdev->pm.dpm.thermal.high_to_low = false;
8189 			queue_thermal = true;
8190 			break;
8191 		case 231: /* thermal high to low */
8192 			DRM_DEBUG("IH: thermal high to low\n");
8193 			rdev->pm.dpm.thermal.high_to_low = true;
8194 			queue_thermal = true;
8195 			break;
8196 		case 233: /* GUI IDLE */
8197 			DRM_DEBUG("IH: GUI idle\n");
8198 			break;
8199 		case 241: /* SDMA Privileged inst */
8200 		case 247: /* SDMA Privileged inst */
8201 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8202 			/* XXX check the bitfield order! */
8203 			me_id = (ring_id & 0x3) >> 0;
8204 			queue_id = (ring_id & 0xc) >> 2;
8205 			switch (me_id) {
8206 			case 0:
8207 				switch (queue_id) {
8208 				case 0:
8209 					queue_reset = true;
8210 					break;
8211 				case 1:
8212 					/* XXX compute */
8213 					queue_reset = true;
8214 					break;
8215 				case 2:
8216 					/* XXX compute */
8217 					queue_reset = true;
8218 					break;
8219 				}
8220 				break;
8221 			case 1:
8222 				switch (queue_id) {
8223 				case 0:
8224 					queue_reset = true;
8225 					break;
8226 				case 1:
8227 					/* XXX compute */
8228 					queue_reset = true;
8229 					break;
8230 				case 2:
8231 					/* XXX compute */
8232 					queue_reset = true;
8233 					break;
8234 				}
8235 				break;
8236 			}
8237 			break;
8238 		default:
8239 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8240 			break;
8241 		}
8242 
8243 		/* wptr/rptr are in bytes! */
8244 		rptr += 16;
8245 		rptr &= rdev->ih.ptr_mask;
8246 		WREG32(IH_RB_RPTR, rptr);
8247 	}
8248 	if (queue_hotplug)
8249 		schedule_work(&rdev->hotplug_work);
8250 	if (queue_reset) {
8251 		rdev->needs_reset = true;
8252 		wake_up_all(&rdev->fence_queue);
8253 	}
8254 	if (queue_thermal)
8255 		schedule_work(&rdev->pm.dpm.thermal.work);
8256 	rdev->ih.rptr = rptr;
8257 	atomic_set(&rdev->ih.lock, 0);
8258 
8259 	/* make sure wptr hasn't changed while processing */
8260 	wptr = cik_get_ih_wptr(rdev);
8261 	if (wptr != rptr)
8262 		goto restart_ih;
8263 
8264 	return IRQ_HANDLED;
8265 }
8266 
8267 /*
8268  * startup/shutdown callbacks
8269  */
8270 /**
8271  * cik_startup - program the asic to a functional state
8272  *
8273  * @rdev: radeon_device pointer
8274  *
8275  * Programs the asic to a functional state (CIK).
8276  * Called by cik_init() and cik_resume().
8277  * Returns 0 for success, error for failure.
8278  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 nop;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* on dGPU, load MC ucode here unless DPM already did it */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* pick the save/restore list matching the APU family */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start fence handling on each ring: gfx, two compute, two sdma */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD/VCE failure is non-fatal: disable the ring and continue */
	r = radeon_uvd_resume(rdev);
	if (!r) {
		r = uvd_v4_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	r = radeon_vce_resume(rdev);
	if (!r) {
		r = vce_v2_0_resume(rdev);
		if (!r)
			r = radeon_fence_driver_start_ring(rdev,
							   TN_RING_TYPE_VCE1_INDEX);
		if (!r)
			r = radeon_fence_driver_start_ring(rdev,
							   TN_RING_TYPE_VCE2_INDEX);
	}
	if (r) {
		dev_err(rdev->dev, "VCE init error (%d).\n", r);
		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* older Hawaii firmware still uses the type-2 NOP packet */
	if (rdev->family == CHIP_HAWAII) {
		if (rdev->new_fw)
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
		else
			nop = RADEON_CP_PACKET2;
	} else {
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
	}

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     nop);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* UVD/VCE rings only if their resume above succeeded (ring_size != 0) */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     RADEON_CP_PACKET2);
		if (!r)
			r = uvd_v1_0_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = -ENOENT;

	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
	if (ring->ring_size)
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     VCE_CMD_NO_OP);

	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
	if (ring->ring_size)
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     VCE_CMD_NO_OP);

	/* -ENOENT means VCE was disabled above; only other errors are reported */
	if (!r)
		r = vce_v1_0_init(rdev);
	else if (r != -ENOENT)
		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	r = radeon_kfd_resume(rdev);
	if (r)
		return r;

	return 0;
}
8524 
8525 /**
8526  * cik_resume - resume the asic to a functional state
8527  *
8528  * @rdev: radeon_device pointer
8529  *
8530  * Programs the asic to a functional state (CIK).
8531  * Called at resume.
8532  * Returns 0 for success, error for failure.
8533  */
int cik_resume(struct radeon_device *rdev)
{
	int r;

	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);

	/* init golden registers */
	cik_init_golden_registers(rdev);

	if (rdev->pm.pm_method == PM_METHOD_DPM)
		radeon_pm_resume(rdev);

	/* accel_working is set before startup so helpers it calls see it;
	 * cleared again on failure.
	 */
	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		DRM_ERROR("cik startup failed on resume\n");
		rdev->accel_working = false;
		return r;
	}

	return r;

}
8558 
8559 /**
8560  * cik_suspend - suspend the asic
8561  *
8562  * @rdev: radeon_device pointer
8563  *
8564  * Bring the chip into a state suitable for suspend (CIK).
8565  * Called at suspend.
8566  * Returns 0 for success.
8567  */
8568 int cik_suspend(struct radeon_device *rdev)
8569 {
8570 	radeon_kfd_suspend(rdev);
8571 	radeon_pm_suspend(rdev);
8572 	radeon_audio_fini(rdev);
8573 	radeon_vm_manager_fini(rdev);
8574 	cik_cp_enable(rdev, false);
8575 	cik_sdma_enable(rdev, false);
8576 	uvd_v1_0_fini(rdev);
8577 	radeon_uvd_suspend(rdev);
8578 	radeon_vce_suspend(rdev);
8579 	cik_fini_pg(rdev);
8580 	cik_fini_cg(rdev);
8581 	cik_irq_suspend(rdev);
8582 	radeon_wb_disable(rdev);
8583 	cik_pcie_gart_disable(rdev);
8584 	return 0;
8585 }
8586 
/* Plan is to move initialization into that function and use
 * helper functions so that radeon_device_init does pretty much
 * nothing more than call asic specific functions. This
 * should also allow removal of a bunch of callback functions
 * like vram_info.
 */
8593 /**
8594  * cik_init - asic specific driver and hw init
8595  *
8596  * @rdev: radeon_device pointer
8597  *
8598  * Setup asic specific driver variables and program the hw
8599  * to a functional state (CIK).
8600  * Called at driver startup.
8601  * Returns 0 for success, errors for failure.
8602  */
8603 int cik_init(struct radeon_device *rdev)
8604 {
8605 	struct radeon_ring *ring;
8606 	int r;
8607 
8608 	/* Read BIOS */
8609 	if (!radeon_get_bios(rdev)) {
8610 		if (ASIC_IS_AVIVO(rdev))
8611 			return -EINVAL;
8612 	}
8613 	/* Must be an ATOMBIOS */
8614 	if (!rdev->is_atom_bios) {
8615 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8616 		return -EINVAL;
8617 	}
8618 	r = radeon_atombios_init(rdev);
8619 	if (r)
8620 		return r;
8621 
8622 	/* Post card if necessary */
8623 	if (!radeon_card_posted(rdev)) {
8624 		if (!rdev->bios) {
8625 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8626 			return -EINVAL;
8627 		}
8628 		DRM_INFO("GPU not posted. posting now...\n");
8629 		atom_asic_init(rdev->mode_info.atom_context);
8630 	}
8631 	/* init golden registers */
8632 	cik_init_golden_registers(rdev);
8633 	/* Initialize scratch registers */
8634 	cik_scratch_init(rdev);
8635 	/* Initialize surface registers */
8636 	radeon_surface_init(rdev);
8637 	/* Initialize clocks */
8638 	radeon_get_clock_info(rdev->ddev);
8639 
8640 	/* Fence driver */
8641 	r = radeon_fence_driver_init(rdev);
8642 	if (r)
8643 		return r;
8644 
8645 	/* initialize memory controller */
8646 	r = cik_mc_init(rdev);
8647 	if (r)
8648 		return r;
8649 	/* Memory manager */
8650 	r = radeon_bo_init(rdev);
8651 	if (r)
8652 		return r;
8653 
8654 	if (rdev->flags & RADEON_IS_IGP) {
8655 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8656 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8657 			r = cik_init_microcode(rdev);
8658 			if (r) {
8659 				DRM_ERROR("Failed to load firmware!\n");
8660 				return r;
8661 			}
8662 		}
8663 	} else {
8664 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8665 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8666 		    !rdev->mc_fw) {
8667 			r = cik_init_microcode(rdev);
8668 			if (r) {
8669 				DRM_ERROR("Failed to load firmware!\n");
8670 				return r;
8671 			}
8672 		}
8673 	}
8674 
8675 	/* Initialize power management */
8676 	radeon_pm_init(rdev);
8677 
8678 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8679 	ring->ring_obj = NULL;
8680 	r600_ring_init(rdev, ring, 1024 * 1024);
8681 
8682 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8683 	ring->ring_obj = NULL;
8684 	r600_ring_init(rdev, ring, 1024 * 1024);
8685 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8686 	if (r)
8687 		return r;
8688 
8689 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8690 	ring->ring_obj = NULL;
8691 	r600_ring_init(rdev, ring, 1024 * 1024);
8692 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8693 	if (r)
8694 		return r;
8695 
8696 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8697 	ring->ring_obj = NULL;
8698 	r600_ring_init(rdev, ring, 256 * 1024);
8699 
8700 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8701 	ring->ring_obj = NULL;
8702 	r600_ring_init(rdev, ring, 256 * 1024);
8703 
8704 	r = radeon_uvd_init(rdev);
8705 	if (!r) {
8706 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8707 		ring->ring_obj = NULL;
8708 		r600_ring_init(rdev, ring, 4096);
8709 	}
8710 
8711 	r = radeon_vce_init(rdev);
8712 	if (!r) {
8713 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8714 		ring->ring_obj = NULL;
8715 		r600_ring_init(rdev, ring, 4096);
8716 
8717 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8718 		ring->ring_obj = NULL;
8719 		r600_ring_init(rdev, ring, 4096);
8720 	}
8721 
8722 	rdev->ih.ring_obj = NULL;
8723 	r600_ih_ring_init(rdev, 64 * 1024);
8724 
8725 	r = r600_pcie_gart_init(rdev);
8726 	if (r)
8727 		return r;
8728 
8729 	rdev->accel_working = true;
8730 	r = cik_startup(rdev);
8731 	if (r) {
8732 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8733 		cik_cp_fini(rdev);
8734 		cik_sdma_fini(rdev);
8735 		cik_irq_fini(rdev);
8736 		sumo_rlc_fini(rdev);
8737 		cik_mec_fini(rdev);
8738 		radeon_wb_fini(rdev);
8739 		radeon_ib_pool_fini(rdev);
8740 		radeon_vm_manager_fini(rdev);
8741 		radeon_irq_kms_fini(rdev);
8742 		cik_pcie_gart_fini(rdev);
8743 		rdev->accel_working = false;
8744 	}
8745 
8746 	/* Don't start up if the MC ucode is missing.
8747 	 * The default clocks and voltages before the MC ucode
8748 	 * is loaded are not suffient for advanced operations.
8749 	 */
8750 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8751 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8752 		return -EINVAL;
8753 	}
8754 
8755 	return 0;
8756 }
8757 
8758 /**
8759  * cik_fini - asic specific driver and hw fini
8760  *
8761  * @rdev: radeon_device pointer
8762  *
8763  * Tear down the asic specific driver variables and program the hw
8764  * to an idle state (CIK).
8765  * Called at driver unload.
8766  */
8767 void cik_fini(struct radeon_device *rdev)
8768 {
8769 	radeon_pm_fini(rdev);
8770 	cik_cp_fini(rdev);
8771 	cik_sdma_fini(rdev);
8772 	cik_fini_pg(rdev);
8773 	cik_fini_cg(rdev);
8774 	cik_irq_fini(rdev);
8775 	sumo_rlc_fini(rdev);
8776 	cik_mec_fini(rdev);
8777 	radeon_wb_fini(rdev);
8778 	radeon_vm_manager_fini(rdev);
8779 	radeon_ib_pool_fini(rdev);
8780 	radeon_irq_kms_fini(rdev);
8781 	uvd_v1_0_fini(rdev);
8782 	radeon_uvd_fini(rdev);
8783 	radeon_vce_fini(rdev);
8784 	cik_pcie_gart_fini(rdev);
8785 	r600_vram_scratch_fini(rdev);
8786 	radeon_gem_fini(rdev);
8787 	radeon_fence_driver_fini(rdev);
8788 	radeon_bo_fini(rdev);
8789 	radeon_atombios_fini(rdev);
8790 	kfree(rdev->bios);
8791 	rdev->bios = NULL;
8792 }
8793 
8794 void dce8_program_fmt(struct drm_encoder *encoder)
8795 {
8796 	struct drm_device *dev = encoder->dev;
8797 	struct radeon_device *rdev = dev->dev_private;
8798 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8799 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8800 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8801 	int bpc = 0;
8802 	u32 tmp = 0;
8803 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8804 
8805 	if (connector) {
8806 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8807 		bpc = radeon_get_monitor_bpc(connector);
8808 		dither = radeon_connector->dither;
8809 	}
8810 
8811 	/* LVDS/eDP FMT is set up by atom */
8812 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8813 		return;
8814 
8815 	/* not needed for analog */
8816 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8817 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8818 		return;
8819 
8820 	if (bpc == 0)
8821 		return;
8822 
8823 	switch (bpc) {
8824 	case 6:
8825 		if (dither == RADEON_FMT_DITHER_ENABLE)
8826 			/* XXX sort out optimal dither settings */
8827 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8828 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8829 		else
8830 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8831 		break;
8832 	case 8:
8833 		if (dither == RADEON_FMT_DITHER_ENABLE)
8834 			/* XXX sort out optimal dither settings */
8835 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8836 				FMT_RGB_RANDOM_ENABLE |
8837 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8838 		else
8839 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8840 		break;
8841 	case 10:
8842 		if (dither == RADEON_FMT_DITHER_ENABLE)
8843 			/* XXX sort out optimal dither settings */
8844 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8845 				FMT_RGB_RANDOM_ENABLE |
8846 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8847 		else
8848 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8849 		break;
8850 	default:
8851 		/* not needed */
8852 		break;
8853 	}
8854 
8855 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8856 }
8857 
8858 /* display watermark setup */
8859 /**
8860  * dce8_line_buffer_adjust - Set up the line buffer
8861  *
8862  * @rdev: radeon_device pointer
8863  * @radeon_crtc: the selected display controller
8864  * @mode: the current display mode on the selected display
8865  * controller
8866  *
8867  * Setup up the line buffer allocation for
8868  * the selected display controller (CIK).
8869  * Returns the line buffer size in pixels.
8870  */
8871 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8872 				   struct radeon_crtc *radeon_crtc,
8873 				   struct drm_display_mode *mode)
8874 {
8875 	u32 tmp, buffer_alloc, i;
8876 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8877 	/*
8878 	 * Line Buffer Setup
8879 	 * There are 6 line buffers, one for each display controllers.
8880 	 * There are 3 partitions per LB. Select the number of partitions
8881 	 * to enable based on the display width.  For display widths larger
8882 	 * than 4096, you need use to use 2 display controllers and combine
8883 	 * them using the stereo blender.
8884 	 */
8885 	if (radeon_crtc->base.enabled && mode) {
8886 		if (mode->crtc_hdisplay < 1920) {
8887 			tmp = 1;
8888 			buffer_alloc = 2;
8889 		} else if (mode->crtc_hdisplay < 2560) {
8890 			tmp = 2;
8891 			buffer_alloc = 2;
8892 		} else if (mode->crtc_hdisplay < 4096) {
8893 			tmp = 0;
8894 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8895 		} else {
8896 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8897 			tmp = 0;
8898 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8899 		}
8900 	} else {
8901 		tmp = 1;
8902 		buffer_alloc = 0;
8903 	}
8904 
8905 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8906 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8907 
8908 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8909 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8910 	for (i = 0; i < rdev->usec_timeout; i++) {
8911 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8912 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8913 			break;
8914 		udelay(1);
8915 	}
8916 
8917 	if (radeon_crtc->base.enabled && mode) {
8918 		switch (tmp) {
8919 		case 0:
8920 		default:
8921 			return 4096 * 2;
8922 		case 1:
8923 			return 1920 * 2;
8924 		case 2:
8925 			return 2560 * 2;
8926 		}
8927 	}
8928 
8929 	/* controller not enabled, so no lb used */
8930 	return 0;
8931 }
8932 
8933 /**
8934  * cik_get_number_of_dram_channels - get the number of dram channels
8935  *
8936  * @rdev: radeon_device pointer
8937  *
8938  * Look up the number of video ram channels (CIK).
8939  * Used for display watermark bandwidth calculations
8940  * Returns the number of dram channels
8941  */
8942 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8943 {
8944 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8945 
8946 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8947 	case 0:
8948 	default:
8949 		return 1;
8950 	case 1:
8951 		return 2;
8952 	case 2:
8953 		return 4;
8954 	case 3:
8955 		return 8;
8956 	case 4:
8957 		return 3;
8958 	case 5:
8959 		return 6;
8960 	case 6:
8961 		return 10;
8962 	case 7:
8963 		return 12;
8964 	case 8:
8965 		return 16;
8966 	}
8967 }
8968 
/* Input parameters for the DCE8 display watermark calculations. */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8984 
8985 /**
8986  * dce8_dram_bandwidth - get the dram bandwidth
8987  *
8988  * @wm: watermark calculation data
8989  *
8990  * Calculate the raw dram bandwidth (CIK).
8991  * Used for display watermark bandwidth calculations
8992  * Returns the dram bandwidth in MBytes/s
8993  */
8994 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8995 {
8996 	/* Calculate raw DRAM Bandwidth */
8997 	fixed20_12 dram_efficiency; /* 0.7 */
8998 	fixed20_12 yclk, dram_channels, bandwidth;
8999 	fixed20_12 a;
9000 
9001 	a.full = dfixed_const(1000);
9002 	yclk.full = dfixed_const(wm->yclk);
9003 	yclk.full = dfixed_div(yclk, a);
9004 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9005 	a.full = dfixed_const(10);
9006 	dram_efficiency.full = dfixed_const(7);
9007 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
9008 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9009 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9010 
9011 	return dfixed_trunc(bandwidth);
9012 }
9013 
9014 /**
9015  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9016  *
9017  * @wm: watermark calculation data
9018  *
9019  * Calculate the dram bandwidth used for display (CIK).
9020  * Used for display watermark bandwidth calculations
9021  * Returns the dram bandwidth for display in MBytes/s
9022  */
9023 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9024 {
9025 	/* Calculate DRAM Bandwidth and the part allocated to display. */
9026 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9027 	fixed20_12 yclk, dram_channels, bandwidth;
9028 	fixed20_12 a;
9029 
9030 	a.full = dfixed_const(1000);
9031 	yclk.full = dfixed_const(wm->yclk);
9032 	yclk.full = dfixed_div(yclk, a);
9033 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9034 	a.full = dfixed_const(10);
9035 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
9036 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9037 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9038 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9039 
9040 	return dfixed_trunc(bandwidth);
9041 }
9042 
9043 /**
9044  * dce8_data_return_bandwidth - get the data return bandwidth
9045  *
9046  * @wm: watermark calculation data
9047  *
9048  * Calculate the data return bandwidth used for display (CIK).
9049  * Used for display watermark bandwidth calculations
9050  * Returns the data return bandwidth in MBytes/s
9051  */
9052 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9053 {
9054 	/* Calculate the display Data return Bandwidth */
9055 	fixed20_12 return_efficiency; /* 0.8 */
9056 	fixed20_12 sclk, bandwidth;
9057 	fixed20_12 a;
9058 
9059 	a.full = dfixed_const(1000);
9060 	sclk.full = dfixed_const(wm->sclk);
9061 	sclk.full = dfixed_div(sclk, a);
9062 	a.full = dfixed_const(10);
9063 	return_efficiency.full = dfixed_const(8);
9064 	return_efficiency.full = dfixed_div(return_efficiency, a);
9065 	a.full = dfixed_const(32);
9066 	bandwidth.full = dfixed_mul(a, sclk);
9067 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9068 
9069 	return dfixed_trunc(bandwidth);
9070 }
9071 
9072 /**
9073  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9074  *
9075  * @wm: watermark calculation data
9076  *
9077  * Calculate the dmif bandwidth used for display (CIK).
9078  * Used for display watermark bandwidth calculations
9079  * Returns the dmif bandwidth in MBytes/s
9080  */
9081 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9082 {
9083 	/* Calculate the DMIF Request Bandwidth */
9084 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9085 	fixed20_12 disp_clk, bandwidth;
9086 	fixed20_12 a, b;
9087 
9088 	a.full = dfixed_const(1000);
9089 	disp_clk.full = dfixed_const(wm->disp_clk);
9090 	disp_clk.full = dfixed_div(disp_clk, a);
9091 	a.full = dfixed_const(32);
9092 	b.full = dfixed_mul(a, disp_clk);
9093 
9094 	a.full = dfixed_const(10);
9095 	disp_clk_request_efficiency.full = dfixed_const(8);
9096 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9097 
9098 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9099 
9100 	return dfixed_trunc(bandwidth);
9101 }
9102 
9103 /**
9104  * dce8_available_bandwidth - get the min available bandwidth
9105  *
9106  * @wm: watermark calculation data
9107  *
9108  * Calculate the min available bandwidth used for display (CIK).
9109  * Used for display watermark bandwidth calculations
9110  * Returns the min available bandwidth in MBytes/s
9111  */
9112 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9113 {
9114 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9115 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9116 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9117 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9118 
9119 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9120 }
9121 
9122 /**
9123  * dce8_average_bandwidth - get the average available bandwidth
9124  *
9125  * @wm: watermark calculation data
9126  *
9127  * Calculate the average available bandwidth used for display (CIK).
9128  * Used for display watermark bandwidth calculations
9129  * Returns the average available bandwidth in MBytes/s
9130  */
9131 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9132 {
9133 	/* Calculate the display mode Average Bandwidth
9134 	 * DisplayMode should contain the source and destination dimensions,
9135 	 * timing, etc.
9136 	 */
9137 	fixed20_12 bpp;
9138 	fixed20_12 line_time;
9139 	fixed20_12 src_width;
9140 	fixed20_12 bandwidth;
9141 	fixed20_12 a;
9142 
9143 	a.full = dfixed_const(1000);
9144 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9145 	line_time.full = dfixed_div(line_time, a);
9146 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9147 	src_width.full = dfixed_const(wm->src_width);
9148 	bandwidth.full = dfixed_mul(src_width, bpp);
9149 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9150 	bandwidth.full = dfixed_div(bandwidth, line_time);
9151 
9152 	return dfixed_trunc(bandwidth);
9153 }
9154 
9155 /**
9156  * dce8_latency_watermark - get the latency watermark
9157  *
9158  * @wm: watermark calculation data
9159  *
9160  * Calculate the latency watermark (CIK).
9161  * Used for display watermark bandwidth calculations
9162  * Returns the latency watermark in ns
9163  */
9164 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9165 {
9166 	/* First calculate the latency in ns */
9167 	u32 mc_latency = 2000; /* 2000 ns. */
9168 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9169 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9170 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9171 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9172 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9173 		(wm->num_heads * cursor_line_pair_return_time);
9174 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9175 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9176 	u32 tmp, dmif_size = 12288;
9177 	fixed20_12 a, b, c;
9178 
9179 	if (wm->num_heads == 0)
9180 		return 0;
9181 
9182 	a.full = dfixed_const(2);
9183 	b.full = dfixed_const(1);
9184 	if ((wm->vsc.full > a.full) ||
9185 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9186 	    (wm->vtaps >= 5) ||
9187 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9188 		max_src_lines_per_dst_line = 4;
9189 	else
9190 		max_src_lines_per_dst_line = 2;
9191 
9192 	a.full = dfixed_const(available_bandwidth);
9193 	b.full = dfixed_const(wm->num_heads);
9194 	a.full = dfixed_div(a, b);
9195 
9196 	b.full = dfixed_const(mc_latency + 512);
9197 	c.full = dfixed_const(wm->disp_clk);
9198 	b.full = dfixed_div(b, c);
9199 
9200 	c.full = dfixed_const(dmif_size);
9201 	b.full = dfixed_div(c, b);
9202 
9203 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9204 
9205 	b.full = dfixed_const(1000);
9206 	c.full = dfixed_const(wm->disp_clk);
9207 	b.full = dfixed_div(c, b);
9208 	c.full = dfixed_const(wm->bytes_per_pixel);
9209 	b.full = dfixed_mul(b, c);
9210 
9211 	lb_fill_bw = min(tmp, dfixed_trunc(b));
9212 
9213 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9214 	b.full = dfixed_const(1000);
9215 	c.full = dfixed_const(lb_fill_bw);
9216 	b.full = dfixed_div(c, b);
9217 	a.full = dfixed_div(a, b);
9218 	line_fill_time = dfixed_trunc(a);
9219 
9220 	if (line_fill_time < wm->active_time)
9221 		return latency;
9222 	else
9223 		return latency + (line_fill_time - wm->active_time);
9224 
9225 }
9226 
9227 /**
9228  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9229  * average and available dram bandwidth
9230  *
9231  * @wm: watermark calculation data
9232  *
9233  * Check if the display average bandwidth fits in the display
9234  * dram bandwidth (CIK).
9235  * Used for display watermark bandwidth calculations
9236  * Returns true if the display fits, false if not.
9237  */
9238 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9239 {
9240 	if (dce8_average_bandwidth(wm) <=
9241 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9242 		return true;
9243 	else
9244 		return false;
9245 }
9246 
9247 /**
9248  * dce8_average_bandwidth_vs_available_bandwidth - check
9249  * average and available bandwidth
9250  *
9251  * @wm: watermark calculation data
9252  *
9253  * Check if the display average bandwidth fits in the display
9254  * available bandwidth (CIK).
9255  * Used for display watermark bandwidth calculations
9256  * Returns true if the display fits, false if not.
9257  */
9258 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9259 {
9260 	if (dce8_average_bandwidth(wm) <=
9261 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9262 		return true;
9263 	else
9264 		return false;
9265 }
9266 
9267 /**
9268  * dce8_check_latency_hiding - check latency hiding
9269  *
9270  * @wm: watermark calculation data
9271  *
9272  * Check latency hiding (CIK).
9273  * Used for display watermark bandwidth calculations
9274  * Returns true if the display fits, false if not.
9275  */
9276 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9277 {
9278 	u32 lb_partitions = wm->lb_size / wm->src_width;
9279 	u32 line_time = wm->active_time + wm->blank_time;
9280 	u32 latency_tolerant_lines;
9281 	u32 latency_hiding;
9282 	fixed20_12 a;
9283 
9284 	a.full = dfixed_const(1);
9285 	if (wm->vsc.full > a.full)
9286 		latency_tolerant_lines = 1;
9287 	else {
9288 		if (lb_partitions <= (wm->vtaps + 1))
9289 			latency_tolerant_lines = 1;
9290 		else
9291 			latency_tolerant_lines = 2;
9292 	}
9293 
9294 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9295 
9296 	if (dce8_latency_watermark(wm) <= latency_hiding)
9297 		return true;
9298 	else
9299 		return false;
9300 }
9301 
9302 /**
9303  * dce8_program_watermarks - program display watermarks
9304  *
9305  * @rdev: radeon_device pointer
9306  * @radeon_crtc: the selected display controller
9307  * @lb_size: line buffer size
9308  * @num_heads: number of display controllers in use
9309  *
9310  * Calculate and program the display watermarks for the
9311  * selected display controller (CIK).
9312  */
9313 static void dce8_program_watermarks(struct radeon_device *rdev,
9314 				    struct radeon_crtc *radeon_crtc,
9315 				    u32 lb_size, u32 num_heads)
9316 {
9317 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9318 	struct dce8_wm_params wm_low, wm_high;
9319 	u32 pixel_period;
9320 	u32 line_time = 0;
9321 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9322 	u32 tmp, wm_mask;
9323 
9324 	if (radeon_crtc->base.enabled && num_heads && mode) {
9325 		pixel_period = 1000000 / (u32)mode->clock;
9326 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9327 
9328 		/* watermark for high clocks */
9329 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9330 		    rdev->pm.dpm_enabled) {
9331 			wm_high.yclk =
9332 				radeon_dpm_get_mclk(rdev, false) * 10;
9333 			wm_high.sclk =
9334 				radeon_dpm_get_sclk(rdev, false) * 10;
9335 		} else {
9336 			wm_high.yclk = rdev->pm.current_mclk * 10;
9337 			wm_high.sclk = rdev->pm.current_sclk * 10;
9338 		}
9339 
9340 		wm_high.disp_clk = mode->clock;
9341 		wm_high.src_width = mode->crtc_hdisplay;
9342 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9343 		wm_high.blank_time = line_time - wm_high.active_time;
9344 		wm_high.interlaced = false;
9345 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9346 			wm_high.interlaced = true;
9347 		wm_high.vsc = radeon_crtc->vsc;
9348 		wm_high.vtaps = 1;
9349 		if (radeon_crtc->rmx_type != RMX_OFF)
9350 			wm_high.vtaps = 2;
9351 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9352 		wm_high.lb_size = lb_size;
9353 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9354 		wm_high.num_heads = num_heads;
9355 
9356 		/* set for high clocks */
9357 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9358 
9359 		/* possibly force display priority to high */
9360 		/* should really do this at mode validation time... */
9361 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9362 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9363 		    !dce8_check_latency_hiding(&wm_high) ||
9364 		    (rdev->disp_priority == 2)) {
9365 			DRM_DEBUG_KMS("force priority to high\n");
9366 		}
9367 
9368 		/* watermark for low clocks */
9369 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9370 		    rdev->pm.dpm_enabled) {
9371 			wm_low.yclk =
9372 				radeon_dpm_get_mclk(rdev, true) * 10;
9373 			wm_low.sclk =
9374 				radeon_dpm_get_sclk(rdev, true) * 10;
9375 		} else {
9376 			wm_low.yclk = rdev->pm.current_mclk * 10;
9377 			wm_low.sclk = rdev->pm.current_sclk * 10;
9378 		}
9379 
9380 		wm_low.disp_clk = mode->clock;
9381 		wm_low.src_width = mode->crtc_hdisplay;
9382 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9383 		wm_low.blank_time = line_time - wm_low.active_time;
9384 		wm_low.interlaced = false;
9385 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9386 			wm_low.interlaced = true;
9387 		wm_low.vsc = radeon_crtc->vsc;
9388 		wm_low.vtaps = 1;
9389 		if (radeon_crtc->rmx_type != RMX_OFF)
9390 			wm_low.vtaps = 2;
9391 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9392 		wm_low.lb_size = lb_size;
9393 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9394 		wm_low.num_heads = num_heads;
9395 
9396 		/* set for low clocks */
9397 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9398 
9399 		/* possibly force display priority to high */
9400 		/* should really do this at mode validation time... */
9401 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9402 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9403 		    !dce8_check_latency_hiding(&wm_low) ||
9404 		    (rdev->disp_priority == 2)) {
9405 			DRM_DEBUG_KMS("force priority to high\n");
9406 		}
9407 	}
9408 
9409 	/* select wm A */
9410 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9411 	tmp = wm_mask;
9412 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9413 	tmp |= LATENCY_WATERMARK_MASK(1);
9414 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9415 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9416 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9417 		LATENCY_HIGH_WATERMARK(line_time)));
9418 	/* select wm B */
9419 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9420 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9421 	tmp |= LATENCY_WATERMARK_MASK(2);
9422 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9423 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9424 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9425 		LATENCY_HIGH_WATERMARK(line_time)));
9426 	/* restore original selection */
9427 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9428 
9429 	/* save values for DPM */
9430 	radeon_crtc->line_time = line_time;
9431 	radeon_crtc->wm_high = latency_watermark_a;
9432 	radeon_crtc->wm_low = latency_watermark_b;
9433 }
9434 
9435 /**
9436  * dce8_bandwidth_update - program display watermarks
9437  *
9438  * @rdev: radeon_device pointer
9439  *
9440  * Calculate and program the display watermarks and line
9441  * buffer allocation (CIK).
9442  */
9443 void dce8_bandwidth_update(struct radeon_device *rdev)
9444 {
9445 	struct drm_display_mode *mode = NULL;
9446 	u32 num_heads = 0, lb_size;
9447 	int i;
9448 
9449 	if (!rdev->mode_info.mode_config_initialized)
9450 		return;
9451 
9452 	radeon_update_display_priority(rdev);
9453 
9454 	for (i = 0; i < rdev->num_crtc; i++) {
9455 		if (rdev->mode_info.crtcs[i]->base.enabled)
9456 			num_heads++;
9457 	}
9458 	for (i = 0; i < rdev->num_crtc; i++) {
9459 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9460 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9461 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9462 	}
9463 }
9464 
9465 /**
9466  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9467  *
9468  * @rdev: radeon_device pointer
9469  *
9470  * Fetches a GPU clock counter snapshot (SI).
9471  * Returns the 64 bit clock counter snapshot.
9472  */
9473 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9474 {
9475 	uint64_t clock;
9476 
9477 	mutex_lock(&rdev->gpu_clock_mutex);
9478 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9479 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9480 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9481 	mutex_unlock(&rdev->gpu_clock_mutex);
9482 	return clock;
9483 }
9484 
9485 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9486                               u32 cntl_reg, u32 status_reg)
9487 {
9488 	int r, i;
9489 	struct atom_clock_dividers dividers;
9490 	uint32_t tmp;
9491 
9492 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9493 					   clock, false, &dividers);
9494 	if (r)
9495 		return r;
9496 
9497 	tmp = RREG32_SMC(cntl_reg);
9498 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9499 	tmp |= dividers.post_divider;
9500 	WREG32_SMC(cntl_reg, tmp);
9501 
9502 	for (i = 0; i < 100; i++) {
9503 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9504 			break;
9505 		mdelay(10);
9506 	}
9507 	if (i == 100)
9508 		return -ETIMEDOUT;
9509 
9510 	return 0;
9511 }
9512 
9513 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9514 {
9515 	int r = 0;
9516 
9517 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9518 	if (r)
9519 		return r;
9520 
9521 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9522 	return r;
9523 }
9524 
9525 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9526 {
9527 	int r, i;
9528 	struct atom_clock_dividers dividers;
9529 	u32 tmp;
9530 
9531 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9532 					   ecclk, false, &dividers);
9533 	if (r)
9534 		return r;
9535 
9536 	for (i = 0; i < 100; i++) {
9537 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9538 			break;
9539 		mdelay(10);
9540 	}
9541 	if (i == 100)
9542 		return -ETIMEDOUT;
9543 
9544 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9545 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9546 	tmp |= dividers.post_divider;
9547 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9548 
9549 	for (i = 0; i < 100; i++) {
9550 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9551 			break;
9552 		mdelay(10);
9553 	}
9554 	if (i == 100)
9555 		return -ETIMEDOUT;
9556 
9557 	return 0;
9558 }
9559 
/*
 * cik_pcie_gen3_enable - retrain the PCIE link at gen2/gen3 speed
 *
 * @rdev: radeon_device pointer
 *
 * Checks which link rates the root port and the GPU both support and,
 * if a faster rate than the current one is available, retrains the
 * link (including the equalization retry sequence required for gen3).
 * Disabled with radeon.pcie_gen2=0; skipped for IGPs, non-PCIE parts
 * and devices sitting directly on the root bus.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* no upstream bridge to negotiate with */
	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	/* module parameter kill switch */
	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* which speeds does the platform support? */
	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing faster than gen1 available - nothing to do */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* current rate encoding: 0 = gen1, 1 = gen2, 2 = gen3 */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* both ends must expose a PCIe capability for the config writes below */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the HAWD (hw autonomous width disable) state on
			 * both ends, then force it on for the retraining */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			/* if the link is running narrower than it could,
			 * renegotiate to the full detected width first */
			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				/* snapshot link control regs on both ends so
				 * they can be restored after the redo below */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link and redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				/* restore the saved enter-compliance/compliance
				 * preset bits ((1 << 4) | (7 << 9)) on both ends
				 * -- NOTE(review): magic masks, presumably
				 * LNKCTL2 compliance fields; verify vs PCIe spec */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				/* release the quiesce for the next iteration */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* write the target link speed into the GPU's LNKCTL2 */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change ... */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* ... and wait for the hardware to clear the initiate bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9719 
/*
 * cik_program_aspm - configure PCIE ASPM (Active State Power Management)
 *
 * @rdev: radeon_device pointer
 *
 * Programs the L0s/L1 inactivity timers, allows the PIF PLLs to power
 * down in L1, and - when the root port advertises clock power
 * management - switches several internal clocks over to the bus
 * reference clock.  Disabled with radeon.aspm=0 and skipped for
 * IGP/non-PCIE parts.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* local policy knobs; all currently hard-wired to "enabled" */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* module parameter kill switch */
	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* advertise 0x24 fast training sequences for L0s exit */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* build the L0s/L1 inactivity configuration; it is written back
	 * either in the !disable_l1 branch below or in the else at the
	 * bottom of this function */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* let the PIF PLLs power down while in L1/TXS2 */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* clock-request support requires an upstream bridge
			 * that advertises PCI_EXP_LNKCAP_CLKPM */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* switch thermal monitor clocks ... */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				/* ... misc clocks ... */
				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				/* ... and the xclk/refclk sources over to the
				 * bus reference clock */
				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: write back the L0s-only configuration */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable light sleep for the BIF memories */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* drop the L0s inactivity timer again if the link partner
		 * reports the maximum N_FTS on a reversed link */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
9868