/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");

MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");

MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");

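/*
 * Two names are listed per firmware image: the uppercase files are the
 * older, headerless images (raw big-endian words), while the lowercase
 * files are the newer images that carry full headers and are handled
 * through the rdev->new_fw path; see ci_mc_load_microcode() below for
 * the MC case.
 */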
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
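
/*
 * Worked example for the decode above: the CTF_TEMP field is read from
 * CG_MULT_THERMAL_STATUS; a raw value of 0x05a decodes to 90 degrees C
 * and is returned as 90000 millidegrees.  Any raw value with bit 9
 * (0x200) set is treated as saturated and clamped to 255 degrees C
 * (255000).
 */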

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
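
/*
 * The conversion above treats the raw SMC reading as 1/8th-degree units
 * with a 49 degree C offset: e.g. a raw value of 1000 yields
 * 1000 / 8 - 49 = 76 degrees C, returned as 76000 millidegrees.
 * 0xC0300E0C is an otherwise undocumented SMC thermal status register.
 */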

/*
 * Indirect register accessors
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
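
/*
 * Usage sketch (illustrative only): a read-modify-write through the
 * index/data pair above.  Real callers go through the
 * RREG32_PCIE_PORT()/WREG32_PCIE_PORT() wrappers with register offsets
 * from cikd.h:
 *
 *	u32 tmp = cik_pciep_rreg(rdev, PCIE_LC_STATUS1);
 *	cik_pciep_wreg(rdev, PCIE_LC_STATUS1, tmp);
 *
 * The dummy RREG32(PCIE_INDEX) read after programming the index flushes
 * the posted write, guaranteeing the index is latched before the data
 * port is accessed.
 */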

static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
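
/*
 * Format note (hedged reading of the lists above and below): entries
 * appear to be pairs of a packed descriptor and a scratch dword.  The
 * descriptor's high 16 bits look like a per-SE/SH/instance select code
 * (0x0e00 seemingly the broadcast encoding, 0x4e00..0xbe00 individual
 * instances) and the low 16 bits are the register offset in dwords,
 * hence the ">> 2" on each byte offset.  The bare values (0x3, 0x5)
 * appear to delimit sub-lists consumed by the RLC; sumo_rlc_init()
 * copies the list verbatim into the RLC save/restore buffer on CIK.
 */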

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

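/*
 * The "golden register" tables that follow are {offset, and_mask,
 * or_mask} triples consumed by radeon_program_register_sequence():
 * when and_mask is 0xffffffff the or_mask value is written directly;
 * otherwise the register is read, the and_mask bits are cleared, and
 * or_mask is OR'd in before writing back.
 */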
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
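
/*
 * Note: the 0x98302 offset in the godavari table above looks suspicious
 * (it is not dword aligned); by analogy with the kalindi table, the
 * intended register was probably 0x9834.
 */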

static void cik_init_golden_registers(struct radeon_device *rdev)
{
	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
	mutex_lock(&rdev->grbm_idx_mutex);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
	mutex_unlock(&rdev->grbm_idx_mutex);
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}
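
/*
 * Note: rdev->clock.spll.reference_freq is filled in from the ATOM
 * firmware info table and, like other radeon clocks, is expressed in
 * 10 kHz units.  For illustration, a 100 MHz reference (stored as
 * 10000) with XTALIN_DIVIDE set would be reported as 2500 (25 MHz).
 */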

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
{
	if (index < rdev->doorbell.num_doorbells) {
		return readl(rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
{
	if (index < rdev->doorbell.num_doorbells) {
		writel(v, rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}
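
/*
 * The doorbell aperture is a dword array mapped from a dedicated PCI
 * BAR; writing a slot kicks the corresponding ring directly, with no
 * index/data indirection.  Typical use is via the RDOORBELL32()/
 * WDOORBELL32() wrappers, e.g. the compute rings commit their write
 * pointer with WDOORBELL32(ring->doorbell_index, ring->wptr).
 */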
1704 
1705 #define BONAIRE_IO_MC_REGS_SIZE 36
1706 
1707 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1708 {
1709 	{0x00000070, 0x04400000},
1710 	{0x00000071, 0x80c01803},
1711 	{0x00000072, 0x00004004},
1712 	{0x00000073, 0x00000100},
1713 	{0x00000074, 0x00ff0000},
1714 	{0x00000075, 0x34000000},
1715 	{0x00000076, 0x08000014},
1716 	{0x00000077, 0x00cc08ec},
1717 	{0x00000078, 0x00000400},
1718 	{0x00000079, 0x00000000},
1719 	{0x0000007a, 0x04090000},
1720 	{0x0000007c, 0x00000000},
1721 	{0x0000007e, 0x4408a8e8},
1722 	{0x0000007f, 0x00000304},
1723 	{0x00000080, 0x00000000},
1724 	{0x00000082, 0x00000001},
1725 	{0x00000083, 0x00000002},
1726 	{0x00000084, 0xf3e4f400},
1727 	{0x00000085, 0x052024e3},
1728 	{0x00000087, 0x00000000},
1729 	{0x00000088, 0x01000000},
1730 	{0x0000008a, 0x1c0a0000},
1731 	{0x0000008b, 0xff010000},
1732 	{0x0000008d, 0xffffefff},
1733 	{0x0000008e, 0xfff3efff},
1734 	{0x0000008f, 0xfff3efbf},
1735 	{0x00000092, 0xf7ffffff},
1736 	{0x00000093, 0xffffff7f},
1737 	{0x00000095, 0x00101101},
1738 	{0x00000096, 0x00000fff},
1739 	{0x00000097, 0x00116fff},
1740 	{0x00000098, 0x60010000},
1741 	{0x00000099, 0x10010000},
1742 	{0x0000009a, 0x00006000},
1743 	{0x0000009b, 0x00001000},
1744 	{0x0000009f, 0x00b48000}
1745 };
1746 
1747 #define HAWAII_IO_MC_REGS_SIZE 22
1748 
1749 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1750 {
1751 	{0x0000007d, 0x40000000},
1752 	{0x0000007e, 0x40180304},
1753 	{0x0000007f, 0x0000ff00},
1754 	{0x00000081, 0x00000000},
1755 	{0x00000083, 0x00000800},
1756 	{0x00000086, 0x00000000},
1757 	{0x00000087, 0x00000100},
1758 	{0x00000088, 0x00020100},
1759 	{0x00000089, 0x00000000},
1760 	{0x0000008b, 0x00040000},
1761 	{0x0000008c, 0x00000100},
1762 	{0x0000008e, 0xff010000},
1763 	{0x00000090, 0xffffefff},
1764 	{0x00000091, 0xfff3efff},
1765 	{0x00000092, 0xfff3efbf},
1766 	{0x00000093, 0xf7ffffff},
1767 	{0x00000094, 0xffffff7f},
1768 	{0x00000095, 0x00000fff},
1769 	{0x00000096, 0x00116fff},
1770 	{0x00000097, 0x60010000},
1771 	{0x00000098, 0x10010000},
1772 	{0x0000009f, 0x00c79000}
1773 };
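
/*
 * Both tables above are {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA}
 * pairs; ci_mc_load_microcode() below walks them two words at a time,
 * writing the first word of each pair to the index register and the
 * second to the data register.
 */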
1774 
1775 
1776 /**
1777  * cik_srbm_select - select specific register instances
1778  *
1779  * @rdev: radeon_device pointer
1780  * @me: selected ME (micro engine)
1781  * @pipe: pipe
1782  * @queue: queue
1783  * @vmid: VMID
1784  *
1785  * Switches the currently active register instances.  Some
1786  * registers are instanced per VMID, others are instanced per
1787  * me/pipe/queue combination.
1788  */
1789 static void cik_srbm_select(struct radeon_device *rdev,
1790 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1791 {
1792 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1793 			     MEID(me & 0x3) |
1794 			     VMID(vmid & 0xf) |
1795 			     QUEUEID(queue & 0x7));
1796 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1797 }
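
/*
 * Typical call pattern (sketch; see e.g. the compute ring setup later in
 * this file): callers serialize on rdev->srbm_mutex, select the instance
 * they need, program the instanced registers, then restore the defaults:
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, 0);
 *	... program per-queue registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */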
1798 
1799 /* ucode loading */
1800 /**
1801  * ci_mc_load_microcode - load MC ucode into the hw
1802  *
1803  * @rdev: radeon_device pointer
1804  *
1805  * Load the GDDR MC ucode into the hw (CIK).
1806  * Returns 0 on success, error on failure.
1807  */
1808 int ci_mc_load_microcode(struct radeon_device *rdev)
1809 {
1810 	const __be32 *fw_data = NULL;
1811 	const __le32 *new_fw_data = NULL;
1812 	u32 running, tmp;
1813 	u32 *io_mc_regs = NULL;
1814 	const __le32 *new_io_mc_regs = NULL;
1815 	int i, regs_size, ucode_size;
1816 
1817 	if (!rdev->mc_fw)
1818 		return -EINVAL;
1819 
1820 	if (rdev->new_fw) {
1821 		const struct mc_firmware_header_v1_0 *hdr =
1822 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1823 
1824 		radeon_ucode_print_mc_hdr(&hdr->header);
1825 
1826 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1827 		new_io_mc_regs = (const __le32 *)
1828 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1829 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1830 		new_fw_data = (const __le32 *)
1831 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1832 	} else {
1833 		ucode_size = rdev->mc_fw->size / 4;
1834 
1835 		switch (rdev->family) {
1836 		case CHIP_BONAIRE:
1837 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1838 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1839 			break;
1840 		case CHIP_HAWAII:
1841 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1842 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1843 			break;
1844 		default:
1845 			return -EINVAL;
1846 		}
1847 		fw_data = (const __be32 *)rdev->mc_fw->data;
1848 	}
1849 
1850 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1851 
1852 	if (running == 0) {
1857 
1858 		/* reset the engine and set to writable */
1859 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1860 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1861 
1862 		/* load mc io regs */
1863 		for (i = 0; i < regs_size; i++) {
1864 			if (rdev->new_fw) {
1865 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1866 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1867 			} else {
1868 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1869 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1870 			}
1871 		}
1872 
1873 		tmp = RREG32(MC_SEQ_MISC0);
1874 		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1875 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1876 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1877 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1878 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1879 		}
1880 
1881 		/* load the MC ucode */
1882 		for (i = 0; i < ucode_size; i++) {
1883 			if (rdev->new_fw)
1884 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1885 			else
1886 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1887 		}
1888 
1889 		/* put the engine back into the active state */
1890 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1891 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1892 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1893 
1894 		/* wait for training to complete */
1895 		for (i = 0; i < rdev->usec_timeout; i++) {
1896 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1897 				break;
1898 			udelay(1);
1899 		}
1900 		for (i = 0; i < rdev->usec_timeout; i++) {
1901 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1902 				break;
1903 			udelay(1);
1904 		}
1908 	}
1909 
1910 	return 0;
1911 }
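
/*
 * Call-site sketch (see cik_startup() later in this file for the real
 * sequence): MC ucode is only needed on discrete parts, so callers gate
 * the load on the IGP flag, roughly:
 *
 *	if (!(rdev->flags & RADEON_IS_IGP)) {
 *		r = ci_mc_load_microcode(rdev);
 *		if (r)
 *			DRM_ERROR("Failed to load MC firmware!\n");
 *	}
 */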
1912 
1913 /**
1914  * cik_init_microcode - load ucode images from disk
1915  *
1916  * @rdev: radeon_device pointer
1917  *
1918  * Use the firmware interface to load the ucode images into
1919  * the driver (not loaded into hw).
1920  * Returns 0 on success, error on failure.
1921  */
1922 static int cik_init_microcode(struct radeon_device *rdev)
1923 {
1924 	const char *chip_name;
1925 	const char *new_chip_name;
1926 	size_t pfp_req_size, me_req_size, ce_req_size,
1927 		mec_req_size, rlc_req_size, mc_req_size = 0,
1928 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1929 	char fw_name[30];
1930 	int new_fw = 0;
1931 	int err;
1932 	int num_fw;
1933 
1934 	DRM_DEBUG("\n");
1935 
1936 	switch (rdev->family) {
1937 	case CHIP_BONAIRE:
1938 		chip_name = "BONAIRE";
1939 		new_chip_name = "bonaire";
1940 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1941 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1942 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1943 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1944 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1945 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1946 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1947 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1948 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1949 		num_fw = 8;
1950 		break;
1951 	case CHIP_HAWAII:
1952 		chip_name = "HAWAII";
1953 		new_chip_name = "hawaii";
1954 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1955 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1956 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1957 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1958 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1959 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1960 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1961 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1962 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1963 		num_fw = 8;
1964 		break;
1965 	case CHIP_KAVERI:
1966 		chip_name = "KAVERI";
1967 		new_chip_name = "kaveri";
1968 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1969 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1970 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1971 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1972 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1973 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1974 		num_fw = 7;
1975 		break;
1976 	case CHIP_KABINI:
1977 		chip_name = "KABINI";
1978 		new_chip_name = "kabini";
1979 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1980 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1981 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1982 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1983 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1984 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1985 		num_fw = 6;
1986 		break;
1987 	case CHIP_MULLINS:
1988 		chip_name = "MULLINS";
1989 		new_chip_name = "mullins";
1990 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1991 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1992 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1993 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1994 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
1995 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1996 		num_fw = 6;
1997 		break;
1998 	default: BUG();
1999 	}
2000 
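	/*
	 * num_fw is the number of images this ASIC is expected to provide;
	 * it is compared against the count of successfully validated
	 * new-style images at the end of this function to reject a mix of
	 * new and legacy firmware.
	 */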
2001 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2002 
2003 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2004 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2005 	if (err) {
2006 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2007 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2008 		if (err)
2009 			goto out;
2010 		if (rdev->pfp_fw->size != pfp_req_size) {
2011 			printk(KERN_ERR
2012 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2013 			       rdev->pfp_fw->size, fw_name);
2014 			err = -EINVAL;
2015 			goto out;
2016 		}
2017 	} else {
2018 		err = radeon_ucode_validate(rdev->pfp_fw);
2019 		if (err) {
2020 			printk(KERN_ERR
2021 			       "cik_fw: validation failed for firmware \"%s\"\n",
2022 			       fw_name);
2023 			goto out;
2024 		} else {
2025 			new_fw++;
2026 		}
2027 	}
2028 
2029 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2030 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2031 	if (err) {
2032 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2033 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2034 		if (err)
2035 			goto out;
2036 		if (rdev->me_fw->size != me_req_size) {
2037 			printk(KERN_ERR
2038 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2039 			       rdev->me_fw->size, fw_name);
2040 			err = -EINVAL;
			goto out;
2041 		}
2042 	} else {
2043 		err = radeon_ucode_validate(rdev->me_fw);
2044 		if (err) {
2045 			printk(KERN_ERR
2046 			       "cik_fw: validation failed for firmware \"%s\"\n",
2047 			       fw_name);
2048 			goto out;
2049 		} else {
2050 			new_fw++;
2051 		}
2052 	}
2053 
2054 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2055 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2056 	if (err) {
2057 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2058 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2059 		if (err)
2060 			goto out;
2061 		if (rdev->ce_fw->size != ce_req_size) {
2062 			printk(KERN_ERR
2063 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2064 			       rdev->ce_fw->size, fw_name);
2065 			err = -EINVAL;
			goto out;
2066 		}
2067 	} else {
2068 		err = radeon_ucode_validate(rdev->ce_fw);
2069 		if (err) {
2070 			printk(KERN_ERR
2071 			       "cik_fw: validation failed for firmware \"%s\"\n",
2072 			       fw_name);
2073 			goto out;
2074 		} else {
2075 			new_fw++;
2076 		}
2077 	}
2078 
2079 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2080 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2081 	if (err) {
2082 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2083 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2084 		if (err)
2085 			goto out;
2086 		if (rdev->mec_fw->size != mec_req_size) {
2087 			printk(KERN_ERR
2088 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2089 			       rdev->mec_fw->size, fw_name);
2090 			err = -EINVAL;
			goto out;
2091 		}
2092 	} else {
2093 		err = radeon_ucode_validate(rdev->mec_fw);
2094 		if (err) {
2095 			printk(KERN_ERR
2096 			       "cik_fw: validation failed for firmware \"%s\"\n",
2097 			       fw_name);
2098 			goto out;
2099 		} else {
2100 			new_fw++;
2101 		}
2102 	}
2103 
2104 	if (rdev->family == CHIP_KAVERI) {
2105 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2106 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2107 		if (err) {
2108 			goto out;
2109 		} else {
2110 			err = radeon_ucode_validate(rdev->mec2_fw);
2111 			if (err) {
2112 				goto out;
2113 			} else {
2114 				new_fw++;
2115 			}
2116 		}
2117 	}
2118 
2119 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2120 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2121 	if (err) {
2122 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2123 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2124 		if (err)
2125 			goto out;
2126 		if (rdev->rlc_fw->size != rlc_req_size) {
2127 			printk(KERN_ERR
2128 			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2129 			       rdev->rlc_fw->size, fw_name);
2130 			err = -EINVAL;
			goto out;
2131 		}
2132 	} else {
2133 		err = radeon_ucode_validate(rdev->rlc_fw);
2134 		if (err) {
2135 			printk(KERN_ERR
2136 			       "cik_fw: validation failed for firmware \"%s\"\n",
2137 			       fw_name);
2138 			goto out;
2139 		} else {
2140 			new_fw++;
2141 		}
2142 	}
2143 
2144 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2145 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2146 	if (err) {
2147 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2148 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2149 		if (err)
2150 			goto out;
2151 		if (rdev->sdma_fw->size != sdma_req_size) {
2152 			printk(KERN_ERR
2153 			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2154 			       rdev->sdma_fw->size, fw_name);
2155 			err = -EINVAL;
			goto out;
2156 		}
2157 	} else {
2158 		err = radeon_ucode_validate(rdev->sdma_fw);
2159 		if (err) {
2160 			printk(KERN_ERR
2161 			       "cik_fw: validation failed for firmware \"%s\"\n",
2162 			       fw_name);
2163 			goto out;
2164 		} else {
2165 			new_fw++;
2166 		}
2167 	}
2168 
2169 	/* No SMC, MC ucode on APUs */
2170 	if (!(rdev->flags & RADEON_IS_IGP)) {
2171 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2172 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2173 		if (err) {
2174 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2175 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2176 			if (err) {
2177 				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2178 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2179 				if (err)
2180 					goto out;
2181 			}
2182 			if ((rdev->mc_fw->size != mc_req_size) &&
2183 			    (rdev->mc_fw->size != mc2_req_size)) {
2184 				printk(KERN_ERR
2185 				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2186 				       rdev->mc_fw->size, fw_name);
2187 				err = -EINVAL;
				goto out;
2188 			}
2189 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2190 		} else {
2191 			err = radeon_ucode_validate(rdev->mc_fw);
2192 			if (err) {
2193 				printk(KERN_ERR
2194 				       "cik_fw: validation failed for firmware \"%s\"\n",
2195 				       fw_name);
2196 				goto out;
2197 			} else {
2198 				new_fw++;
2199 			}
2200 		}
2201 
2202 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2203 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2204 		if (err) {
2205 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2206 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2207 			if (err) {
2208 				printk(KERN_ERR
2209 				       "smc: error loading firmware \"%s\"\n",
2210 				       fw_name);
2211 				release_firmware(rdev->smc_fw);
2212 				rdev->smc_fw = NULL;
2213 				err = 0;
2214 			} else if (rdev->smc_fw->size != smc_req_size) {
2215 				printk(KERN_ERR
2216 				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2217 				       rdev->smc_fw->size, fw_name);
2218 				err = -EINVAL;
2219 			}
2220 		} else {
2221 			err = radeon_ucode_validate(rdev->smc_fw);
2222 			if (err) {
2223 				printk(KERN_ERR
2224 				       "cik_fw: validation failed for firmware \"%s\"\n",
2225 				       fw_name);
2226 				goto out;
2227 			} else {
2228 				new_fw++;
2229 			}
2230 		}
2231 	}
2232 
2233 	if (new_fw == 0) {
2234 		rdev->new_fw = false;
2235 	} else if (new_fw < num_fw) {
2236 		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2237 		err = -EINVAL;
2238 	} else {
2239 		rdev->new_fw = true;
2240 	}
2241 
2242 out:
2243 	if (err) {
2244 		if (err != -EINVAL)
2245 			printk(KERN_ERR
2246 			       "cik_cp: Failed to load firmware \"%s\"\n",
2247 			       fw_name);
2248 		release_firmware(rdev->pfp_fw);
2249 		rdev->pfp_fw = NULL;
2250 		release_firmware(rdev->me_fw);
2251 		rdev->me_fw = NULL;
2252 		release_firmware(rdev->ce_fw);
2253 		rdev->ce_fw = NULL;
2254 		release_firmware(rdev->mec_fw);
2255 		rdev->mec_fw = NULL;
2256 		release_firmware(rdev->mec2_fw);
2257 		rdev->mec2_fw = NULL;
2258 		release_firmware(rdev->rlc_fw);
2259 		rdev->rlc_fw = NULL;
2260 		release_firmware(rdev->sdma_fw);
2261 		rdev->sdma_fw = NULL;
2262 		release_firmware(rdev->mc_fw);
2263 		rdev->mc_fw = NULL;
2264 		release_firmware(rdev->smc_fw);
2265 		rdev->smc_fw = NULL;
2266 	}
2267 	return err;
2268 }
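
/*
 * Each image above follows the same pattern: try the new-style, validated
 * firmware name first and fall back to the legacy name, roughly:
 *
 *	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
 *	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
 *	if (err) {
 *		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
 *		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
 *	}
 *
 * Legacy images are size-checked against the hardcoded *_req_size values,
 * while new-style images carry headers and go through
 * radeon_ucode_validate() instead.
 */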
2269 
2270 /*
2271  * Core functions
2272  */
2273 /**
2274  * cik_tiling_mode_table_init - init the hw tiling table
2275  *
2276  * @rdev: radeon_device pointer
2277  *
2278  * Starting with SI, the tiling setup is done globally in a
2279  * set of 32 tiling modes.  Rather than selecting each set of
2280  * parameters per surface as on older asics, we just select
2281  * which index in the tiling table we want to use, and the
2282  * surface uses those parameters (CIK).
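 *
 * The computed values are also cached in rdev->config.cik.tile_mode_array[]
 * and macrotile_mode_array[] so they can later be handed back to userspace
 * (e.g. through the radeon info ioctl).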
2283  */
2284 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2285 {
2286 	const u32 num_tile_mode_states = 32;
2287 	const u32 num_secondary_tile_mode_states = 16;
2288 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2289 	u32 num_pipe_configs;
2290 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2291 		rdev->config.cik.max_shader_engines;
2292 
2293 	switch (rdev->config.cik.mem_row_size_in_kb) {
2294 	case 1:
2295 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2296 		break;
2297 	case 2:
2298 	default:
2299 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2300 		break;
2301 	case 4:
2302 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2303 		break;
2304 	}
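
	/*
	 * Worked example: with a 1 KB DRAM row, the "split equal to row
	 * size" tile modes (cases 4 and 7 in the tables below) resolve to
	 * a 1 KB tile split, so a macro tile never straddles a row.
	 */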
2305 
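	/*
	 * Only 2-, 4-, 8- and 16-pipe tables are provided below, so any
	 * pipe count above 8 (e.g. Hawaii's 16 tile pipes) is mapped onto
	 * the 16-pipe (P16) configuration.
	 */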
2306 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2307 	if (num_pipe_configs > 8)
2308 		num_pipe_configs = 16;
2309 
2310 	if (num_pipe_configs == 16) {
2311 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2312 			switch (reg_offset) {
2313 			case 0:
2314 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2315 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2316 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2317 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2318 				break;
2319 			case 1:
2320 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2321 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2322 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2323 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2324 				break;
2325 			case 2:
2326 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2327 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2328 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2330 				break;
2331 			case 3:
2332 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2333 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2334 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2335 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2336 				break;
2337 			case 4:
2338 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2339 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2340 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341 						 TILE_SPLIT(split_equal_to_row_size));
2342 				break;
2343 			case 5:
2344 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2345 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2346 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2347 				break;
2348 			case 6:
2349 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2350 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2351 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2352 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2353 				break;
2354 			case 7:
2355 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2356 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2357 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358 						 TILE_SPLIT(split_equal_to_row_size));
2359 				break;
2360 			case 8:
2361 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2362 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2363 				break;
2364 			case 9:
2365 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2366 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2367 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2368 				break;
2369 			case 10:
2370 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2371 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2372 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2374 				break;
2375 			case 11:
2376 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2377 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2378 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2379 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2380 				break;
2381 			case 12:
2382 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2383 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2384 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2386 				break;
2387 			case 13:
2388 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2389 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2391 				break;
2392 			case 14:
2393 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2395 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2397 				break;
2398 			case 16:
2399 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2400 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2401 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2402 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2403 				break;
2404 			case 17:
2405 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2406 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2407 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2408 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2409 				break;
2410 			case 27:
2411 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2412 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2413 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2414 				break;
2415 			case 28:
2416 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2418 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2420 				break;
2421 			case 29:
2422 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2423 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2424 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2425 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2426 				break;
2427 			case 30:
2428 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2429 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2430 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2432 				break;
2433 			default:
2434 				gb_tile_moden = 0;
2435 				break;
2436 			}
2437 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2438 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2439 		}
2440 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2441 			switch (reg_offset) {
2442 			case 0:
2443 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2444 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2445 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2446 						 NUM_BANKS(ADDR_SURF_16_BANK));
2447 				break;
2448 			case 1:
2449 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2451 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452 						 NUM_BANKS(ADDR_SURF_16_BANK));
2453 				break;
2454 			case 2:
2455 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2456 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2457 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2458 						 NUM_BANKS(ADDR_SURF_16_BANK));
2459 				break;
2460 			case 3:
2461 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2464 						 NUM_BANKS(ADDR_SURF_16_BANK));
2465 				break;
2466 			case 4:
2467 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2468 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2469 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2470 						 NUM_BANKS(ADDR_SURF_8_BANK));
2471 				break;
2472 			case 5:
2473 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2475 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2476 						 NUM_BANKS(ADDR_SURF_4_BANK));
2477 				break;
2478 			case 6:
2479 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2480 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2481 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2482 						 NUM_BANKS(ADDR_SURF_2_BANK));
2483 				break;
2484 			case 8:
2485 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2487 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2488 						 NUM_BANKS(ADDR_SURF_16_BANK));
2489 				break;
2490 			case 9:
2491 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2492 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2493 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2494 						 NUM_BANKS(ADDR_SURF_16_BANK));
2495 				break;
2496 			case 10:
2497 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2500 						 NUM_BANKS(ADDR_SURF_16_BANK));
2501 				break;
2502 			case 11:
2503 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2504 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2505 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2506 						 NUM_BANKS(ADDR_SURF_8_BANK));
2507 				break;
2508 			case 12:
2509 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2511 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2512 						 NUM_BANKS(ADDR_SURF_4_BANK));
2513 				break;
2514 			case 13:
2515 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2517 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2518 						 NUM_BANKS(ADDR_SURF_2_BANK));
2519 				break;
2520 			case 14:
2521 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2523 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2524 						 NUM_BANKS(ADDR_SURF_2_BANK));
2525 				break;
2526 			default:
2527 				gb_tile_moden = 0;
2528 				break;
2529 			}
2530 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2531 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2532 		}
2533 	} else if (num_pipe_configs == 8) {
2534 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2535 			switch (reg_offset) {
2536 			case 0:
2537 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2538 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2539 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2540 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2541 				break;
2542 			case 1:
2543 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2544 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2545 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2546 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2547 				break;
2548 			case 2:
2549 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2551 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2553 				break;
2554 			case 3:
2555 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2556 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2557 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2558 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2559 				break;
2560 			case 4:
2561 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2562 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2563 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2564 						 TILE_SPLIT(split_equal_to_row_size));
2565 				break;
2566 			case 5:
2567 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2568 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2569 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2570 				break;
2571 			case 6:
2572 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2573 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2574 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2576 				break;
2577 			case 7:
2578 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2579 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2580 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2581 						 TILE_SPLIT(split_equal_to_row_size));
2582 				break;
2583 			case 8:
2584 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2585 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2586 				break;
2587 			case 9:
2588 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2589 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2591 				break;
2592 			case 10:
2593 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2594 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2595 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2596 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2597 				break;
2598 			case 11:
2599 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2600 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2601 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2602 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2603 				break;
2604 			case 12:
2605 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2606 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2607 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2608 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2609 				break;
2610 			case 13:
2611 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2612 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2613 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2614 				break;
2615 			case 14:
2616 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2617 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2618 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2619 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2620 				break;
2621 			case 16:
2622 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2623 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2624 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2625 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2626 				break;
2627 			case 17:
2628 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2629 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2630 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2631 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2632 				break;
2633 			case 27:
2634 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2635 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2636 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2637 				break;
2638 			case 28:
2639 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2640 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2641 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2642 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2643 				break;
2644 			case 29:
2645 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2646 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2647 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2648 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2649 				break;
2650 			case 30:
2651 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2652 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2653 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2654 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2655 				break;
2656 			default:
2657 				gb_tile_moden = 0;
2658 				break;
2659 			}
2660 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2661 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2662 		}
2663 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2664 			switch (reg_offset) {
2665 			case 0:
2666 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2667 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2668 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2669 						 NUM_BANKS(ADDR_SURF_16_BANK));
2670 				break;
2671 			case 1:
2672 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2673 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2674 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2675 						 NUM_BANKS(ADDR_SURF_16_BANK));
2676 				break;
2677 			case 2:
2678 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2679 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2680 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2681 						 NUM_BANKS(ADDR_SURF_16_BANK));
2682 				break;
2683 			case 3:
2684 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2685 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2686 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2687 						 NUM_BANKS(ADDR_SURF_16_BANK));
2688 				break;
2689 			case 4:
2690 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2692 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2693 						 NUM_BANKS(ADDR_SURF_8_BANK));
2694 				break;
2695 			case 5:
2696 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2698 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2699 						 NUM_BANKS(ADDR_SURF_4_BANK));
2700 				break;
2701 			case 6:
2702 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2703 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2704 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2705 						 NUM_BANKS(ADDR_SURF_2_BANK));
2706 				break;
2707 			case 8:
2708 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2709 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2710 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2711 						 NUM_BANKS(ADDR_SURF_16_BANK));
2712 				break;
2713 			case 9:
2714 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2715 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2716 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2717 						 NUM_BANKS(ADDR_SURF_16_BANK));
2718 				break;
2719 			case 10:
2720 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2721 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2722 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2723 						 NUM_BANKS(ADDR_SURF_16_BANK));
2724 				break;
2725 			case 11:
2726 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2728 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2729 						 NUM_BANKS(ADDR_SURF_16_BANK));
2730 				break;
2731 			case 12:
2732 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2734 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2735 						 NUM_BANKS(ADDR_SURF_8_BANK));
2736 				break;
2737 			case 13:
2738 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2739 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2740 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2741 						 NUM_BANKS(ADDR_SURF_4_BANK));
2742 				break;
2743 			case 14:
2744 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2746 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2747 						 NUM_BANKS(ADDR_SURF_2_BANK));
2748 				break;
2749 			default:
2750 				gb_tile_moden = 0;
2751 				break;
2752 			}
2753 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2754 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2755 		}
2756 	} else if (num_pipe_configs == 4) {
2757 		if (num_rbs == 4) {
2758 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2759 				switch (reg_offset) {
2760 				case 0:
2761 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2762 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2763 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2764 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2765 					break;
2766 				case 1:
2767 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2768 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2769 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2770 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2771 					break;
2772 				case 2:
2773 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2774 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2775 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2776 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2777 					break;
2778 				case 3:
2779 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2780 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2781 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2782 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2783 					break;
2784 				case 4:
2785 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2786 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2787 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2788 							 TILE_SPLIT(split_equal_to_row_size));
2789 					break;
2790 				case 5:
2791 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2792 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2793 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2794 					break;
2795 				case 6:
2796 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2797 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2798 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2799 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2800 					break;
2801 				case 7:
2802 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2803 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2804 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805 							 TILE_SPLIT(split_equal_to_row_size));
2806 					break;
2807 				case 8:
2808 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2809 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2810 					break;
2811 				case 9:
2812 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2813 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2814 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2815 					break;
2816 				case 10:
2817 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2818 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2819 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2820 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2821 					break;
2822 				case 11:
2823 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2824 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2825 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2826 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2827 					break;
2828 				case 12:
2829 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2830 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2831 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2832 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2833 					break;
2834 				case 13:
2835 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2836 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2837 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2838 					break;
2839 				case 14:
2840 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2841 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2842 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2843 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2844 					break;
2845 				case 16:
2846 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2847 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2848 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2849 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2850 					break;
2851 				case 17:
2852 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2853 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2854 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2855 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2856 					break;
2857 				case 27:
2858 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2859 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2860 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2861 					break;
2862 				case 28:
2863 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2864 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2865 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2866 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2867 					break;
2868 				case 29:
2869 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2870 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2871 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2872 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2873 					break;
2874 				case 30:
2875 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2876 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2877 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2878 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2879 					break;
2880 				default:
2881 					gb_tile_moden = 0;
2882 					break;
2883 				}
2884 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2885 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2886 			}
2887 		} else if (num_rbs < 4) {
2888 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2889 				switch (reg_offset) {
2890 				case 0:
2891 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2892 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2893 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2894 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2895 					break;
2896 				case 1:
2897 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2898 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2899 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2900 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2901 					break;
2902 				case 2:
2903 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2904 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2905 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2906 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2907 					break;
2908 				case 3:
2909 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2910 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2911 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2912 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2913 					break;
2914 				case 4:
2915 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2916 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2917 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2918 							 TILE_SPLIT(split_equal_to_row_size));
2919 					break;
2920 				case 5:
2921 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2922 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2923 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2924 					break;
2925 				case 6:
2926 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2927 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2928 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2929 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2930 					break;
2931 				case 7:
2932 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2933 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2934 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2935 							 TILE_SPLIT(split_equal_to_row_size));
2936 					break;
2937 				case 8:
2938 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2939 							 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2940 					break;
2941 				case 9:
2942 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2943 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2944 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2945 					break;
2946 				case 10:
2947 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2948 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2949 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2950 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951 					break;
2952 				case 11:
2953 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2954 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2955 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2956 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2957 					break;
2958 				case 12:
2959 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2960 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2961 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2962 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2963 					break;
2964 				case 13:
2965 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2966 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2967 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2968 					break;
2969 				case 14:
2970 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2971 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2972 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2973 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2974 					break;
2975 				case 16:
2976 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2977 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2978 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2979 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2980 					break;
2981 				case 17:
2982 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2983 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2984 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2985 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2986 					break;
2987 				case 27:
2988 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2989 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2990 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2991 					break;
2992 				case 28:
2993 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2994 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2995 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2996 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2997 					break;
2998 				case 29:
2999 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3000 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3001 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3002 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3003 					break;
3004 				case 30:
3005 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3006 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3007 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3008 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3009 					break;
3010 				default:
3011 					gb_tile_moden = 0;
3012 					break;
3013 				}
3014 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3015 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3016 			}
3017 		}
3018 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3019 			switch (reg_offset) {
3020 			case 0:
3021 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3022 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3023 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3024 						 NUM_BANKS(ADDR_SURF_16_BANK));
3025 				break;
3026 			case 1:
3027 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3028 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3029 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3030 						 NUM_BANKS(ADDR_SURF_16_BANK));
3031 				break;
3032 			case 2:
3033 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3034 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3035 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3036 						 NUM_BANKS(ADDR_SURF_16_BANK));
3037 				break;
3038 			case 3:
3039 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3040 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3041 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3042 						 NUM_BANKS(ADDR_SURF_16_BANK));
3043 				break;
3044 			case 4:
3045 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3046 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3047 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3048 						 NUM_BANKS(ADDR_SURF_16_BANK));
3049 				break;
3050 			case 5:
3051 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3052 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3053 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3054 						 NUM_BANKS(ADDR_SURF_8_BANK));
3055 				break;
3056 			case 6:
3057 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3058 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3059 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3060 						 NUM_BANKS(ADDR_SURF_4_BANK));
3061 				break;
3062 			case 8:
3063 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3064 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3065 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3066 						 NUM_BANKS(ADDR_SURF_16_BANK));
3067 				break;
3068 			case 9:
3069 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3070 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3071 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3072 						 NUM_BANKS(ADDR_SURF_16_BANK));
3073 				break;
3074 			case 10:
3075 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3076 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3077 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3078 						 NUM_BANKS(ADDR_SURF_16_BANK));
3079 				break;
3080 			case 11:
3081 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3082 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3083 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3084 						 NUM_BANKS(ADDR_SURF_16_BANK));
3085 				break;
3086 			case 12:
3087 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3088 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3089 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3090 						 NUM_BANKS(ADDR_SURF_16_BANK));
3091 				break;
3092 			case 13:
3093 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3094 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3095 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3096 						 NUM_BANKS(ADDR_SURF_8_BANK));
3097 				break;
3098 			case 14:
3099 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3100 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3101 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3102 						 NUM_BANKS(ADDR_SURF_4_BANK));
3103 				break;
3104 			default:
3105 				gb_tile_moden = 0;
3106 				break;
3107 			}
3108 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3109 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3110 		}
3111 	} else if (num_pipe_configs == 2) {
3112 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3113 			switch (reg_offset) {
3114 			case 0:
3115 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3116 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3117 						 PIPE_CONFIG(ADDR_SURF_P2) |
3118 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3119 				break;
3120 			case 1:
3121 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3122 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3123 						 PIPE_CONFIG(ADDR_SURF_P2) |
3124 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3125 				break;
3126 			case 2:
3127 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3128 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3129 						 PIPE_CONFIG(ADDR_SURF_P2) |
3130 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3131 				break;
3132 			case 3:
3133 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3134 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3135 						 PIPE_CONFIG(ADDR_SURF_P2) |
3136 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3137 				break;
3138 			case 4:
3139 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3140 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3141 						 PIPE_CONFIG(ADDR_SURF_P2) |
3142 						 TILE_SPLIT(split_equal_to_row_size));
3143 				break;
3144 			case 5:
3145 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3146 						 PIPE_CONFIG(ADDR_SURF_P2) |
3147 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3148 				break;
3149 			case 6:
3150 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3151 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3152 						 PIPE_CONFIG(ADDR_SURF_P2) |
3153 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3154 				break;
3155 			case 7:
3156 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3157 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3158 						 PIPE_CONFIG(ADDR_SURF_P2) |
3159 						 TILE_SPLIT(split_equal_to_row_size));
3160 				break;
3161 			case 8:
3162 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3163 						 PIPE_CONFIG(ADDR_SURF_P2));
3164 				break;
3165 			case 9:
3166 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3167 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3168 						 PIPE_CONFIG(ADDR_SURF_P2));
3169 				break;
3170 			case 10:
3171 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3172 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3173 						 PIPE_CONFIG(ADDR_SURF_P2) |
3174 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3175 				break;
3176 			case 11:
3177 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3178 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3179 						 PIPE_CONFIG(ADDR_SURF_P2) |
3180 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3181 				break;
3182 			case 12:
3183 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3184 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3185 						 PIPE_CONFIG(ADDR_SURF_P2) |
3186 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3187 				break;
3188 			case 13:
3189 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3190 						 PIPE_CONFIG(ADDR_SURF_P2) |
3191 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3192 				break;
3193 			case 14:
3194 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3195 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3196 						 PIPE_CONFIG(ADDR_SURF_P2) |
3197 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3198 				break;
3199 			case 16:
3200 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3201 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3202 						 PIPE_CONFIG(ADDR_SURF_P2) |
3203 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3204 				break;
3205 			case 17:
3206 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3207 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3208 						 PIPE_CONFIG(ADDR_SURF_P2) |
3209 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3210 				break;
3211 			case 27:
3212 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3213 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3214 						 PIPE_CONFIG(ADDR_SURF_P2));
3215 				break;
3216 			case 28:
3217 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3218 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3219 						 PIPE_CONFIG(ADDR_SURF_P2) |
3220 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3221 				break;
3222 			case 29:
3223 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3224 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3225 						 PIPE_CONFIG(ADDR_SURF_P2) |
3226 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3227 				break;
3228 			case 30:
3229 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3230 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3231 						 PIPE_CONFIG(ADDR_SURF_P2) |
3232 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3233 				break;
3234 			default:
3235 				gb_tile_moden = 0;
3236 				break;
3237 			}
3238 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3239 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3240 		}
3241 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3242 			switch (reg_offset) {
3243 			case 0:
3244 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3245 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3246 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3247 						 NUM_BANKS(ADDR_SURF_16_BANK));
3248 				break;
3249 			case 1:
3250 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3251 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3252 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3253 						 NUM_BANKS(ADDR_SURF_16_BANK));
3254 				break;
3255 			case 2:
3256 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3257 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3258 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3259 						 NUM_BANKS(ADDR_SURF_16_BANK));
3260 				break;
3261 			case 3:
3262 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3263 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3264 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3265 						 NUM_BANKS(ADDR_SURF_16_BANK));
3266 				break;
3267 			case 4:
3268 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3269 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3270 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3271 						 NUM_BANKS(ADDR_SURF_16_BANK));
3272 				break;
3273 			case 5:
3274 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3275 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3276 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3277 						 NUM_BANKS(ADDR_SURF_16_BANK));
3278 				break;
3279 			case 6:
3280 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3281 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3282 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3283 						 NUM_BANKS(ADDR_SURF_8_BANK));
3284 				break;
3285 			case 8:
3286 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3287 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3288 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3289 						 NUM_BANKS(ADDR_SURF_16_BANK));
3290 				break;
3291 			case 9:
3292 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3293 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3294 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3295 						 NUM_BANKS(ADDR_SURF_16_BANK));
3296 				break;
3297 			case 10:
3298 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3299 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3300 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3301 						 NUM_BANKS(ADDR_SURF_16_BANK));
3302 				break;
3303 			case 11:
3304 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3305 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3306 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307 						 NUM_BANKS(ADDR_SURF_16_BANK));
3308 				break;
3309 			case 12:
3310 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3311 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3312 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3313 						 NUM_BANKS(ADDR_SURF_16_BANK));
3314 				break;
3315 			case 13:
3316 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3318 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3319 						 NUM_BANKS(ADDR_SURF_16_BANK));
3320 				break;
3321 			case 14:
3322 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3323 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3324 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3325 						 NUM_BANKS(ADDR_SURF_8_BANK));
3326 				break;
3327 			default:
3328 				gb_tile_moden = 0;
3329 				break;
3330 			}
3331 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3332 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3333 		}
3334 	} else
3335 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3336 }
3337 
3338 /**
3339  * cik_select_se_sh - select which SE, SH to address
3340  *
3341  * @rdev: radeon_device pointer
3342  * @se_num: shader engine to address
3343  * @sh_num: sh block to address
3344  *
3345  * Select which SE, SH combinations to address. Certain
3346  * registers are instanced per SE or SH.  0xffffffff means
3347  * broadcast to all SEs or SHs (CIK).
3348  */
3349 static void cik_select_se_sh(struct radeon_device *rdev,
3350 			     u32 se_num, u32 sh_num)
3351 {
3352 	u32 data = INSTANCE_BROADCAST_WRITES;
3353 
3354 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3355 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3356 	else if (se_num == 0xffffffff)
3357 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3358 	else if (sh_num == 0xffffffff)
3359 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3360 	else
3361 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3362 	WREG32(GRBM_GFX_INDEX, data);
3363 }
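/*
 * Usage sketch (editor's illustration; mirrors the pattern in
 * cik_setup_rb below): per-instance register reads are bracketed by a
 * select and a return to broadcast mode, under grbm_idx_mutex:
 *
 *	mutex_lock(&rdev->grbm_idx_mutex);
 *	cik_select_se_sh(rdev, se, sh);
 *	data = RREG32(CC_RB_BACKEND_DISABLE);
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 *	mutex_unlock(&rdev->grbm_idx_mutex);
 */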
3364 
3365 /**
3366  * cik_create_bitmask - create a bitmask
3367  *
3368  * @bit_width: length of the mask
3369  *
3370  * create a variable length bit mask (CIK).
3371  * Returns the bitmask.
3372  */
3373 static u32 cik_create_bitmask(u32 bit_width)
3374 {
3375 	u32 i, mask = 0;
3376 
3377 	for (i = 0; i < bit_width; i++) {
3378 		mask <<= 1;
3379 		mask |= 1;
3380 	}
3381 	return mask;
3382 }
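/*
 * Example (editor's note): cik_create_bitmask(4) returns 0xf.  For
 * bit_width < 32 the loop is equivalent to the closed form
 *
 *	mask = (1u << bit_width) - 1;
 */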
3383 
3384 /**
3385  * cik_get_rb_disabled - computes the mask of disabled RBs
3386  *
3387  * @rdev: radeon_device pointer
3388  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3390  * @sh_per_se: number of SH blocks per SE for the asic
3391  *
3392  * Calculates the bitmask of disabled RBs (CIK).
3393  * Returns the disabled RB bitmask.
3394  */
3395 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3396 			      u32 max_rb_num_per_se,
3397 			      u32 sh_per_se)
3398 {
3399 	u32 data, mask;
3400 
3401 	data = RREG32(CC_RB_BACKEND_DISABLE);
3402 	if (data & 1)
3403 		data &= BACKEND_DISABLE_MASK;
3404 	else
3405 		data = 0;
3406 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3407 
3408 	data >>= BACKEND_DISABLE_SHIFT;
3409 
3410 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3411 
3412 	return data & mask;
3413 }
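/*
 * Worked example (editor's note): on Hawaii, cik_setup_rb passes
 * max_rb_num_per_se = 4 and sh_per_se = 1, so mask =
 * cik_create_bitmask(4) = 0xf and the function returns the low four
 * bits of the combined CC/GC_USER backend-disable field for the
 * currently selected SE/SH.
 */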
3414 
3415 /**
3416  * cik_setup_rb - setup the RBs on the asic
3417  *
3418  * @rdev: radeon_device pointer
3419  * @se_num: number of SEs (shader engines) for the asic
3420  * @sh_per_se: number of SH blocks per SE for the asic
3421  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3422  *
3423  * Configures per-SE/SH RB registers (CIK).
3424  */
3425 static void cik_setup_rb(struct radeon_device *rdev,
3426 			 u32 se_num, u32 sh_per_se,
3427 			 u32 max_rb_num_per_se)
3428 {
3429 	int i, j;
3430 	u32 data, mask;
3431 	u32 disabled_rbs = 0;
3432 	u32 enabled_rbs = 0;
3433 
3434 	mutex_lock(&rdev->grbm_idx_mutex);
3435 	for (i = 0; i < se_num; i++) {
3436 		for (j = 0; j < sh_per_se; j++) {
3437 			cik_select_se_sh(rdev, i, j);
3438 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3439 			if (rdev->family == CHIP_HAWAII)
3440 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3441 			else
3442 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3443 		}
3444 	}
3445 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3446 	mutex_unlock(&rdev->grbm_idx_mutex);
3447 
3448 	mask = 1;
3449 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3450 		if (!(disabled_rbs & mask))
3451 			enabled_rbs |= mask;
3452 		mask <<= 1;
3453 	}
3454 
3455 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3456 
3457 	mutex_lock(&rdev->grbm_idx_mutex);
3458 	for (i = 0; i < se_num; i++) {
3459 		cik_select_se_sh(rdev, i, 0xffffffff);
3460 		data = 0;
3461 		for (j = 0; j < sh_per_se; j++) {
3462 			switch (enabled_rbs & 3) {
3463 			case 0:
3464 				if (j == 0)
3465 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3466 				else
3467 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3468 				break;
3469 			case 1:
3470 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3471 				break;
3472 			case 2:
3473 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3474 				break;
3475 			case 3:
3476 			default:
3477 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3478 				break;
3479 			}
3480 			enabled_rbs >>= 2;
3481 		}
3482 		WREG32(PA_SC_RASTER_CONFIG, data);
3483 	}
3484 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3485 	mutex_unlock(&rdev->grbm_idx_mutex);
3486 }
3487 
3488 /**
3489  * cik_gpu_init - setup the 3D engine
3490  *
3491  * @rdev: radeon_device pointer
3492  *
3493  * Configures the 3D engine and tiling configuration
3494  * registers so that the 3D engine is usable.
3495  */
3496 static void cik_gpu_init(struct radeon_device *rdev)
3497 {
3498 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3499 	u32 mc_shared_chmap, mc_arb_ramcfg;
3500 	u32 hdp_host_path_cntl;
3501 	u32 tmp;
3502 	int i, j;
3503 
3504 	switch (rdev->family) {
3505 	case CHIP_BONAIRE:
3506 		rdev->config.cik.max_shader_engines = 2;
3507 		rdev->config.cik.max_tile_pipes = 4;
3508 		rdev->config.cik.max_cu_per_sh = 7;
3509 		rdev->config.cik.max_sh_per_se = 1;
3510 		rdev->config.cik.max_backends_per_se = 2;
3511 		rdev->config.cik.max_texture_channel_caches = 4;
3512 		rdev->config.cik.max_gprs = 256;
3513 		rdev->config.cik.max_gs_threads = 32;
3514 		rdev->config.cik.max_hw_contexts = 8;
3515 
3516 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3517 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3518 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3519 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3520 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3521 		break;
3522 	case CHIP_HAWAII:
3523 		rdev->config.cik.max_shader_engines = 4;
3524 		rdev->config.cik.max_tile_pipes = 16;
3525 		rdev->config.cik.max_cu_per_sh = 11;
3526 		rdev->config.cik.max_sh_per_se = 1;
3527 		rdev->config.cik.max_backends_per_se = 4;
3528 		rdev->config.cik.max_texture_channel_caches = 16;
3529 		rdev->config.cik.max_gprs = 256;
3530 		rdev->config.cik.max_gs_threads = 32;
3531 		rdev->config.cik.max_hw_contexts = 8;
3532 
3533 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3534 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3535 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3536 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3537 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3538 		break;
3539 	case CHIP_KAVERI:
3540 		rdev->config.cik.max_shader_engines = 1;
3541 		rdev->config.cik.max_tile_pipes = 4;
3542 		if ((rdev->pdev->device == 0x1304) ||
3543 		    (rdev->pdev->device == 0x1305) ||
3544 		    (rdev->pdev->device == 0x130C) ||
3545 		    (rdev->pdev->device == 0x130F) ||
3546 		    (rdev->pdev->device == 0x1310) ||
3547 		    (rdev->pdev->device == 0x1311) ||
3548 		    (rdev->pdev->device == 0x131C)) {
3549 			rdev->config.cik.max_cu_per_sh = 8;
3550 			rdev->config.cik.max_backends_per_se = 2;
3551 		} else if ((rdev->pdev->device == 0x1309) ||
3552 			   (rdev->pdev->device == 0x130A) ||
3553 			   (rdev->pdev->device == 0x130D) ||
3554 			   (rdev->pdev->device == 0x1313) ||
3555 			   (rdev->pdev->device == 0x131D)) {
3556 			rdev->config.cik.max_cu_per_sh = 6;
3557 			rdev->config.cik.max_backends_per_se = 2;
3558 		} else if ((rdev->pdev->device == 0x1306) ||
3559 			   (rdev->pdev->device == 0x1307) ||
3560 			   (rdev->pdev->device == 0x130B) ||
3561 			   (rdev->pdev->device == 0x130E) ||
3562 			   (rdev->pdev->device == 0x1315) ||
3563 			   (rdev->pdev->device == 0x1318) ||
3564 			   (rdev->pdev->device == 0x131B)) {
3565 			rdev->config.cik.max_cu_per_sh = 4;
3566 			rdev->config.cik.max_backends_per_se = 1;
3567 		} else {
3568 			rdev->config.cik.max_cu_per_sh = 3;
3569 			rdev->config.cik.max_backends_per_se = 1;
3570 		}
3571 		rdev->config.cik.max_sh_per_se = 1;
3572 		rdev->config.cik.max_texture_channel_caches = 4;
3573 		rdev->config.cik.max_gprs = 256;
3574 		rdev->config.cik.max_gs_threads = 16;
3575 		rdev->config.cik.max_hw_contexts = 8;
3576 
3577 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3578 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3579 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3580 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3581 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3582 		break;
3583 	case CHIP_KABINI:
3584 	case CHIP_MULLINS:
3585 	default:
3586 		rdev->config.cik.max_shader_engines = 1;
3587 		rdev->config.cik.max_tile_pipes = 2;
3588 		rdev->config.cik.max_cu_per_sh = 2;
3589 		rdev->config.cik.max_sh_per_se = 1;
3590 		rdev->config.cik.max_backends_per_se = 1;
3591 		rdev->config.cik.max_texture_channel_caches = 2;
3592 		rdev->config.cik.max_gprs = 256;
3593 		rdev->config.cik.max_gs_threads = 16;
3594 		rdev->config.cik.max_hw_contexts = 8;
3595 
3596 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3597 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3598 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3599 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3600 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3601 		break;
3602 	}
3603 
3604 	/* Initialize HDP */
3605 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3606 		WREG32((0x2c14 + j), 0x00000000);
3607 		WREG32((0x2c18 + j), 0x00000000);
3608 		WREG32((0x2c1c + j), 0x00000000);
3609 		WREG32((0x2c20 + j), 0x00000000);
3610 		WREG32((0x2c24 + j), 0x00000000);
3611 	}
3612 
3613 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3614 
3615 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3616 
3617 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3618 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3619 
3620 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3621 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3622 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3623 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3624 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3625 		rdev->config.cik.mem_row_size_in_kb = 4;
3626 	/* XXX use MC settings? */
3627 	rdev->config.cik.shader_engine_tile_size = 32;
3628 	rdev->config.cik.num_gpus = 1;
3629 	rdev->config.cik.multi_gpu_tile_size = 64;
3630 
3631 	/* fix up row size */
3632 	gb_addr_config &= ~ROW_SIZE_MASK;
3633 	switch (rdev->config.cik.mem_row_size_in_kb) {
3634 	case 1:
3635 	default:
3636 		gb_addr_config |= ROW_SIZE(0);
3637 		break;
3638 	case 2:
3639 		gb_addr_config |= ROW_SIZE(1);
3640 		break;
3641 	case 4:
3642 		gb_addr_config |= ROW_SIZE(2);
3643 		break;
3644 	}
3645 
3646 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3647 	 * not have bank info, so create a custom tiling dword.
3648 	 * bits 3:0   num_pipes
3649 	 * bits 7:4   num_banks
3650 	 * bits 11:8  group_size
3651 	 * bits 15:12 row_size
3652 	 */
3653 	rdev->config.cik.tile_config = 0;
3654 	switch (rdev->config.cik.num_tile_pipes) {
3655 	case 1:
3656 		rdev->config.cik.tile_config |= (0 << 0);
3657 		break;
3658 	case 2:
3659 		rdev->config.cik.tile_config |= (1 << 0);
3660 		break;
3661 	case 4:
3662 		rdev->config.cik.tile_config |= (2 << 0);
3663 		break;
3664 	case 8:
3665 	default:
3666 		/* XXX what about 12? */
3667 		rdev->config.cik.tile_config |= (3 << 0);
3668 		break;
3669 	}
3670 	rdev->config.cik.tile_config |=
3671 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3672 	rdev->config.cik.tile_config |=
3673 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3674 	rdev->config.cik.tile_config |=
3675 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3676 
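	/*
	 * Decode sketch (editor's illustration of the layout documented
	 * above; the local names are hypothetical, not driver API):
	 *
	 *	num_pipes_enc  = (tile_config >>  0) & 0xf;
	 *	num_banks_enc  = (tile_config >>  4) & 0xf;
	 *	group_size_enc = (tile_config >>  8) & 0xf;
	 *	row_size_enc   = (tile_config >> 12) & 0xf;
	 */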
3677 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3678 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3679 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3680 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3681 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3682 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3683 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3684 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3685 
3686 	cik_tiling_mode_table_init(rdev);
3687 
3688 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3689 		     rdev->config.cik.max_sh_per_se,
3690 		     rdev->config.cik.max_backends_per_se);
3691 
3692 	rdev->config.cik.active_cus = 0;
3693 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3694 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3695 			rdev->config.cik.active_cus +=
3696 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3697 		}
3698 	}
3699 
3700 	/* set HW defaults for 3D engine */
3701 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3702 
3703 	mutex_lock(&rdev->grbm_idx_mutex);
3704 	/*
3705 	 * making sure that the following register writes will be broadcast
3706 	 * to all the shaders
3707 	 */
3708 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3709 	WREG32(SX_DEBUG_1, 0x20);
3710 
3711 	WREG32(TA_CNTL_AUX, 0x00010000);
3712 
3713 	tmp = RREG32(SPI_CONFIG_CNTL);
3714 	tmp |= 0x03000000;
3715 	WREG32(SPI_CONFIG_CNTL, tmp);
3716 
3717 	WREG32(SQ_CONFIG, 1);
3718 
3719 	WREG32(DB_DEBUG, 0);
3720 
3721 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3722 	tmp |= 0x00000400;
3723 	WREG32(DB_DEBUG2, tmp);
3724 
3725 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3726 	tmp |= 0x00020200;
3727 	WREG32(DB_DEBUG3, tmp);
3728 
3729 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3730 	tmp |= 0x00018208;
3731 	WREG32(CB_HW_CONTROL, tmp);
3732 
3733 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3734 
3735 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3736 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3737 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3738 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3739 
3740 	WREG32(VGT_NUM_INSTANCES, 1);
3741 
3742 	WREG32(CP_PERFMON_CNTL, 0);
3743 
3744 	WREG32(SQ_CONFIG, 0);
3745 
3746 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3747 					  FORCE_EOV_MAX_REZ_CNT(255)));
3748 
3749 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3750 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3751 
3752 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3753 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3754 
3755 	tmp = RREG32(HDP_MISC_CNTL);
3756 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3757 	WREG32(HDP_MISC_CNTL, tmp);
3758 
3759 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3760 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3761 
3762 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3763 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3764 	mutex_unlock(&rdev->grbm_idx_mutex);
3765 
3766 	udelay(50);
3767 }
3768 
3769 /*
3770  * GPU scratch registers helpers function.
3771  */
3772 /**
3773  * cik_scratch_init - setup driver info for CP scratch regs
3774  *
3775  * @rdev: radeon_device pointer
3776  *
3777  * Set up the number and offset of the CP scratch registers.
3778  * NOTE: use of CP scratch registers is a legacy interface and
3779  * is not used by default on newer asics (r6xx+).  On newer asics,
3780  * memory buffers are used for fences rather than scratch regs.
3781  */
3782 static void cik_scratch_init(struct radeon_device *rdev)
3783 {
3784 	int i;
3785 
3786 	rdev->scratch.num_reg = 7;
3787 	rdev->scratch.reg_base = SCRATCH_REG0;
3788 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3789 		rdev->scratch.free[i] = true;
3790 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3791 	}
3792 }
3793 
3794 /**
3795  * cik_ring_test - basic gfx ring test
3796  *
3797  * @rdev: radeon_device pointer
3798  * @ring: radeon_ring structure holding ring information
3799  *
3800  * Allocate a scratch register and write to it using the gfx ring (CIK).
3801  * Provides a basic gfx ring test to verify that the ring is working.
3802  * Used by cik_cp_gfx_resume().
3803  * Returns 0 on success, error on failure.
3804  */
3805 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3806 {
3807 	uint32_t scratch;
3808 	uint32_t tmp = 0;
3809 	unsigned i;
3810 	int r;
3811 
3812 	r = radeon_scratch_get(rdev, &scratch);
3813 	if (r) {
3814 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3815 		return r;
3816 	}
3817 	WREG32(scratch, 0xCAFEDEAD);
3818 	r = radeon_ring_lock(rdev, ring, 3);
3819 	if (r) {
3820 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3821 		radeon_scratch_free(rdev, scratch);
3822 		return r;
3823 	}
3824 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3825 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3826 	radeon_ring_write(ring, 0xDEADBEEF);
3827 	radeon_ring_unlock_commit(rdev, ring, false);
3828 
3829 	for (i = 0; i < rdev->usec_timeout; i++) {
3830 		tmp = RREG32(scratch);
3831 		if (tmp == 0xDEADBEEF)
3832 			break;
3833 		DRM_UDELAY(1);
3834 	}
3835 	if (i < rdev->usec_timeout) {
3836 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3837 	} else {
3838 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3839 			  ring->idx, scratch, tmp);
3840 		r = -EINVAL;
3841 	}
3842 	radeon_scratch_free(rdev, scratch);
3843 	return r;
3844 }
3845 
3846 /**
3847  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3848  *
3849  * @rdev: radeon_device pointer
3850  * @ridx: radeon ring index
3851  *
3852  * Emits an hdp flush on the cp.
3853  */
3854 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3855 				       int ridx)
3856 {
3857 	struct radeon_ring *ring = &rdev->ring[ridx];
3858 	u32 ref_and_mask;
3859 
3860 	switch (ring->idx) {
3861 	case CAYMAN_RING_TYPE_CP1_INDEX:
3862 	case CAYMAN_RING_TYPE_CP2_INDEX:
3863 	default:
3864 		switch (ring->me) {
3865 		case 0:
3866 			ref_and_mask = CP2 << ring->pipe;
3867 			break;
3868 		case 1:
3869 			ref_and_mask = CP6 << ring->pipe;
3870 			break;
3871 		default:
3872 			return;
3873 		}
3874 		break;
3875 	case RADEON_RING_TYPE_GFX_INDEX:
3876 		ref_and_mask = CP0;
3877 		break;
3878 	}
3879 
3880 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3881 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3882 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3883 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3884 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3885 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3886 	radeon_ring_write(ring, ref_and_mask);
3887 	radeon_ring_write(ring, ref_and_mask);
3888 	radeon_ring_write(ring, 0x20); /* poll interval */
3889 }
3890 
3891 /**
3892  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3893  *
3894  * @rdev: radeon_device pointer
3895  * @fence: radeon fence object
3896  *
3897  * Emits a fence sequence number on the gfx ring and flushes
3898  * GPU caches.
3899  */
3900 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3901 			     struct radeon_fence *fence)
3902 {
3903 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3904 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3905 
3906 	/* EVENT_WRITE_EOP - flush caches, send int */
3907 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3908 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3909 				 EOP_TC_ACTION_EN |
3910 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3911 				 EVENT_INDEX(5)));
3912 	radeon_ring_write(ring, addr & 0xfffffffc);
3913 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3914 	radeon_ring_write(ring, fence->seq);
3915 	radeon_ring_write(ring, 0);
3916 }
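/*
 * Editor's note on the EOP packet above (field semantics as commonly
 * documented for CIK; treat as informational, not authoritative):
 * DATA_SEL(1) selects a 32-bit data write of fence->seq to addr, and
 * INT_SEL(2) requests an interrupt once that write has been confirmed.
 */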
3917 
3918 /**
3919  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3920  *
3921  * @rdev: radeon_device pointer
3922  * @fence: radeon fence object
3923  *
3924  * Emits a fence sequence number on the compute ring and flushes
3925  * GPU caches.
3926  */
3927 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3928 				 struct radeon_fence *fence)
3929 {
3930 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3931 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3932 
3933 	/* RELEASE_MEM - flush caches, send int */
3934 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3935 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3936 				 EOP_TC_ACTION_EN |
3937 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3938 				 EVENT_INDEX(5)));
3939 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3940 	radeon_ring_write(ring, addr & 0xfffffffc);
3941 	radeon_ring_write(ring, upper_32_bits(addr));
3942 	radeon_ring_write(ring, fence->seq);
3943 	radeon_ring_write(ring, 0);
3944 }
3945 
3946 /**
3947  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3948  *
3949  * @rdev: radeon_device pointer
3950  * @ring: radeon ring buffer object
3951  * @semaphore: radeon semaphore object
3952  * @emit_wait: Is this a semaphore wait?
3953  *
3954  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3955  * from running ahead of semaphore waits.
3956  */
3957 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3958 			     struct radeon_ring *ring,
3959 			     struct radeon_semaphore *semaphore,
3960 			     bool emit_wait)
3961 {
3962 	uint64_t addr = semaphore->gpu_addr;
3963 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3964 
3965 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3966 	radeon_ring_write(ring, lower_32_bits(addr));
3967 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3968 
3969 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3970 		/* Prevent the PFP from running ahead of the semaphore wait */
3971 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3972 		radeon_ring_write(ring, 0x0);
3973 	}
3974 
3975 	return true;
3976 }
3977 
3978 /**
3979  * cik_copy_cpdma - copy pages using the CP DMA engine
3980  *
3981  * @rdev: radeon_device pointer
3982  * @src_offset: src GPU address
3983  * @dst_offset: dst GPU address
3984  * @num_gpu_pages: number of GPU pages to xfer
3985  * @resv: reservation object to sync to
3986  *
3987  * Copy GPU pages using the CP DMA engine (CIK+).
3988  * Used by the radeon ttm implementation to move pages if
3989  * registered as the asic copy callback.
3990  */
3991 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3992 				    uint64_t src_offset, uint64_t dst_offset,
3993 				    unsigned num_gpu_pages,
3994 				    struct reservation_object *resv)
3995 {
3996 	struct radeon_semaphore *sem = NULL;
3997 	struct radeon_fence *fence;
3998 	int ring_index = rdev->asic->copy.blit_ring_index;
3999 	struct radeon_ring *ring = &rdev->ring[ring_index];
4000 	u32 size_in_bytes, cur_size_in_bytes, control;
4001 	int i, num_loops;
4002 	int r = 0;
4003 
4004 	r = radeon_semaphore_create(rdev, &sem);
4005 	if (r) {
4006 		DRM_ERROR("radeon: moving bo (%d).\n", r);
4007 		return ERR_PTR(r);
4008 	}
4009 
4010 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4011 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
4012 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
4013 	if (r) {
4014 		DRM_ERROR("radeon: moving bo (%d).\n", r);
4015 		radeon_semaphore_free(rdev, &sem, NULL);
4016 		return ERR_PTR(r);
4017 	}
4018 
4019 	radeon_semaphore_sync_resv(rdev, sem, resv, false);
4020 	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
4021 
4022 	for (i = 0; i < num_loops; i++) {
4023 		cur_size_in_bytes = size_in_bytes;
4024 		if (cur_size_in_bytes > 0x1fffff)
4025 			cur_size_in_bytes = 0x1fffff;
4026 		size_in_bytes -= cur_size_in_bytes;
4027 		control = 0;
4028 		if (size_in_bytes == 0)
4029 			control |= PACKET3_DMA_DATA_CP_SYNC;
4030 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4031 		radeon_ring_write(ring, control);
4032 		radeon_ring_write(ring, lower_32_bits(src_offset));
4033 		radeon_ring_write(ring, upper_32_bits(src_offset));
4034 		radeon_ring_write(ring, lower_32_bits(dst_offset));
4035 		radeon_ring_write(ring, upper_32_bits(dst_offset));
4036 		radeon_ring_write(ring, cur_size_in_bytes);
4037 		src_offset += cur_size_in_bytes;
4038 		dst_offset += cur_size_in_bytes;
4039 	}
4040 
4041 	r = radeon_fence_emit(rdev, &fence, ring->idx);
4042 	if (r) {
4043 		radeon_ring_unlock_undo(rdev, ring);
4044 		radeon_semaphore_free(rdev, &sem, NULL);
4045 		return ERR_PTR(r);
4046 	}
4047 
4048 	radeon_ring_unlock_commit(rdev, ring, false);
4049 	radeon_semaphore_free(rdev, &sem, fence);
4050 
4051 	return fence;
4052 }
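/*
 * Worked example (editor's note): each DMA_DATA packet above occupies
 * 7 dwords (header plus 6 payload dwords) and moves at most 0x1fffff
 * bytes.  Copying 16 MiB thus needs DIV_ROUND_UP(0x1000000, 0x1fffff)
 * = 9 loops, so the ring lock reserves 9 * 7 + 18 = 81 dwords.
 */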
4053 
4054 /*
4055  * IB stuff
4056  */
4057 /**
4058  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4059  *
4060  * @rdev: radeon_device pointer
4061  * @ib: radeon indirect buffer object
4062  *
4063  * Emits a DE (drawing engine) or CE (constant engine) IB
4064  * on the gfx ring.  IBs are usually generated by userspace
4065  * acceleration drivers and submitted to the kernel for
4066  * scheduling on the ring.  This function schedules the IB
4067  * on the gfx ring for execution by the GPU.
4068  */
4069 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
4070 {
4071 	struct radeon_ring *ring = &rdev->ring[ib->ring];
4072 	u32 header, control = INDIRECT_BUFFER_VALID;
4073 
4074 	if (ib->is_const_ib) {
4075 		/* set switch buffer packet before const IB */
4076 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4077 		radeon_ring_write(ring, 0);
4078 
4079 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4080 	} else {
4081 		u32 next_rptr;
4082 		if (ring->rptr_save_reg) {
4083 			next_rptr = ring->wptr + 3 + 4;
4084 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4085 			radeon_ring_write(ring, ((ring->rptr_save_reg -
4086 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
4087 			radeon_ring_write(ring, next_rptr);
4088 		} else if (rdev->wb.enabled) {
4089 			next_rptr = ring->wptr + 5 + 4;
4090 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4091 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
4092 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4093 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
4094 			radeon_ring_write(ring, next_rptr);
4095 		}
4096 
4097 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4098 	}
4099 
4100 	control |= ib->length_dw |
4101 		(ib->vm ? (ib->vm->id << 24) : 0);
4102 
4103 	radeon_ring_write(ring, header);
4104 	radeon_ring_write(ring,
4105 #ifdef __BIG_ENDIAN
4106 			  (2 << 0) |
4107 #endif
4108 			  (ib->gpu_addr & 0xFFFFFFFC));
4109 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4110 	radeon_ring_write(ring, control);
4111 }
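/*
 * Example (editor's note): for a 256-dword IB scheduled in VM id 3,
 * the control word assembled above is
 *
 *	INDIRECT_BUFFER_VALID | 256 | (3 << 24)
 *
 * with the IB length in the low bits and the VM id starting at bit 24.
 */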
4112 
4113 /**
4114  * cik_ib_test - basic gfx ring IB test
4115  *
4116  * @rdev: radeon_device pointer
4117  * @ring: radeon_ring structure holding ring information
4118  *
4119  * Allocate an IB and execute it on the gfx ring (CIK).
4120  * Provides a basic gfx ring test to verify that IBs are working.
4121  * Returns 0 on success, error on failure.
4122  */
4123 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4124 {
4125 	struct radeon_ib ib;
4126 	uint32_t scratch;
4127 	uint32_t tmp = 0;
4128 	unsigned i;
4129 	int r;
4130 
4131 	r = radeon_scratch_get(rdev, &scratch);
4132 	if (r) {
4133 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
4134 		return r;
4135 	}
4136 	WREG32(scratch, 0xCAFEDEAD);
4137 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4138 	if (r) {
4139 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4140 		radeon_scratch_free(rdev, scratch);
4141 		return r;
4142 	}
4143 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
4144 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
4145 	ib.ptr[2] = 0xDEADBEEF;
4146 	ib.length_dw = 3;
4147 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
4148 	if (r) {
4149 		radeon_scratch_free(rdev, scratch);
4150 		radeon_ib_free(rdev, &ib);
4151 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4152 		return r;
4153 	}
4154 	r = radeon_fence_wait(ib.fence, false);
4155 	if (r) {
4156 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4157 		radeon_scratch_free(rdev, scratch);
4158 		radeon_ib_free(rdev, &ib);
4159 		return r;
4160 	}
4161 	for (i = 0; i < rdev->usec_timeout; i++) {
4162 		tmp = RREG32(scratch);
4163 		if (tmp == 0xDEADBEEF)
4164 			break;
4165 		DRM_UDELAY(1);
4166 	}
4167 	if (i < rdev->usec_timeout) {
4168 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4169 	} else {
4170 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
4171 			  scratch, tmp);
4172 		r = -EINVAL;
4173 	}
4174 	radeon_scratch_free(rdev, scratch);
4175 	radeon_ib_free(rdev, &ib);
4176 	return r;
4177 }
4178 
4179 /*
4180  * CP.
4181  * On CIK, gfx and compute now have independent command processors.
4182  *
4183  * GFX
4184  * Gfx consists of a single ring and can process both gfx jobs and
4185  * compute jobs.  The gfx CP consists of three microengines (ME):
4186  * PFP - Pre-Fetch Parser
4187  * ME - Micro Engine
4188  * CE - Constant Engine
4189  * The PFP and ME make up what is considered the Drawing Engine (DE).
4190  * The CE is an asynchronous engine used for updating buffer descriptors
4191  * used by the DE so that they can be loaded into cache in parallel
4192  * while the DE is processing state update packets.
4193  *
4194  * Compute
4195  * The compute CP consists of two microengines (ME):
4196  * MEC1 - Compute MicroEngine 1
4197  * MEC2 - Compute MicroEngine 2
4198  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4199  * The queues are exposed to userspace and are programmed directly
4200  * by the compute runtime.
4201  */
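/*
 * Editor's arithmetic for the topology above, matching the totals in
 * the cik_mec_init comment below:
 *
 *	KV:    2 MEC * 4 pipes * 8 queues = 64 queues
 *	CI/KB: 1 MEC * 4 pipes * 8 queues = 32 queues
 */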
4202 /**
4203  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4204  *
4205  * @rdev: radeon_device pointer
4206  * @enable: enable or disable the MEs
4207  *
4208  * Halts or unhalts the gfx MEs.
4209  */
4210 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4211 {
4212 	if (enable)
4213 		WREG32(CP_ME_CNTL, 0);
4214 	else {
4215 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4216 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4217 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4218 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4219 	}
4220 	udelay(50);
4221 }
4222 
4223 /**
4224  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4225  *
4226  * @rdev: radeon_device pointer
4227  *
4228  * Loads the gfx PFP, ME, and CE ucode.
4229  * Returns 0 for success, -EINVAL if the ucode is not available.
4230  */
4231 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4232 {
4233 	int i;
4234 
4235 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4236 		return -EINVAL;
4237 
4238 	cik_cp_gfx_enable(rdev, false);
4239 
4240 	if (rdev->new_fw) {
4241 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
4242 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4243 		const struct gfx_firmware_header_v1_0 *ce_hdr =
4244 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4245 		const struct gfx_firmware_header_v1_0 *me_hdr =
4246 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4247 		const __le32 *fw_data;
4248 		u32 fw_size;
4249 
4250 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4251 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4252 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
4253 
4254 		/* PFP */
4255 		fw_data = (const __le32 *)
4256 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4257 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4258 		WREG32(CP_PFP_UCODE_ADDR, 0);
4259 		for (i = 0; i < fw_size; i++)
4260 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4261 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4262 
4263 		/* CE */
4264 		fw_data = (const __le32 *)
4265 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4266 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4267 		WREG32(CP_CE_UCODE_ADDR, 0);
4268 		for (i = 0; i < fw_size; i++)
4269 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4270 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4271 
4272 		/* ME */
4273 		fw_data = (const __le32 *)
4274 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4275 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4276 		WREG32(CP_ME_RAM_WADDR, 0);
4277 		for (i = 0; i < fw_size; i++)
4278 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4279 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4280 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4281 	} else {
4282 		const __be32 *fw_data;
4283 
4284 		/* PFP */
4285 		fw_data = (const __be32 *)rdev->pfp_fw->data;
4286 		WREG32(CP_PFP_UCODE_ADDR, 0);
4287 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4288 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4289 		WREG32(CP_PFP_UCODE_ADDR, 0);
4290 
4291 		/* CE */
4292 		fw_data = (const __be32 *)rdev->ce_fw->data;
4293 		WREG32(CP_CE_UCODE_ADDR, 0);
4294 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4295 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4296 		WREG32(CP_CE_UCODE_ADDR, 0);
4297 
4298 		/* ME */
4299 		fw_data = (const __be32 *)rdev->me_fw->data;
4300 		WREG32(CP_ME_RAM_WADDR, 0);
4301 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4302 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4303 		WREG32(CP_ME_RAM_WADDR, 0);
4304 	}
4305 
4306 	return 0;
4307 }
4308 
4309 /**
4310  * cik_cp_gfx_start - start the gfx ring
4311  *
4312  * @rdev: radeon_device pointer
4313  *
4314  * Enables the ring and loads the clear state context and other
4315  * packets required to init the ring.
4316  * Returns 0 for success, error for failure.
4317  */
4318 static int cik_cp_gfx_start(struct radeon_device *rdev)
4319 {
4320 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4321 	int r, i;
4322 
4323 	/* init the CP */
4324 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4325 	WREG32(CP_ENDIAN_SWAP, 0);
4326 	WREG32(CP_DEVICE_ID, 1);
4327 
4328 	cik_cp_gfx_enable(rdev, true);
4329 
4330 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4331 	if (r) {
4332 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4333 		return r;
4334 	}
4335 
4336 	/* init the CE partitions.  CE only used for gfx on CIK */
4337 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4338 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4339 	radeon_ring_write(ring, 0xc000);
4340 	radeon_ring_write(ring, 0xc000);
4341 
4342 	/* setup clear context state */
4343 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4344 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4345 
4346 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4347 	radeon_ring_write(ring, 0x80000000);
4348 	radeon_ring_write(ring, 0x80000000);
4349 
4350 	for (i = 0; i < cik_default_size; i++)
4351 		radeon_ring_write(ring, cik_default_state[i]);
4352 
4353 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4354 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4355 
4356 	/* set clear context state */
4357 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4358 	radeon_ring_write(ring, 0);
4359 
4360 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4361 	radeon_ring_write(ring, 0x00000316);
4362 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4363 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4364 
4365 	radeon_ring_unlock_commit(rdev, ring, false);
4366 
4367 	return 0;
4368 }
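/*
 * Editor's note on the ring lock budget above: the fixed packets
 * account for the extra 17 dwords on top of cik_default_size:
 * 4 (SET_BASE) + 2 (PREAMBLE begin) + 3 (CONTEXT_CONTROL) +
 * 2 (PREAMBLE end) + 2 (CLEAR_STATE) + 4 (SET_CONTEXT_REG) = 17.
 */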
4369 
4370 /**
4371  * cik_cp_gfx_fini - stop the gfx ring
4372  *
4373  * @rdev: radeon_device pointer
4374  *
4375  * Stop the gfx ring and tear down the driver ring
4376  * info.
4377  */
4378 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4379 {
4380 	cik_cp_gfx_enable(rdev, false);
4381 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4382 }
4383 
4384 /**
4385  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4386  *
4387  * @rdev: radeon_device pointer
4388  *
4389  * Program the location and size of the gfx ring buffer
4390  * and test it to make sure it's working.
4391  * Returns 0 for success, error for failure.
4392  */
4393 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4394 {
4395 	struct radeon_ring *ring;
4396 	u32 tmp;
4397 	u32 rb_bufsz;
4398 	u64 rb_addr;
4399 	int r;
4400 
4401 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4402 	if (rdev->family != CHIP_HAWAII)
4403 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4404 
4405 	/* Set the write pointer delay */
4406 	WREG32(CP_RB_WPTR_DELAY, 0);
4407 
4408 	/* set the RB to use vmid 0 */
4409 	WREG32(CP_RB_VMID, 0);
4410 
4411 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4412 
4413 	/* ring 0 - compute and gfx */
4414 	/* Set ring buffer size */
4415 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4416 	rb_bufsz = order_base_2(ring->ring_size / 8);
4417 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4418 #ifdef __BIG_ENDIAN
4419 	tmp |= BUF_SWAP_32BIT;
4420 #endif
4421 	WREG32(CP_RB0_CNTL, tmp);
4422 
4423 	/* Initialize the ring buffer's read and write pointers */
4424 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4425 	ring->wptr = 0;
4426 	WREG32(CP_RB0_WPTR, ring->wptr);
4427 
4428 	/* set the wb address whether it's enabled or not */
4429 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4430 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4431 
4432 	/* scratch register shadowing is no longer supported */
4433 	WREG32(SCRATCH_UMSK, 0);
4434 
4435 	if (!rdev->wb.enabled)
4436 		tmp |= RB_NO_UPDATE;
4437 
4438 	mdelay(1);
4439 	WREG32(CP_RB0_CNTL, tmp);
4440 
4441 	rb_addr = ring->gpu_addr >> 8;
4442 	WREG32(CP_RB0_BASE, rb_addr);
4443 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4444 
4445 	/* start the ring */
4446 	cik_cp_gfx_start(rdev);
4447 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4448 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4449 	if (r) {
4450 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4451 		return r;
4452 	}
4453 
4454 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4455 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4456 
4457 	return 0;
4458 }
4459 
4460 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4461 		     struct radeon_ring *ring)
4462 {
4463 	u32 rptr;
4464 
4465 	if (rdev->wb.enabled)
4466 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4467 	else
4468 		rptr = RREG32(CP_RB0_RPTR);
4469 
4470 	return rptr;
4471 }
4472 
4473 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4474 		     struct radeon_ring *ring)
4475 {
4476 	u32 wptr;
4477 
4478 	wptr = RREG32(CP_RB0_WPTR);
4479 
4480 	return wptr;
4481 }
4482 
4483 void cik_gfx_set_wptr(struct radeon_device *rdev,
4484 		      struct radeon_ring *ring)
4485 {
4486 	WREG32(CP_RB0_WPTR, ring->wptr);
4487 	(void)RREG32(CP_RB0_WPTR);
4488 }
4489 
4490 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4491 			 struct radeon_ring *ring)
4492 {
4493 	u32 rptr;
4494 
4495 	if (rdev->wb.enabled) {
4496 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4497 	} else {
4498 		mutex_lock(&rdev->srbm_mutex);
4499 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4500 		rptr = RREG32(CP_HQD_PQ_RPTR);
4501 		cik_srbm_select(rdev, 0, 0, 0, 0);
4502 		mutex_unlock(&rdev->srbm_mutex);
4503 	}
4504 
4505 	return rptr;
4506 }
4507 
4508 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4509 			 struct radeon_ring *ring)
4510 {
4511 	u32 wptr;
4512 
4513 	if (rdev->wb.enabled) {
4514 		/* XXX check if swapping is necessary on BE */
4515 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4516 	} else {
4517 		mutex_lock(&rdev->srbm_mutex);
4518 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4519 		wptr = RREG32(CP_HQD_PQ_WPTR);
4520 		cik_srbm_select(rdev, 0, 0, 0, 0);
4521 		mutex_unlock(&rdev->srbm_mutex);
4522 	}
4523 
4524 	return wptr;
4525 }
4526 
4527 void cik_compute_set_wptr(struct radeon_device *rdev,
4528 			  struct radeon_ring *ring)
4529 {
4530 	/* XXX check if swapping is necessary on BE */
4531 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4532 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4533 }
4534 
4535 /**
4536  * cik_cp_compute_enable - enable/disable the compute CP MEs
4537  *
4538  * @rdev: radeon_device pointer
4539  * @enable: enable or disable the MEs
4540  *
4541  * Halts or unhalts the compute MEs.
4542  */
4543 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4544 {
4545 	if (enable)
4546 		WREG32(CP_MEC_CNTL, 0);
4547 	else {
4548 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4549 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4550 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4551 	}
4552 	udelay(50);
4553 }
4554 
4555 /**
4556  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4557  *
4558  * @rdev: radeon_device pointer
4559  *
4560  * Loads the compute MEC1&2 ucode.
4561  * Returns 0 for success, -EINVAL if the ucode is not available.
4562  */
4563 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4564 {
4565 	int i;
4566 
4567 	if (!rdev->mec_fw)
4568 		return -EINVAL;
4569 
4570 	cik_cp_compute_enable(rdev, false);
4571 
4572 	if (rdev->new_fw) {
4573 		const struct gfx_firmware_header_v1_0 *mec_hdr =
4574 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4575 		const __le32 *fw_data;
4576 		u32 fw_size;
4577 
4578 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4579 
4580 		/* MEC1 */
4581 		fw_data = (const __le32 *)
4582 			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4583 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4584 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4585 		for (i = 0; i < fw_size; i++)
4586 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4587 		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4588 
4589 		/* MEC2 */
4590 		if (rdev->family == CHIP_KAVERI) {
4591 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4592 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4593 
4594 			fw_data = (const __le32 *)
4595 				(rdev->mec2_fw->data +
4596 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4597 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4598 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4599 			for (i = 0; i < fw_size; i++)
4600 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4601 			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4602 		}
4603 	} else {
4604 		const __be32 *fw_data;
4605 
4606 		/* MEC1 */
4607 		fw_data = (const __be32 *)rdev->mec_fw->data;
4608 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4609 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4610 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4611 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4612 
4613 		if (rdev->family == CHIP_KAVERI) {
4614 			/* MEC2 */
4615 			fw_data = (const __be32 *)rdev->mec_fw->data;
4616 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4617 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4618 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4619 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4620 		}
4621 	}
4622 
4623 	return 0;
4624 }
4625 
4626 /**
4627  * cik_cp_compute_start - start the compute queues
4628  *
4629  * @rdev: radeon_device pointer
4630  *
4631  * Enable the compute queues.
4632  * Returns 0 for success, error for failure.
4633  */
4634 static int cik_cp_compute_start(struct radeon_device *rdev)
4635 {
4636 	cik_cp_compute_enable(rdev, true);
4637 
4638 	return 0;
4639 }
4640 
4641 /**
4642  * cik_cp_compute_fini - stop the compute queues
4643  *
4644  * @rdev: radeon_device pointer
4645  *
4646  * Stop the compute queues and tear down the driver queue
4647  * info.
4648  */
4649 static void cik_cp_compute_fini(struct radeon_device *rdev)
4650 {
4651 	int i, idx, r;
4652 
4653 	cik_cp_compute_enable(rdev, false);
4654 
4655 	for (i = 0; i < 2; i++) {
4656 		if (i == 0)
4657 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4658 		else
4659 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4660 
4661 		if (rdev->ring[idx].mqd_obj) {
4662 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4663 			if (unlikely(r != 0))
4664 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4665 
4666 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4667 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4668 
4669 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4670 			rdev->ring[idx].mqd_obj = NULL;
4671 		}
4672 	}
4673 }
4674 
4675 static void cik_mec_fini(struct radeon_device *rdev)
4676 {
4677 	int r;
4678 
4679 	if (rdev->mec.hpd_eop_obj) {
4680 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4681 		if (unlikely(r != 0))
4682 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4683 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4684 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4685 
4686 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4687 		rdev->mec.hpd_eop_obj = NULL;
4688 	}
4689 }
4690 
4691 #define MEC_HPD_SIZE 2048
4692 
4693 static int cik_mec_init(struct radeon_device *rdev)
4694 {
4695 	int r;
4696 	u32 *hpd;
4697 
4698 	/*
4699 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4700 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4701 	 * Nonetheless, we assign only 1 pipe because all other pipes will
4702 	 * be handled by KFD
4703 	 */
4704 	rdev->mec.num_mec = 1;
4705 	rdev->mec.num_pipe = 1;
4706 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4707 
4708 	if (rdev->mec.hpd_eop_obj == NULL) {
4709 		r = radeon_bo_create(rdev,
4710 				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4711 				     PAGE_SIZE, true,
4712 				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4713 				     &rdev->mec.hpd_eop_obj);
4714 		if (r) {
4715 			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4716 			return r;
4717 		}
4718 	}
4719 
4720 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4721 	if (unlikely(r != 0)) {
4722 		cik_mec_fini(rdev);
4723 		return r;
4724 	}
4725 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4726 			  &rdev->mec.hpd_eop_gpu_addr);
4727 	if (r) {
4728 		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4729 		cik_mec_fini(rdev);
4730 		return r;
4731 	}
4732 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4733 	if (r) {
4734 		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4735 		cik_mec_fini(rdev);
4736 		return r;
4737 	}
4738 
4739 	/* clear memory.  Not sure if this is required or not */
4740 	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4741 
4742 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4743 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4744 
4745 	return 0;
4746 }
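/*
 * Worked example (editor's note): with num_mec = 1 and num_pipe = 1 as
 * set above, the HPD EOP bo is 1 * 1 * MEC_HPD_SIZE * 2 = 4096 bytes,
 * i.e. exactly one page.
 */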
4747 
4748 struct hqd_registers {
4750 	u32 cp_mqd_base_addr;
4751 	u32 cp_mqd_base_addr_hi;
4752 	u32 cp_hqd_active;
4753 	u32 cp_hqd_vmid;
4754 	u32 cp_hqd_persistent_state;
4755 	u32 cp_hqd_pipe_priority;
4756 	u32 cp_hqd_queue_priority;
4757 	u32 cp_hqd_quantum;
4758 	u32 cp_hqd_pq_base;
4759 	u32 cp_hqd_pq_base_hi;
4760 	u32 cp_hqd_pq_rptr;
4761 	u32 cp_hqd_pq_rptr_report_addr;
4762 	u32 cp_hqd_pq_rptr_report_addr_hi;
4763 	u32 cp_hqd_pq_wptr_poll_addr;
4764 	u32 cp_hqd_pq_wptr_poll_addr_hi;
4765 	u32 cp_hqd_pq_doorbell_control;
4766 	u32 cp_hqd_pq_wptr;
4767 	u32 cp_hqd_pq_control;
4768 	u32 cp_hqd_ib_base_addr;
4769 	u32 cp_hqd_ib_base_addr_hi;
4770 	u32 cp_hqd_ib_rptr;
4771 	u32 cp_hqd_ib_control;
4772 	u32 cp_hqd_iq_timer;
4773 	u32 cp_hqd_iq_rptr;
4774 	u32 cp_hqd_dequeue_request;
4775 	u32 cp_hqd_dma_offload;
4776 	u32 cp_hqd_sema_cmd;
4777 	u32 cp_hqd_msg_type;
4778 	u32 cp_hqd_atomic0_preop_lo;
4779 	u32 cp_hqd_atomic0_preop_hi;
4780 	u32 cp_hqd_atomic1_preop_lo;
4781 	u32 cp_hqd_atomic1_preop_hi;
4782 	u32 cp_hqd_hq_scheduler0;
4783 	u32 cp_hqd_hq_scheduler1;
4784 	u32 cp_mqd_control;
4785 };
4786 
4787 struct bonaire_mqd {
4789 	u32 header;
4790 	u32 dispatch_initiator;
4791 	u32 dimensions[3];
4792 	u32 start_idx[3];
4793 	u32 num_threads[3];
4794 	u32 pipeline_stat_enable;
4795 	u32 perf_counter_enable;
4796 	u32 pgm[2];
4797 	u32 tba[2];
4798 	u32 tma[2];
4799 	u32 pgm_rsrc[2];
4800 	u32 vmid;
4801 	u32 resource_limits;
4802 	u32 static_thread_mgmt01[2];
4803 	u32 tmp_ring_size;
4804 	u32 static_thread_mgmt23[2];
4805 	u32 restart[3];
4806 	u32 thread_trace_enable;
4807 	u32 reserved1;
4808 	u32 user_data[16];
4809 	u32 vgtcs_invoke_count[2];
4810 	struct hqd_registers queue_state;
4811 	u32 dequeue_cntr;
4812 	u32 interrupt_queue[64];
4813 };
4814 
4815 /**
4816  * cik_cp_compute_resume - setup the compute queue registers
4817  *
4818  * @rdev: radeon_device pointer
4819  *
4820  * Program the compute queues and test them to make sure they
4821  * are working.
4822  * Returns 0 for success, error for failure.
4823  */
4824 static int cik_cp_compute_resume(struct radeon_device *rdev)
4825 {
4826 	int r, i, j, idx;
4827 	u32 tmp;
4828 	bool use_doorbell = true;
4829 	u64 hqd_gpu_addr;
4830 	u64 mqd_gpu_addr;
4831 	u64 eop_gpu_addr;
4832 	u64 wb_gpu_addr;
4833 	u32 *buf;
4834 	struct bonaire_mqd *mqd;
4835 
4836 	r = cik_cp_compute_start(rdev);
4837 	if (r)
4838 		return r;
4839 
4840 	/* fix up chicken bits */
4841 	tmp = RREG32(CP_CPF_DEBUG);
4842 	tmp |= (1 << 23);
4843 	WREG32(CP_CPF_DEBUG, tmp);
4844 
4845 	/* init the pipes */
4846 	mutex_lock(&rdev->srbm_mutex);
4847 
4848 	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4849 
4850 	cik_srbm_select(rdev, 0, 0, 0, 0);
4851 
4852 	/* write the EOP addr */
4853 	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4854 	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4855 
4856 	/* set the VMID assigned */
4857 	WREG32(CP_HPD_EOP_VMID, 0);
4858 
4859 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4860 	tmp = RREG32(CP_HPD_EOP_CONTROL);
4861 	tmp &= ~EOP_SIZE_MASK;
4862 	tmp |= order_base_2(MEC_HPD_SIZE / 8);
4863 	WREG32(CP_HPD_EOP_CONTROL, tmp);
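	/*
	 * Editor's check of the encoding above: MEC_HPD_SIZE is 2048
	 * bytes = 512 dwords, order_base_2(2048 / 8) = 8, and
	 * 2^(8+1) = 512 dwords, which matches the buffer size.
	 */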
4864 
4865 	mutex_unlock(&rdev->srbm_mutex);
4866 
4867 	/* init the queues.  Just two for now. */
4868 	for (i = 0; i < 2; i++) {
4869 		if (i == 0)
4870 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4871 		else
4872 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4873 
4874 		if (rdev->ring[idx].mqd_obj == NULL) {
4875 			r = radeon_bo_create(rdev,
4876 					     sizeof(struct bonaire_mqd),
4877 					     PAGE_SIZE, true,
4878 					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4879 					     NULL, &rdev->ring[idx].mqd_obj);
4880 			if (r) {
4881 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4882 				return r;
4883 			}
4884 		}
4885 
4886 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4887 		if (unlikely(r != 0)) {
4888 			cik_cp_compute_fini(rdev);
4889 			return r;
4890 		}
4891 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4892 				  &mqd_gpu_addr);
4893 		if (r) {
4894 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4895 			cik_cp_compute_fini(rdev);
4896 			return r;
4897 		}
4898 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4899 		if (r) {
4900 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4901 			cik_cp_compute_fini(rdev);
4902 			return r;
4903 		}
4904 
4905 		/* init the mqd struct */
4906 		memset(buf, 0, sizeof(struct bonaire_mqd));
4907 
4908 		mqd = (struct bonaire_mqd *)buf;
4909 		mqd->header = 0xC0310800;
4910 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4911 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4912 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4913 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4914 
4915 		mutex_lock(&rdev->srbm_mutex);
4916 		cik_srbm_select(rdev, rdev->ring[idx].me,
4917 				rdev->ring[idx].pipe,
4918 				rdev->ring[idx].queue, 0);
4919 
4920 		/* disable wptr polling */
4921 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4922 		tmp &= ~WPTR_POLL_EN;
4923 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4924 
4925 		/* enable doorbell? */
4926 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4927 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4928 		if (use_doorbell)
4929 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4930 		else
4931 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4932 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4933 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4934 
4935 		/* disable the queue if it's active */
4936 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4937 		mqd->queue_state.cp_hqd_pq_rptr = 0;
4938 		mqd->queue_state.cp_hqd_pq_wptr = 0;
4939 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4940 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4941 			for (j = 0; j < rdev->usec_timeout; j++) {
4942 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4943 					break;
4944 				udelay(1);
4945 			}
4946 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4947 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4948 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4949 		}
4950 
4951 		/* set the pointer to the MQD */
4952 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4953 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4954 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4955 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4956 		/* set MQD vmid to 0 */
4957 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4958 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4959 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4960 
4961 		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4962 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4963 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4964 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4965 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4966 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4967 
4968 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4969 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4970 		mqd->queue_state.cp_hqd_pq_control &=
4971 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4972 
4973 		mqd->queue_state.cp_hqd_pq_control |=
4974 			order_base_2(rdev->ring[idx].ring_size / 8);
4975 		mqd->queue_state.cp_hqd_pq_control |=
4976 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
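		/* both fields are log2 encoded: the queue size lands in the
		 * low bits and the rptr report block size at bit 8
		 */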
4977 #ifdef __BIG_ENDIAN
4978 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4979 #endif
4980 		mqd->queue_state.cp_hqd_pq_control &=
4981 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4982 		mqd->queue_state.cp_hqd_pq_control |=
4983 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4984 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4985 
4986 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4987 		if (i == 0)
4988 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4989 		else
4990 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4991 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4992 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4993 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4994 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4995 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4996 
4997 		/* set the wb address whether it's enabled or not */
4998 		if (i == 0)
4999 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
5000 		else
5001 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
5002 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
5003 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
5004 			upper_32_bits(wb_gpu_addr) & 0xffff;
5005 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
5006 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
5007 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
5008 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
5009 
5010 		/* enable the doorbell if requested */
5011 		if (use_doorbell) {
5012 			mqd->queue_state.cp_hqd_pq_doorbell_control =
5013 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5014 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
5015 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
5016 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
5017 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5018 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
5019 				~(DOORBELL_SOURCE | DOORBELL_HIT);
5020 
5021 		} else {
5022 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
5023 		}
5024 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5025 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
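		/* with the doorbell enabled the queue is kicked by writing
		 * the new wptr through the doorbell aperture rather than a
		 * per-queue MMIO register
		 */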
5026 
5027 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5028 		rdev->ring[idx].wptr = 0;
5029 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
5030 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5031 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
5032 
5033 		/* set the vmid for the queue */
5034 		mqd->queue_state.cp_hqd_vmid = 0;
5035 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
5036 
5037 		/* activate the queue */
5038 		mqd->queue_state.cp_hqd_active = 1;
5039 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
5040 
5041 		cik_srbm_select(rdev, 0, 0, 0, 0);
5042 		mutex_unlock(&rdev->srbm_mutex);
5043 
5044 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
5045 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
5046 
5047 		rdev->ring[idx].ready = true;
5048 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
5049 		if (r)
5050 			rdev->ring[idx].ready = false;
5051 	}
5052 
5053 	return 0;
5054 }
5055 
5056 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
5057 {
5058 	cik_cp_gfx_enable(rdev, enable);
5059 	cik_cp_compute_enable(rdev, enable);
5060 }
5061 
5062 static int cik_cp_load_microcode(struct radeon_device *rdev)
5063 {
5064 	int r;
5065 
5066 	r = cik_cp_gfx_load_microcode(rdev);
5067 	if (r)
5068 		return r;
5069 	r = cik_cp_compute_load_microcode(rdev);
5070 	if (r)
5071 		return r;
5072 
5073 	return 0;
5074 }
5075 
5076 static void cik_cp_fini(struct radeon_device *rdev)
5077 {
5078 	cik_cp_gfx_fini(rdev);
5079 	cik_cp_compute_fini(rdev);
5080 }
5081 
5082 static int cik_cp_resume(struct radeon_device *rdev)
5083 {
5084 	int r;
5085 
5086 	cik_enable_gui_idle_interrupt(rdev, false);
5087 
5088 	r = cik_cp_load_microcode(rdev);
5089 	if (r)
5090 		return r;
5091 
5092 	r = cik_cp_gfx_resume(rdev);
5093 	if (r)
5094 		return r;
5095 	r = cik_cp_compute_resume(rdev);
5096 	if (r)
5097 		return r;
5098 
5099 	cik_enable_gui_idle_interrupt(rdev, true);
5100 
5101 	return 0;
5102 }
5103 
5104 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
5105 {
5106 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
5107 		RREG32(GRBM_STATUS));
5108 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
5109 		RREG32(GRBM_STATUS2));
5110 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
5111 		RREG32(GRBM_STATUS_SE0));
5112 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
5113 		RREG32(GRBM_STATUS_SE1));
5114 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
5115 		RREG32(GRBM_STATUS_SE2));
5116 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
5117 		RREG32(GRBM_STATUS_SE3));
5118 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
5119 		RREG32(SRBM_STATUS));
5120 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
5121 		RREG32(SRBM_STATUS2));
5122 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
5123 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
5124 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
5125 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
5126 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
5127 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
5128 		 RREG32(CP_STALLED_STAT1));
5129 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
5130 		 RREG32(CP_STALLED_STAT2));
5131 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
5132 		 RREG32(CP_STALLED_STAT3));
5133 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
5134 		 RREG32(CP_CPF_BUSY_STAT));
5135 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
5136 		 RREG32(CP_CPF_STALLED_STAT1));
5137 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
5138 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
5139 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
5140 		 RREG32(CP_CPC_STALLED_STAT1));
5141 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
5142 }
5143 
5144 /**
5145  * cik_gpu_check_soft_reset - check which blocks are busy
5146  *
5147  * @rdev: radeon_device pointer
5148  *
5149  * Check which blocks are busy and return the relevant reset
5150  * mask to be used by cik_gpu_soft_reset().
5151  * Returns a mask of the blocks to be reset.
5152  */
5153 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
5154 {
5155 	u32 reset_mask = 0;
5156 	u32 tmp;
5157 
5158 	/* GRBM_STATUS */
5159 	tmp = RREG32(GRBM_STATUS);
5160 	if (tmp & (PA_BUSY | SC_BUSY |
5161 		   BCI_BUSY | SX_BUSY |
5162 		   TA_BUSY | VGT_BUSY |
5163 		   DB_BUSY | CB_BUSY |
5164 		   GDS_BUSY | SPI_BUSY |
5165 		   IA_BUSY | IA_BUSY_NO_DMA))
5166 		reset_mask |= RADEON_RESET_GFX;
5167 
5168 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5169 		reset_mask |= RADEON_RESET_CP;
5170 
5171 	/* GRBM_STATUS2 */
5172 	tmp = RREG32(GRBM_STATUS2);
5173 	if (tmp & RLC_BUSY)
5174 		reset_mask |= RADEON_RESET_RLC;
5175 
5176 	/* SDMA0_STATUS_REG */
5177 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5178 	if (!(tmp & SDMA_IDLE))
5179 		reset_mask |= RADEON_RESET_DMA;
5180 
5181 	/* SDMA1_STATUS_REG */
5182 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5183 	if (!(tmp & SDMA_IDLE))
5184 		reset_mask |= RADEON_RESET_DMA1;
5185 
5186 	/* SRBM_STATUS2 */
5187 	tmp = RREG32(SRBM_STATUS2);
5188 	if (tmp & SDMA_BUSY)
5189 		reset_mask |= RADEON_RESET_DMA;
5190 
5191 	if (tmp & SDMA1_BUSY)
5192 		reset_mask |= RADEON_RESET_DMA1;
5193 
5194 	/* SRBM_STATUS */
5195 	tmp = RREG32(SRBM_STATUS);
5196 
5197 	if (tmp & IH_BUSY)
5198 		reset_mask |= RADEON_RESET_IH;
5199 
5200 	if (tmp & SEM_BUSY)
5201 		reset_mask |= RADEON_RESET_SEM;
5202 
5203 	if (tmp & GRBM_RQ_PENDING)
5204 		reset_mask |= RADEON_RESET_GRBM;
5205 
5206 	if (tmp & VMC_BUSY)
5207 		reset_mask |= RADEON_RESET_VMC;
5208 
5209 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5210 		   MCC_BUSY | MCD_BUSY))
5211 		reset_mask |= RADEON_RESET_MC;
5212 
5213 	if (evergreen_is_display_hung(rdev))
5214 		reset_mask |= RADEON_RESET_DISPLAY;
5215 
5216 	/* Skip MC reset as it's most likely not hung, just busy */
5217 	if (reset_mask & RADEON_RESET_MC) {
5218 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5219 		reset_mask &= ~RADEON_RESET_MC;
5220 	}
5221 
5222 	return reset_mask;
5223 }
5224 
5225 /**
5226  * cik_gpu_soft_reset - soft reset GPU
5227  *
5228  * @rdev: radeon_device pointer
5229  * @reset_mask: mask of which blocks to reset
5230  *
5231  * Soft reset the blocks specified in @reset_mask.
5232  */
5233 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
5234 {
5235 	struct evergreen_mc_save save;
5236 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5237 	u32 tmp;
5238 
5239 	if (reset_mask == 0)
5240 		return;
5241 
5242 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
5243 
5244 	cik_print_gpu_status_regs(rdev);
5245 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5246 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5247 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5248 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5249 
5250 	/* disable CG/PG */
5251 	cik_fini_pg(rdev);
5252 	cik_fini_cg(rdev);
5253 
5254 	/* stop the rlc */
5255 	cik_rlc_stop(rdev);
5256 
5257 	/* Disable GFX parsing/prefetching */
5258 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5259 
5260 	/* Disable MEC parsing/prefetching */
5261 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5262 
5263 	if (reset_mask & RADEON_RESET_DMA) {
5264 		/* sdma0 */
5265 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5266 		tmp |= SDMA_HALT;
5267 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5268 	}
5269 	if (reset_mask & RADEON_RESET_DMA1) {
5270 		/* sdma1 */
5271 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5272 		tmp |= SDMA_HALT;
5273 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5274 	}
5275 
5276 	evergreen_mc_stop(rdev, &save);
5277 	if (evergreen_mc_wait_for_idle(rdev)) {
5278 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5279 	}
5280 
5281 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5282 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5283 
5284 	if (reset_mask & RADEON_RESET_CP) {
5285 		grbm_soft_reset |= SOFT_RESET_CP;
5286 
5287 		srbm_soft_reset |= SOFT_RESET_GRBM;
5288 	}
5289 
5290 	if (reset_mask & RADEON_RESET_DMA)
5291 		srbm_soft_reset |= SOFT_RESET_SDMA;
5292 
5293 	if (reset_mask & RADEON_RESET_DMA1)
5294 		srbm_soft_reset |= SOFT_RESET_SDMA1;
5295 
5296 	if (reset_mask & RADEON_RESET_DISPLAY)
5297 		srbm_soft_reset |= SOFT_RESET_DC;
5298 
5299 	if (reset_mask & RADEON_RESET_RLC)
5300 		grbm_soft_reset |= SOFT_RESET_RLC;
5301 
5302 	if (reset_mask & RADEON_RESET_SEM)
5303 		srbm_soft_reset |= SOFT_RESET_SEM;
5304 
5305 	if (reset_mask & RADEON_RESET_IH)
5306 		srbm_soft_reset |= SOFT_RESET_IH;
5307 
5308 	if (reset_mask & RADEON_RESET_GRBM)
5309 		srbm_soft_reset |= SOFT_RESET_GRBM;
5310 
5311 	if (reset_mask & RADEON_RESET_VMC)
5312 		srbm_soft_reset |= SOFT_RESET_VMC;
5313 
5314 	if (!(rdev->flags & RADEON_IS_IGP)) {
5315 		if (reset_mask & RADEON_RESET_MC)
5316 			srbm_soft_reset |= SOFT_RESET_MC;
5317 	}
5318 
5319 	if (grbm_soft_reset) {
5320 		tmp = RREG32(GRBM_SOFT_RESET);
5321 		tmp |= grbm_soft_reset;
5322 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5323 		WREG32(GRBM_SOFT_RESET, tmp);
5324 		tmp = RREG32(GRBM_SOFT_RESET);
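		/* read back to post the write before delaying */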
5325 
5326 		udelay(50);
5327 
5328 		tmp &= ~grbm_soft_reset;
5329 		WREG32(GRBM_SOFT_RESET, tmp);
5330 		tmp = RREG32(GRBM_SOFT_RESET);
5331 	}
5332 
5333 	if (srbm_soft_reset) {
5334 		tmp = RREG32(SRBM_SOFT_RESET);
5335 		tmp |= srbm_soft_reset;
5336 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5337 		WREG32(SRBM_SOFT_RESET, tmp);
5338 		tmp = RREG32(SRBM_SOFT_RESET);
5339 
5340 		udelay(50);
5341 
5342 		tmp &= ~srbm_soft_reset;
5343 		WREG32(SRBM_SOFT_RESET, tmp);
5344 		tmp = RREG32(SRBM_SOFT_RESET);
5345 	}
5346 
5347 	/* Wait a little for things to settle down */
5348 	udelay(50);
5349 
5350 	evergreen_mc_resume(rdev, &save);
5351 	udelay(50);
5352 
5353 	cik_print_gpu_status_regs(rdev);
5354 }
5355 
5356 struct kv_reset_save_regs {
5357 	u32 gmcon_reng_execute;
5358 	u32 gmcon_misc;
5359 	u32 gmcon_misc3;
5360 };
5361 
5362 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5363 				   struct kv_reset_save_regs *save)
5364 {
5365 	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5366 	save->gmcon_misc = RREG32(GMCON_MISC);
5367 	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5368 
5369 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5370 	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5371 						STCTRL_STUTTER_EN));
5372 }
5373 
5374 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5375 				      struct kv_reset_save_regs *save)
5376 {
5377 	int i;
5378 
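	/* replay the GMCON power-gating FSM restore sequence; each config
	 * value selects a PGFSM register bank and the data values are
	 * hardware-specific magic numbers
	 */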
5379 	WREG32(GMCON_PGFSM_WRITE, 0);
5380 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5381 
5382 	for (i = 0; i < 5; i++)
5383 		WREG32(GMCON_PGFSM_WRITE, 0);
5384 
5385 	WREG32(GMCON_PGFSM_WRITE, 0);
5386 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5387 
5388 	for (i = 0; i < 5; i++)
5389 		WREG32(GMCON_PGFSM_WRITE, 0);
5390 
5391 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5392 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5393 
5394 	for (i = 0; i < 5; i++)
5395 		WREG32(GMCON_PGFSM_WRITE, 0);
5396 
5397 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5398 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5399 
5400 	for (i = 0; i < 5; i++)
5401 		WREG32(GMCON_PGFSM_WRITE, 0);
5402 
5403 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5404 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5405 
5406 	for (i = 0; i < 5; i++)
5407 		WREG32(GMCON_PGFSM_WRITE, 0);
5408 
5409 	WREG32(GMCON_PGFSM_WRITE, 0);
5410 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5411 
5412 	for (i = 0; i < 5; i++)
5413 		WREG32(GMCON_PGFSM_WRITE, 0);
5414 
5415 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5416 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5417 
5418 	for (i = 0; i < 5; i++)
5419 		WREG32(GMCON_PGFSM_WRITE, 0);
5420 
5421 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5422 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5423 
5424 	for (i = 0; i < 5; i++)
5425 		WREG32(GMCON_PGFSM_WRITE, 0);
5426 
5427 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5428 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5429 
5430 	for (i = 0; i < 5; i++)
5431 		WREG32(GMCON_PGFSM_WRITE, 0);
5432 
5433 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5434 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5435 
5436 	for (i = 0; i < 5; i++)
5437 		WREG32(GMCON_PGFSM_WRITE, 0);
5438 
5439 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5440 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5441 
5442 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5443 	WREG32(GMCON_MISC, save->gmcon_misc);
5444 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5445 }
5446 
5447 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5448 {
5449 	struct evergreen_mc_save save;
5450 	struct kv_reset_save_regs kv_save = { 0 };
5451 	u32 tmp, i;
5452 
5453 	dev_info(rdev->dev, "GPU pci config reset\n");
5454 
5455 	/* disable dpm? */
5456 
5457 	/* disable cg/pg */
5458 	cik_fini_pg(rdev);
5459 	cik_fini_cg(rdev);
5460 
5461 	/* Disable GFX parsing/prefetching */
5462 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5463 
5464 	/* Disable MEC parsing/prefetching */
5465 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5466 
5467 	/* sdma0 */
5468 	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5469 	tmp |= SDMA_HALT;
5470 	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5471 	/* sdma1 */
5472 	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5473 	tmp |= SDMA_HALT;
5474 	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5475 	/* XXX other engines? */
5476 
5477 	/* halt the rlc, disable cp internal ints */
5478 	cik_rlc_stop(rdev);
5479 
5480 	udelay(50);
5481 
5482 	/* disable mem access */
5483 	evergreen_mc_stop(rdev, &save);
5484 	if (evergreen_mc_wait_for_idle(rdev)) {
5485 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5486 	}
5487 
5488 	if (rdev->flags & RADEON_IS_IGP)
5489 		kv_save_regs_for_reset(rdev, &kv_save);
5490 
5491 	/* disable BM */
5492 	pci_clear_master(rdev->pdev);
5493 	/* reset */
5494 	radeon_pci_config_reset(rdev);
5495 
5496 	udelay(100);
5497 
5498 	/* wait for asic to come out of reset */
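	/* CONFIG_MEMSIZE reads back as all ones while the asic is held in
	 * reset, so a sane value means the chip is alive again
	 */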
5499 	for (i = 0; i < rdev->usec_timeout; i++) {
5500 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5501 			break;
5502 		udelay(1);
5503 	}
5504 
5505 	/* does asic init need to be run first??? */
5506 	if (rdev->flags & RADEON_IS_IGP)
5507 		kv_restore_regs_for_reset(rdev, &kv_save);
5508 }
5509 
5510 /**
5511  * cik_asic_reset - soft reset GPU
5512  *
5513  * @rdev: radeon_device pointer
5514  *
5515  * Look up which blocks are hung and attempt
5516  * to reset them.
5517  * Returns 0 for success.
5518  */
5519 int cik_asic_reset(struct radeon_device *rdev)
5520 {
5521 	u32 reset_mask;
5522 
5523 	reset_mask = cik_gpu_check_soft_reset(rdev);
5524 
5525 	if (reset_mask)
5526 		r600_set_bios_scratch_engine_hung(rdev, true);
5527 
5528 	/* try soft reset */
5529 	cik_gpu_soft_reset(rdev, reset_mask);
5530 
5531 	reset_mask = cik_gpu_check_soft_reset(rdev);
5532 
5533 	/* try pci config reset */
5534 	if (reset_mask && radeon_hard_reset)
5535 		cik_gpu_pci_config_reset(rdev);
5536 
5537 	reset_mask = cik_gpu_check_soft_reset(rdev);
5538 
5539 	if (!reset_mask)
5540 		r600_set_bios_scratch_engine_hung(rdev, false);
5541 
5542 	return 0;
5543 }
5544 
5545 /**
5546  * cik_gfx_is_lockup - check if the 3D engine is locked up
5547  *
5548  * @rdev: radeon_device pointer
5549  * @ring: radeon_ring structure holding ring information
5550  *
5551  * Check if the 3D engine is locked up (CIK).
5552  * Returns true if the engine is locked, false if not.
5553  */
5554 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5555 {
5556 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5557 
5558 	if (!(reset_mask & (RADEON_RESET_GFX |
5559 			    RADEON_RESET_COMPUTE |
5560 			    RADEON_RESET_CP))) {
5561 		radeon_ring_lockup_update(rdev, ring);
5562 		return false;
5563 	}
5564 	return radeon_ring_test_lockup(rdev, ring);
5565 }
5566 
5567 /* MC */
5568 /**
5569  * cik_mc_program - program the GPU memory controller
5570  *
5571  * @rdev: radeon_device pointer
5572  *
5573  * Set the location of vram, gart, and AGP in the GPU's
5574  * physical address space (CIK).
5575  */
5576 static void cik_mc_program(struct radeon_device *rdev)
5577 {
5578 	struct evergreen_mc_save save;
5579 	u32 tmp;
5580 	int i, j;
5581 
5582 	/* Initialize HDP */
5583 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5584 		WREG32((0x2c14 + j), 0x00000000);
5585 		WREG32((0x2c18 + j), 0x00000000);
5586 		WREG32((0x2c1c + j), 0x00000000);
5587 		WREG32((0x2c20 + j), 0x00000000);
5588 		WREG32((0x2c24 + j), 0x00000000);
5589 	}
5590 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5591 
5592 	evergreen_mc_stop(rdev, &save);
5593 	if (radeon_mc_wait_for_idle(rdev)) {
5594 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5595 	}
5596 	/* Lockout access through VGA aperture */
5597 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5598 	/* Update configuration */
5599 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5600 	       rdev->mc.vram_start >> 12);
5601 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5602 	       rdev->mc.vram_end >> 12);
5603 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5604 	       rdev->vram_scratch.gpu_addr >> 12);
5605 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5606 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5607 	WREG32(MC_VM_FB_LOCATION, tmp);
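	/* FB_LOCATION takes the vram base in the low 16 bits and the top
	 * in the high 16 bits, both in 16MB (>> 24) units
	 */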
5608 	/* XXX double check these! */
5609 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5610 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5611 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5612 	WREG32(MC_VM_AGP_BASE, 0);
5613 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5614 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5615 	if (radeon_mc_wait_for_idle(rdev)) {
5616 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5617 	}
5618 	evergreen_mc_resume(rdev, &save);
5619 	/* we need to own VRAM, so turn off the VGA renderer here
5620 	 * to stop it from overwriting our objects */
5621 	rv515_vga_render_disable(rdev);
5622 }
5623 
5624 /**
5625  * cik_mc_init - initialize the memory controller driver params
5626  *
5627  * @rdev: radeon_device pointer
5628  *
5629  * Look up the amount of vram, vram width, and decide how to place
5630  * vram and gart within the GPU's physical address space (CIK).
5631  * Returns 0 for success.
5632  */
5633 static int cik_mc_init(struct radeon_device *rdev)
5634 {
5635 	u32 tmp;
5636 	int chansize, numchan;
5637 
5638 	/* Get VRAM information */
5639 	rdev->mc.vram_is_ddr = true;
5640 	tmp = RREG32(MC_ARB_RAMCFG);
5641 	if (tmp & CHANSIZE_MASK) {
5642 		chansize = 64;
5643 	} else {
5644 		chansize = 32;
5645 	}
5646 	tmp = RREG32(MC_SHARED_CHMAP);
5647 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5648 	case 0:
5649 	default:
5650 		numchan = 1;
5651 		break;
5652 	case 1:
5653 		numchan = 2;
5654 		break;
5655 	case 2:
5656 		numchan = 4;
5657 		break;
5658 	case 3:
5659 		numchan = 8;
5660 		break;
5661 	case 4:
5662 		numchan = 3;
5663 		break;
5664 	case 5:
5665 		numchan = 6;
5666 		break;
5667 	case 6:
5668 		numchan = 10;
5669 		break;
5670 	case 7:
5671 		numchan = 12;
5672 		break;
5673 	case 8:
5674 		numchan = 16;
5675 		break;
5676 	}
5677 	rdev->mc.vram_width = numchan * chansize;
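	/* e.g. 4 channels x 64 bits per channel = a 256-bit vram interface */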
5678 	/* Could aper size report 0? */
5679 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5680 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5681 	/* size in MB on CIK */
5682 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5683 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5684 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5685 	si_vram_gtt_location(rdev, &rdev->mc);
5686 	radeon_update_bandwidth_info(rdev);
5687 
5688 	return 0;
5689 }
5690 
5691 /*
5692  * GART
5693  * VMID 0 holds the physical GPU addresses used by the kernel.
5694  * VMIDs 1-15 are used for userspace clients and are handled
5695  * by the radeon vm/hsa code.
5696  */
5697 /**
5698  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5699  *
5700  * @rdev: radeon_device pointer
5701  *
5702  * Flush the TLB for the VMID 0 page table (CIK).
5703  */
5704 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5705 {
5706 	/* flush hdp cache */
5707 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5708 
5709 	/* bits 0-15 are the VM contexts0-15 */
5710 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
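	/* only bit 0 is set, so just the VMID 0 TLB is invalidated */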
5711 }
5712 
5713 /**
5714  * cik_pcie_gart_enable - gart enable
5715  *
5716  * @rdev: radeon_device pointer
5717  *
5718  * This sets up the TLBs, programs the page tables for VMID0,
5719  * sets up the hw for VMIDs 1-15 which are allocated on
5720  * demand, and sets up the global locations for the LDS, GDS,
5721  * and GPUVM for FSA64 clients (CIK).
5722  * Returns 0 for success, errors for failure.
5723  */
5724 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5725 {
5726 	int r, i;
5727 
5728 	if (rdev->gart.robj == NULL) {
5729 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5730 		return -EINVAL;
5731 	}
5732 	r = radeon_gart_table_vram_pin(rdev);
5733 	if (r)
5734 		return r;
5735 	/* Setup TLB control */
5736 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5737 	       (0xA << 7) |
5738 	       ENABLE_L1_TLB |
5739 	       ENABLE_L1_FRAGMENT_PROCESSING |
5740 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5741 	       ENABLE_ADVANCED_DRIVER_MODEL |
5742 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5743 	/* Setup L2 cache */
5744 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5745 	       ENABLE_L2_FRAGMENT_PROCESSING |
5746 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5747 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5748 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5749 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5750 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5751 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5752 	       BANK_SELECT(4) |
5753 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5754 	/* setup context0 */
5755 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5756 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5757 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5758 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5759 			(u32)(rdev->dummy_page.addr >> 12));
5760 	WREG32(VM_CONTEXT0_CNTL2, 0);
5761 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5762 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5763 
5764 	WREG32(0x15D4, 0);
5765 	WREG32(0x15D8, 0);
5766 	WREG32(0x15DC, 0);
5767 
5768 	/* restore context1-15 */
5769 	/* set vm size, must be a multiple of 4 */
5770 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5771 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5772 	for (i = 1; i < 16; i++) {
5773 		if (i < 8)
5774 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5775 			       rdev->vm_manager.saved_table_addr[i]);
5776 		else
5777 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5778 			       rdev->vm_manager.saved_table_addr[i]);
5779 	}
5780 
5781 	/* enable context1-15 */
5782 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5783 	       (u32)(rdev->dummy_page.addr >> 12));
5784 	WREG32(VM_CONTEXT1_CNTL2, 4);
5785 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5786 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5787 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5788 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5789 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5790 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5791 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5792 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5793 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5794 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5795 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5796 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5797 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5798 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5799 
5800 	if (rdev->family == CHIP_KAVERI) {
5801 		u32 tmp = RREG32(CHUB_CONTROL);
5802 		tmp &= ~BYPASS_VM;
5803 		WREG32(CHUB_CONTROL, tmp);
5804 	}
5805 
5806 	/* XXX SH_MEM regs */
5807 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5808 	mutex_lock(&rdev->srbm_mutex);
5809 	for (i = 0; i < 16; i++) {
5810 		cik_srbm_select(rdev, 0, 0, 0, i);
5811 		/* CP and shaders */
5812 		WREG32(SH_MEM_CONFIG, 0);
5813 		WREG32(SH_MEM_APE1_BASE, 1);
5814 		WREG32(SH_MEM_APE1_LIMIT, 0);
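		/* base (1) > limit (0) leaves the APE1 aperture disabled */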
5815 		WREG32(SH_MEM_BASES, 0);
5816 		/* SDMA GFX */
5817 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5818 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5819 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5820 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5821 		/* XXX SDMA RLC - todo */
5822 	}
5823 	cik_srbm_select(rdev, 0, 0, 0, 0);
5824 	mutex_unlock(&rdev->srbm_mutex);
5825 
5826 	cik_pcie_gart_tlb_flush(rdev);
5827 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5828 		 (unsigned)(rdev->mc.gtt_size >> 20),
5829 		 (unsigned long long)rdev->gart.table_addr);
5830 	rdev->gart.ready = true;
5831 	return 0;
5832 }
5833 
5834 /**
5835  * cik_pcie_gart_disable - gart disable
5836  *
5837  * @rdev: radeon_device pointer
5838  *
5839  * This disables all VM page tables (CIK).
5840  */
5841 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5842 {
5843 	unsigned i;
5844 
5845 	for (i = 1; i < 16; ++i) {
5846 		uint32_t reg;
5847 		if (i < 8)
5848 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5849 		else
5850 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5851 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5852 	}
5853 
5854 	/* Disable all tables */
5855 	WREG32(VM_CONTEXT0_CNTL, 0);
5856 	WREG32(VM_CONTEXT1_CNTL, 0);
5857 	/* Setup TLB control */
5858 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5859 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5860 	/* Setup L2 cache */
5861 	WREG32(VM_L2_CNTL,
5862 	       ENABLE_L2_FRAGMENT_PROCESSING |
5863 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5864 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5865 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5866 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5867 	WREG32(VM_L2_CNTL2, 0);
5868 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5869 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5870 	radeon_gart_table_vram_unpin(rdev);
5871 }
5872 
5873 /**
5874  * cik_pcie_gart_fini - vm fini callback
5875  *
5876  * @rdev: radeon_device pointer
5877  *
5878  * Tears down the driver GART/VM setup (CIK).
5879  */
5880 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5881 {
5882 	cik_pcie_gart_disable(rdev);
5883 	radeon_gart_table_vram_free(rdev);
5884 	radeon_gart_fini(rdev);
5885 }
5886 
5887 /* vm parser */
5888 /**
5889  * cik_ib_parse - vm ib_parse callback
5890  *
5891  * @rdev: radeon_device pointer
5892  * @ib: indirect buffer pointer
5893  *
5894  * CIK uses hw IB checking so this is a nop (CIK).
5895  */
5896 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5897 {
5898 	return 0;
5899 }
5900 
5901 /*
5902  * vm
5903  * VMID 0 holds the physical GPU addresses used by the kernel.
5904  * VMIDs 1-15 are used for userspace clients and are handled
5905  * by the radeon vm/hsa code.
5906  */
5907 /**
5908  * cik_vm_init - cik vm init callback
5909  *
5910  * @rdev: radeon_device pointer
5911  *
5912  * Inits cik specific vm parameters (number of VMs, base of vram for
5913  * VMIDs 1-15) (CIK).
5914  * Returns 0 for success.
5915  */
5916 int cik_vm_init(struct radeon_device *rdev)
5917 {
5918 	/*
5919 	 * number of VMs
5920 	 * VMID 0 is reserved for System
5921 	 * radeon graphics/compute will use VMIDs 1-7
5922 	 * amdkfd will use VMIDs 8-15
5923 	 */
5924 	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5925 	/* base offset of vram pages */
5926 	if (rdev->flags & RADEON_IS_IGP) {
5927 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5928 		tmp <<= 22;
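		/* MC_VM_FB_OFFSET is in 4MB (1 << 22) units */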
5929 		rdev->vm_manager.vram_base_offset = tmp;
5930 	} else {
5931 		rdev->vm_manager.vram_base_offset = 0;
	}
5932 
5933 	return 0;
5934 }
5935 
5936 /**
5937  * cik_vm_fini - cik vm fini callback
5938  *
5939  * @rdev: radeon_device pointer
5940  *
5941  * Tear down any asic specific VM setup (CIK).
5942  */
5943 void cik_vm_fini(struct radeon_device *rdev)
5944 {
5945 }
5946 
5947 /**
5948  * cik_vm_decode_fault - print human readable fault info
5949  *
5950  * @rdev: radeon_device pointer
5951  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5952  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5953  *
5954  * Print human readable fault information (CIK).
5955  */
5956 static void cik_vm_decode_fault(struct radeon_device *rdev,
5957 				u32 status, u32 addr, u32 mc_client)
5958 {
5959 	u32 mc_id;
5960 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5961 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5962 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5963 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
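	/* mc_client packs a four character ASCII tag, one byte per
	 * character, most significant byte first
	 */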
5964 
5965 	if (rdev->family == CHIP_HAWAII)
5966 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5967 	else
5968 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5969 
5970 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5971 	       protections, vmid, addr,
5972 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5973 	       block, mc_client, mc_id);
5974 }
5975 
5976 /**
5977  * cik_vm_flush - cik vm flush using the CP
5978  *
5979  * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 * @vm: radeon_vm pointer
5980  *
5981  * Update the page table base and flush the VM TLB
5982  * using the CP (CIK).
5983  */
5984 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5985 {
5986 	struct radeon_ring *ring = &rdev->ring[ridx];
5987 	int usepfp = (ridx == RADEON_RING_TYPE_GFX_INDEX);
5988 
5989 	if (vm == NULL)
5990 		return;
5991 
5992 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5993 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5994 				 WRITE_DATA_DST_SEL(0)));
5995 	if (vm->id < 8) {
5996 		radeon_ring_write(ring,
5997 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5998 	} else {
5999 		radeon_ring_write(ring,
6000 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
6001 	}
6002 	radeon_ring_write(ring, 0);
6003 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
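	/* the page directory base is programmed in 4KB (>> 12) units */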
6004 
6005 	/* update SH_MEM_* regs */
6006 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6007 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6008 				 WRITE_DATA_DST_SEL(0)));
6009 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6010 	radeon_ring_write(ring, 0);
6011 	radeon_ring_write(ring, VMID(vm->id));
6012 
6013 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
6014 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6015 				 WRITE_DATA_DST_SEL(0)));
6016 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
6017 	radeon_ring_write(ring, 0);
6018 
6019 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
6020 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
6021 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
6022 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
6023 
6024 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6025 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6026 				 WRITE_DATA_DST_SEL(0)));
6027 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6028 	radeon_ring_write(ring, 0);
6029 	radeon_ring_write(ring, VMID(0));
6030 
6031 	/* HDP flush */
6032 	cik_hdp_flush_cp_ring_emit(rdev, ridx);
6033 
6034 	/* bits 0-15 are the VM contexts0-15 */
6035 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6036 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6037 				 WRITE_DATA_DST_SEL(0)));
6038 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6039 	radeon_ring_write(ring, 0);
6040 	radeon_ring_write(ring, 1 << vm->id);
6041 
6042 	/* compute doesn't have PFP */
6043 	if (usepfp) {
6044 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6045 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6046 		radeon_ring_write(ring, 0x0);
6047 	}
6048 }
6049 
6050 /*
6051  * RLC
6052  * The RLC is a multi-purpose microengine that handles a
6053  * variety of functions, the most important of which is
6054  * interrupt handling.
6055  */
6056 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6057 					  bool enable)
6058 {
6059 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
6060 
6061 	if (enable)
6062 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6063 	else
6064 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6065 	WREG32(CP_INT_CNTL_RING0, tmp);
6066 }
6067 
6068 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6069 {
6070 	u32 tmp;
6071 
6072 	tmp = RREG32(RLC_LB_CNTL);
6073 	if (enable)
6074 		tmp |= LOAD_BALANCE_ENABLE;
6075 	else
6076 		tmp &= ~LOAD_BALANCE_ENABLE;
6077 	WREG32(RLC_LB_CNTL, tmp);
6078 }
6079 
6080 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
6081 {
6082 	u32 i, j, k;
6083 	u32 mask;
6084 
6085 	mutex_lock(&rdev->grbm_idx_mutex);
6086 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6087 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6088 			cik_select_se_sh(rdev, i, j);
6089 			for (k = 0; k < rdev->usec_timeout; k++) {
6090 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
6091 					break;
6092 				udelay(1);
6093 			}
6094 		}
6095 	}
6096 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6097 	mutex_unlock(&rdev->grbm_idx_mutex);
6098 
6099 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
6100 	for (k = 0; k < rdev->usec_timeout; k++) {
6101 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
6102 			break;
6103 		udelay(1);
6104 	}
6105 }
6106 
6107 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6108 {
6109 	u32 tmp;
6110 
6111 	tmp = RREG32(RLC_CNTL);
6112 	if (tmp != rlc)
6113 		WREG32(RLC_CNTL, rlc);
6114 }
6115 
6116 static u32 cik_halt_rlc(struct radeon_device *rdev)
6117 {
6118 	u32 data, orig;
6119 
6120 	orig = data = RREG32(RLC_CNTL);
6121 
6122 	if (data & RLC_ENABLE) {
6123 		u32 i;
6124 
6125 		data &= ~RLC_ENABLE;
6126 		WREG32(RLC_CNTL, data);
6127 
6128 		for (i = 0; i < rdev->usec_timeout; i++) {
6129 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
6130 				break;
6131 			udelay(1);
6132 		}
6133 
6134 		cik_wait_for_rlc_serdes(rdev);
6135 	}
6136 
6137 	return orig;
6138 }
6139 
6140 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
6141 {
6142 	u32 tmp, i, mask;
6143 
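	/* handshake with the RLC: request safe mode via RLC_GPR_REG2, wait
	 * until the gfx power and clocks are reported on, then wait for the
	 * RLC to ack by clearing the REQ bit
	 */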
6144 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
6145 	WREG32(RLC_GPR_REG2, tmp);
6146 
6147 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
6148 	for (i = 0; i < rdev->usec_timeout; i++) {
6149 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
6150 			break;
6151 		udelay(1);
6152 	}
6153 
6154 	for (i = 0; i < rdev->usec_timeout; i++) {
6155 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
6156 			break;
6157 		udelay(1);
6158 	}
6159 }
6160 
6161 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6162 {
6163 	u32 tmp;
6164 
6165 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6166 	WREG32(RLC_GPR_REG2, tmp);
6167 }
6168 
6169 /**
6170  * cik_rlc_stop - stop the RLC ME
6171  *
6172  * @rdev: radeon_device pointer
6173  *
6174  * Halt the RLC ME (MicroEngine) (CIK).
6175  */
6176 static void cik_rlc_stop(struct radeon_device *rdev)
6177 {
6178 	WREG32(RLC_CNTL, 0);
6179 
6180 	cik_enable_gui_idle_interrupt(rdev, false);
6181 
6182 	cik_wait_for_rlc_serdes(rdev);
6183 }
6184 
6185 /**
6186  * cik_rlc_start - start the RLC ME
6187  *
6188  * @rdev: radeon_device pointer
6189  *
6190  * Unhalt the RLC ME (MicroEngine) (CIK).
6191  */
6192 static void cik_rlc_start(struct radeon_device *rdev)
6193 {
6194 	WREG32(RLC_CNTL, RLC_ENABLE);
6195 
6196 	cik_enable_gui_idle_interrupt(rdev, true);
6197 
6198 	udelay(50);
6199 }
6200 
6201 /**
6202  * cik_rlc_resume - setup the RLC hw
6203  *
6204  * @rdev: radeon_device pointer
6205  *
6206  * Initialize the RLC registers, load the ucode,
6207  * and start the RLC (CIK).
6208  * Returns 0 for success, -EINVAL if the ucode is not available.
6209  */
6210 static int cik_rlc_resume(struct radeon_device *rdev)
6211 {
6212 	u32 i, size, tmp;
6213 
6214 	if (!rdev->rlc_fw)
6215 		return -EINVAL;
6216 
6217 	cik_rlc_stop(rdev);
6218 
6219 	/* disable CG */
6220 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6221 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
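	/* clearing the two low bits drops CGCG_EN and CGLS_EN */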
6222 
6223 	si_rlc_reset(rdev);
6224 
6225 	cik_init_pg(rdev);
6226 
6227 	cik_init_cg(rdev);
6228 
6229 	WREG32(RLC_LB_CNTR_INIT, 0);
6230 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6231 
6232 	mutex_lock(&rdev->grbm_idx_mutex);
6233 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6234 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6235 	WREG32(RLC_LB_PARAMS, 0x00600408);
6236 	WREG32(RLC_LB_CNTL, 0x80000004);
6237 	mutex_unlock(&rdev->grbm_idx_mutex);
6238 
6239 	WREG32(RLC_MC_CNTL, 0);
6240 	WREG32(RLC_UCODE_CNTL, 0);
6241 
6242 	if (rdev->new_fw) {
6243 		const struct rlc_firmware_header_v1_0 *hdr =
6244 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6245 		const __le32 *fw_data = (const __le32 *)
6246 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6247 
6248 		radeon_ucode_print_rlc_hdr(&hdr->header);
6249 
6250 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6251 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6252 		for (i = 0; i < size; i++)
6253 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6254 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6255 	} else {
6256 		const __be32 *fw_data;
6257 
6258 		switch (rdev->family) {
6259 		case CHIP_BONAIRE:
6260 		case CHIP_HAWAII:
6261 		default:
6262 			size = BONAIRE_RLC_UCODE_SIZE;
6263 			break;
6264 		case CHIP_KAVERI:
6265 			size = KV_RLC_UCODE_SIZE;
6266 			break;
6267 		case CHIP_KABINI:
6268 			size = KB_RLC_UCODE_SIZE;
6269 			break;
6270 		case CHIP_MULLINS:
6271 			size = ML_RLC_UCODE_SIZE;
6272 			break;
6273 		}
6274 
6275 		fw_data = (const __be32 *)rdev->rlc_fw->data;
6276 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6277 		for (i = 0; i < size; i++)
6278 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6279 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6280 	}
6281 
6282 	/* XXX - find out what chips support lbpw */
6283 	cik_enable_lbpw(rdev, false);
6284 
6285 	if (rdev->family == CHIP_BONAIRE)
6286 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6287 
6288 	cik_rlc_start(rdev);
6289 
6290 	return 0;
6291 }
6292 
6293 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6294 {
6295 	u32 data, orig, tmp, tmp2;
6296 
6297 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6298 
6299 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6300 		cik_enable_gui_idle_interrupt(rdev, true);
6301 
6302 		tmp = cik_halt_rlc(rdev);
6303 
6304 		mutex_lock(&rdev->grbm_idx_mutex);
6305 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6306 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6307 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6308 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6309 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6310 		mutex_unlock(&rdev->grbm_idx_mutex);
6311 
6312 		cik_update_rlc(rdev, tmp);
6313 
6314 		data |= CGCG_EN | CGLS_EN;
6315 	} else {
6316 		cik_enable_gui_idle_interrupt(rdev, false);
6317 
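		/* back to back dummy reads, presumably to give the clock
		 * gating logic time to settle before CGCG is switched off
		 */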
6318 		RREG32(CB_CGTT_SCLK_CTRL);
6319 		RREG32(CB_CGTT_SCLK_CTRL);
6320 		RREG32(CB_CGTT_SCLK_CTRL);
6321 		RREG32(CB_CGTT_SCLK_CTRL);
6322 
6323 		data &= ~(CGCG_EN | CGLS_EN);
6324 	}
6325 
6326 	if (orig != data)
6327 		WREG32(RLC_CGCG_CGLS_CTRL, data);
6329 }
6330 
6331 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6332 {
6333 	u32 data, orig, tmp = 0;
6334 
6335 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6336 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6337 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6338 				orig = data = RREG32(CP_MEM_SLP_CNTL);
6339 				data |= CP_MEM_LS_EN;
6340 				if (orig != data)
6341 					WREG32(CP_MEM_SLP_CNTL, data);
6342 			}
6343 		}
6344 
6345 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6346 		data &= 0xfffffffd;
6347 		if (orig != data)
6348 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6349 
6350 		tmp = cik_halt_rlc(rdev);
6351 
6352 		mutex_lock(&rdev->grbm_idx_mutex);
6353 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6354 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6355 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6356 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6357 		WREG32(RLC_SERDES_WR_CTRL, data);
6358 		mutex_unlock(&rdev->grbm_idx_mutex);
6359 
6360 		cik_update_rlc(rdev, tmp);
6361 
6362 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6363 			orig = data = RREG32(CGTS_SM_CTRL_REG);
6364 			data &= ~SM_MODE_MASK;
6365 			data |= SM_MODE(0x2);
6366 			data |= SM_MODE_ENABLE;
6367 			data &= ~CGTS_OVERRIDE;
6368 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6369 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6370 				data &= ~CGTS_LS_OVERRIDE;
6371 			data &= ~ON_MONITOR_ADD_MASK;
6372 			data |= ON_MONITOR_ADD_EN;
6373 			data |= ON_MONITOR_ADD(0x96);
6374 			if (orig != data)
6375 				WREG32(CGTS_SM_CTRL_REG, data);
6376 		}
6377 	} else {
6378 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6379 		data |= 0x00000002;
6380 		if (orig != data)
6381 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6382 
6383 		data = RREG32(RLC_MEM_SLP_CNTL);
6384 		if (data & RLC_MEM_LS_EN) {
6385 			data &= ~RLC_MEM_LS_EN;
6386 			WREG32(RLC_MEM_SLP_CNTL, data);
6387 		}
6388 
6389 		data = RREG32(CP_MEM_SLP_CNTL);
6390 		if (data & CP_MEM_LS_EN) {
6391 			data &= ~CP_MEM_LS_EN;
6392 			WREG32(CP_MEM_SLP_CNTL, data);
6393 		}
6394 
6395 		orig = data = RREG32(CGTS_SM_CTRL_REG);
6396 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6397 		if (orig != data)
6398 			WREG32(CGTS_SM_CTRL_REG, data);
6399 
6400 		tmp = cik_halt_rlc(rdev);
6401 
6402 		mutex_lock(&rdev->grbm_idx_mutex);
6403 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6404 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6405 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6406 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6407 		WREG32(RLC_SERDES_WR_CTRL, data);
6408 		mutex_unlock(&rdev->grbm_idx_mutex);
6409 
6410 		cik_update_rlc(rdev, tmp);
6411 	}
6412 }
6413 
6414 static const u32 mc_cg_registers[] =
6415 {
6416 	MC_HUB_MISC_HUB_CG,
6417 	MC_HUB_MISC_SIP_CG,
6418 	MC_HUB_MISC_VM_CG,
6419 	MC_XPB_CLK_GAT,
6420 	ATC_MISC_CG,
6421 	MC_CITF_MISC_WR_CG,
6422 	MC_CITF_MISC_RD_CG,
6423 	MC_CITF_MISC_VM_CG,
6424 	VM_L2_CG,
6425 };
6426 
6427 static void cik_enable_mc_ls(struct radeon_device *rdev,
6428 			     bool enable)
6429 {
6430 	int i;
6431 	u32 orig, data;
6432 
6433 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6434 		orig = data = RREG32(mc_cg_registers[i]);
6435 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6436 			data |= MC_LS_ENABLE;
6437 		else
6438 			data &= ~MC_LS_ENABLE;
6439 		if (data != orig)
6440 			WREG32(mc_cg_registers[i], data);
6441 	}
6442 }
6443 
6444 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6445 			       bool enable)
6446 {
6447 	int i;
6448 	u32 orig, data;
6449 
6450 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6451 		orig = data = RREG32(mc_cg_registers[i]);
6452 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6453 			data |= MC_CG_ENABLE;
6454 		else
6455 			data &= ~MC_CG_ENABLE;
6456 		if (data != orig)
6457 			WREG32(mc_cg_registers[i], data);
6458 	}
6459 }
6460 
6461 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6462 				 bool enable)
6463 {
6464 	u32 orig, data;
6465 
6466 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6467 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6468 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6469 	} else {
6470 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6471 		data |= 0xff000000;
6472 		if (data != orig)
6473 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6474 
6475 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6476 		data |= 0xff000000;
6477 		if (data != orig)
6478 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6479 	}
6480 }
6481 
6482 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6483 				 bool enable)
6484 {
6485 	u32 orig, data;
6486 
6487 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6488 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6489 		data |= 0x100;
6490 		if (orig != data)
6491 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6492 
6493 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6494 		data |= 0x100;
6495 		if (orig != data)
6496 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6497 	} else {
6498 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6499 		data &= ~0x100;
6500 		if (orig != data)
6501 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6502 
6503 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6504 		data &= ~0x100;
6505 		if (orig != data)
6506 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6507 	}
6508 }
6509 
6510 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6511 				bool enable)
6512 {
6513 	u32 orig, data;
6514 
6515 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6516 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6517 		data = 0xfff;
6518 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6519 
6520 		orig = data = RREG32(UVD_CGC_CTRL);
6521 		data |= DCM;
6522 		if (orig != data)
6523 			WREG32(UVD_CGC_CTRL, data);
6524 	} else {
6525 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6526 		data &= ~0xfff;
6527 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6528 
6529 		orig = data = RREG32(UVD_CGC_CTRL);
6530 		data &= ~DCM;
6531 		if (orig != data)
6532 			WREG32(UVD_CGC_CTRL, data);
6533 	}
6534 }
6535 
6536 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6537 			       bool enable)
6538 {
6539 	u32 orig, data;
6540 
6541 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6542 
6543 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6544 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6545 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6546 	else
6547 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6548 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6549 
6550 	if (orig != data)
6551 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6552 }
6553 
6554 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6555 				bool enable)
6556 {
6557 	u32 orig, data;
6558 
6559 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6560 
6561 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6562 		data &= ~CLOCK_GATING_DIS;
6563 	else
6564 		data |= CLOCK_GATING_DIS;
6565 
6566 	if (orig != data)
6567 		WREG32(HDP_HOST_PATH_CNTL, data);
6568 }
6569 
6570 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6571 			      bool enable)
6572 {
6573 	u32 orig, data;
6574 
6575 	orig = data = RREG32(HDP_MEM_POWER_LS);
6576 
6577 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6578 		data |= HDP_LS_ENABLE;
6579 	else
6580 		data &= ~HDP_LS_ENABLE;
6581 
6582 	if (orig != data)
6583 		WREG32(HDP_MEM_POWER_LS, data);
6584 }
6585 
6586 void cik_update_cg(struct radeon_device *rdev,
6587 		   u32 block, bool enable)
6588 {
6590 	if (block & RADEON_CG_BLOCK_GFX) {
6591 		cik_enable_gui_idle_interrupt(rdev, false);
6592 		/* order matters! */
6593 		if (enable) {
6594 			cik_enable_mgcg(rdev, true);
6595 			cik_enable_cgcg(rdev, true);
6596 		} else {
6597 			cik_enable_cgcg(rdev, false);
6598 			cik_enable_mgcg(rdev, false);
6599 		}
6600 		cik_enable_gui_idle_interrupt(rdev, true);
6601 	}
6602 
6603 	if (block & RADEON_CG_BLOCK_MC) {
6604 		if (!(rdev->flags & RADEON_IS_IGP)) {
6605 			cik_enable_mc_mgcg(rdev, enable);
6606 			cik_enable_mc_ls(rdev, enable);
6607 		}
6608 	}
6609 
6610 	if (block & RADEON_CG_BLOCK_SDMA) {
6611 		cik_enable_sdma_mgcg(rdev, enable);
6612 		cik_enable_sdma_mgls(rdev, enable);
6613 	}
6614 
6615 	if (block & RADEON_CG_BLOCK_BIF) {
6616 		cik_enable_bif_mgls(rdev, enable);
6617 	}
6618 
6619 	if (block & RADEON_CG_BLOCK_UVD) {
6620 		if (rdev->has_uvd)
6621 			cik_enable_uvd_mgcg(rdev, enable);
6622 	}
6623 
6624 	if (block & RADEON_CG_BLOCK_HDP) {
6625 		cik_enable_hdp_mgcg(rdev, enable);
6626 		cik_enable_hdp_ls(rdev, enable);
6627 	}
6628 
6629 	if (block & RADEON_CG_BLOCK_VCE) {
6630 		vce_v2_0_enable_mgcg(rdev, enable);
6631 	}
6632 }
6633 
6634 static void cik_init_cg(struct radeon_device *rdev)
6635 {
6637 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6638 
6639 	if (rdev->has_uvd)
6640 		si_init_uvd_internal_cg(rdev);
6641 
6642 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6643 			     RADEON_CG_BLOCK_SDMA |
6644 			     RADEON_CG_BLOCK_BIF |
6645 			     RADEON_CG_BLOCK_UVD |
6646 			     RADEON_CG_BLOCK_HDP), true);
6647 }
6648 
6649 static void cik_fini_cg(struct radeon_device *rdev)
6650 {
6651 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6652 			     RADEON_CG_BLOCK_SDMA |
6653 			     RADEON_CG_BLOCK_BIF |
6654 			     RADEON_CG_BLOCK_UVD |
6655 			     RADEON_CG_BLOCK_HDP), false);
6656 
6657 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6658 }
6659 
6660 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6661 					  bool enable)
6662 {
6663 	u32 data, orig;
6664 
6665 	orig = data = RREG32(RLC_PG_CNTL);
6666 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6667 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6668 	else
6669 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6670 	if (orig != data)
6671 		WREG32(RLC_PG_CNTL, data);
6672 }
6673 
6674 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6675 					  bool enable)
6676 {
6677 	u32 data, orig;
6678 
6679 	orig = data = RREG32(RLC_PG_CNTL);
6680 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6681 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6682 	else
6683 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6684 	if (orig != data)
6685 		WREG32(RLC_PG_CNTL, data);
6686 }
6687 
6688 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6689 {
6690 	u32 data, orig;
6691 
6692 	orig = data = RREG32(RLC_PG_CNTL);
6693 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6694 		data &= ~DISABLE_CP_PG;
6695 	else
6696 		data |= DISABLE_CP_PG;
6697 	if (orig != data)
6698 		WREG32(RLC_PG_CNTL, data);
6699 }
6700 
6701 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6702 {
6703 	u32 data, orig;
6704 
6705 	orig = data = RREG32(RLC_PG_CNTL);
6706 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6707 		data &= ~DISABLE_GDS_PG;
6708 	else
6709 		data |= DISABLE_GDS_PG;
6710 	if (orig != data)
6711 		WREG32(RLC_PG_CNTL, data);
6712 }
6713 
6714 #define CP_ME_TABLE_SIZE    96
6715 #define CP_ME_TABLE_OFFSET  2048
6716 #define CP_MEC_TABLE_OFFSET 4096
6717 
6718 void cik_init_cp_pg_table(struct radeon_device *rdev)
6719 {
6720 	volatile u32 *dst_ptr;
6721 	int me, i, max_me = 4;
6722 	u32 bo_offset = 0;
6723 	u32 table_offset, table_size;
6724 
6725 	if (rdev->family == CHIP_KAVERI)
6726 		max_me = 5;
6727 
6728 	if (rdev->rlc.cp_table_ptr == NULL)
6729 		return;
6730 
6731 	/* write the cp table buffer */
6732 	dst_ptr = rdev->rlc.cp_table_ptr;
6733 	for (me = 0; me < max_me; me++) {
6734 		if (rdev->new_fw) {
6735 			const __le32 *fw_data;
6736 			const struct gfx_firmware_header_v1_0 *hdr;
6737 
6738 			if (me == 0) {
6739 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6740 				fw_data = (const __le32 *)
6741 					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6742 				table_offset = le32_to_cpu(hdr->jt_offset);
6743 				table_size = le32_to_cpu(hdr->jt_size);
6744 			} else if (me == 1) {
6745 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6746 				fw_data = (const __le32 *)
6747 					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6748 				table_offset = le32_to_cpu(hdr->jt_offset);
6749 				table_size = le32_to_cpu(hdr->jt_size);
6750 			} else if (me == 2) {
6751 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6752 				fw_data = (const __le32 *)
6753 					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6754 				table_offset = le32_to_cpu(hdr->jt_offset);
6755 				table_size = le32_to_cpu(hdr->jt_size);
6756 			} else if (me == 3) {
6757 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6758 				fw_data = (const __le32 *)
6759 					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6760 				table_offset = le32_to_cpu(hdr->jt_offset);
6761 				table_size = le32_to_cpu(hdr->jt_size);
6762 			} else {
6763 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6764 				fw_data = (const __le32 *)
6765 					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6766 				table_offset = le32_to_cpu(hdr->jt_offset);
6767 				table_size = le32_to_cpu(hdr->jt_size);
6768 			}
6769 
6770 			for (i = 0; i < table_size; i++) {
6771 				dst_ptr[bo_offset + i] =
6772 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6773 			}
6774 			bo_offset += table_size;
6775 		} else {
6776 			const __be32 *fw_data;
6777 			table_size = CP_ME_TABLE_SIZE;
6778 
6779 			if (me == 0) {
6780 				fw_data = (const __be32 *)rdev->ce_fw->data;
6781 				table_offset = CP_ME_TABLE_OFFSET;
6782 			} else if (me == 1) {
6783 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6784 				table_offset = CP_ME_TABLE_OFFSET;
6785 			} else if (me == 2) {
6786 				fw_data = (const __be32 *)rdev->me_fw->data;
6787 				table_offset = CP_ME_TABLE_OFFSET;
6788 			} else {
6789 				fw_data = (const __be32 *)rdev->mec_fw->data;
6790 				table_offset = CP_MEC_TABLE_OFFSET;
6791 			}
6792 
6793 			for (i = 0; i < table_size; i++) {
6794 				dst_ptr[bo_offset + i] =
6795 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6796 			}
6797 			bo_offset += table_size;
6798 		}
6799 	}
6800 }
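
/* Both branches above concatenate one CP jump table per micro-engine
 * (CE, PFP, ME, MEC, plus MEC2 on Kaveri) into the RLC cp_table BO.
 * New-style firmware carries the table location in its header
 * (jt_offset/jt_size, little-endian payload); legacy firmware uses a
 * fixed 96-dword table at dword offset 2048 (4096 for the MEC),
 * stored big-endian. */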
6801 
6802 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6803 				bool enable)
6804 {
6805 	u32 data, orig;
6806 
6807 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6808 		orig = data = RREG32(RLC_PG_CNTL);
6809 		data |= GFX_PG_ENABLE;
6810 		if (orig != data)
6811 			WREG32(RLC_PG_CNTL, data);
6812 
6813 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6814 		data |= AUTO_PG_EN;
6815 		if (orig != data)
6816 			WREG32(RLC_AUTO_PG_CTRL, data);
6817 	} else {
6818 		orig = data = RREG32(RLC_PG_CNTL);
6819 		data &= ~GFX_PG_ENABLE;
6820 		if (orig != data)
6821 			WREG32(RLC_PG_CNTL, data);
6822 
6823 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6824 		data &= ~AUTO_PG_EN;
6825 		if (orig != data)
6826 			WREG32(RLC_AUTO_PG_CTRL, data);
6827 
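		/* dummy read; value intentionally unused (presumably a
		 * posting read to flush the disables above) */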
6828 		data = RREG32(DB_RENDER_CONTROL);
6829 	}
6830 }
6831 
6832 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6833 {
6834 	u32 mask = 0, tmp, tmp1;
6835 	int i;
6836 
6837 	mutex_lock(&rdev->grbm_idx_mutex);
6838 	cik_select_se_sh(rdev, se, sh);
6839 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6840 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6841 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6842 	mutex_unlock(&rdev->grbm_idx_mutex);
6843 
6844 	tmp &= 0xffff0000;
6845 
6846 	tmp |= tmp1;
6847 	tmp >>= 16;
6848 
6849 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6850 		mask <<= 1;
6851 		mask |= 1;
6852 	}
6853 
6854 	return (~tmp) & mask;
6855 }
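
/* The mask loop above just builds max_cu_per_sh consecutive low bits,
 * i.e. (assuming max_cu_per_sh < 32) it is equivalent to:
 *
 *	mask = (1 << rdev->config.cik.max_cu_per_sh) - 1;
 *
 * so the function returns one set bit per active CU in the selected
 * SE/SH, since the registers report inactive CUs in their upper half. */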
6856 
6857 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6858 {
6859 	u32 i, j, k, active_cu_number = 0;
6860 	u32 mask, counter, cu_bitmap;
6861 	u32 tmp = 0;
6862 
6863 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6864 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6865 			mask = 1;
6866 			cu_bitmap = 0;
6867 			counter = 0;
6868 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6869 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6870 					if (counter < 2)
6871 						cu_bitmap |= mask;
6872 					counter++;
6873 				}
6874 				mask <<= 1;
6875 			}
6876 
6877 			active_cu_number += counter;
6878 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6879 		}
6880 	}
6881 
6882 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6883 
6884 	tmp = RREG32(RLC_MAX_PG_CU);
6885 	tmp &= ~MAX_PU_CU_MASK;
6886 	tmp |= MAX_PU_CU(active_cu_number);
6887 	WREG32(RLC_MAX_PG_CU, tmp);
6888 }
6889 
6890 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6891 				       bool enable)
6892 {
6893 	u32 data, orig;
6894 
6895 	orig = data = RREG32(RLC_PG_CNTL);
6896 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6897 		data |= STATIC_PER_CU_PG_ENABLE;
6898 	else
6899 		data &= ~STATIC_PER_CU_PG_ENABLE;
6900 	if (orig != data)
6901 		WREG32(RLC_PG_CNTL, data);
6902 }
6903 
6904 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6905 					bool enable)
6906 {
6907 	u32 data, orig;
6908 
6909 	orig = data = RREG32(RLC_PG_CNTL);
6910 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6911 		data |= DYN_PER_CU_PG_ENABLE;
6912 	else
6913 		data &= ~DYN_PER_CU_PG_ENABLE;
6914 	if (orig != data)
6915 		WREG32(RLC_PG_CNTL, data);
6916 }
6917 
6918 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6919 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6920 
6921 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6922 {
6923 	u32 data, orig;
6924 	u32 i;
6925 
6926 	if (rdev->rlc.cs_data) {
6927 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6928 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6929 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6930 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6931 	} else {
6932 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6933 		for (i = 0; i < 3; i++)
6934 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6935 	}
6936 	if (rdev->rlc.reg_list) {
6937 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6938 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6939 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6940 	}
6941 
6942 	orig = data = RREG32(RLC_PG_CNTL);
6943 	data |= GFX_PG_SRC;
6944 	if (orig != data)
6945 		WREG32(RLC_PG_CNTL, data);
6946 
6947 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6948 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6949 
6950 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6951 	data &= ~IDLE_POLL_COUNT_MASK;
6952 	data |= IDLE_POLL_COUNT(0x60);
6953 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6954 
6955 	data = 0x10101010;
6956 	WREG32(RLC_PG_DELAY, data);
6957 
6958 	data = RREG32(RLC_PG_DELAY_2);
6959 	data &= ~0xff;
6960 	data |= 0x3;
6961 	WREG32(RLC_PG_DELAY_2, data);
6962 
6963 	data = RREG32(RLC_AUTO_PG_CTRL);
6964 	data &= ~GRBM_REG_SGIT_MASK;
6965 	data |= GRBM_REG_SGIT(0x700);
6966 	WREG32(RLC_AUTO_PG_CTRL, data);
6967 
6968 }
6969 
6970 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6971 {
6972 	cik_enable_gfx_cgpg(rdev, enable);
6973 	cik_enable_gfx_static_mgpg(rdev, enable);
6974 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
6975 }
6976 
6977 u32 cik_get_csb_size(struct radeon_device *rdev)
6978 {
6979 	u32 count = 0;
6980 	const struct cs_section_def *sect = NULL;
6981 	const struct cs_extent_def *ext = NULL;
6982 
6983 	if (rdev->rlc.cs_data == NULL)
6984 		return 0;
6985 
6986 	/* begin clear state */
6987 	count += 2;
6988 	/* context control state */
6989 	count += 3;
6990 
6991 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6992 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6993 			if (sect->id == SECT_CONTEXT)
6994 				count += 2 + ext->reg_count;
6995 			else
6996 				return 0;
6997 		}
6998 	}
6999 	/* pa_sc_raster_config/pa_sc_raster_config1 */
7000 	count += 4;
7001 	/* end clear state */
7002 	count += 2;
7003 	/* clear state */
7004 	count += 2;
7005 
7006 	return count;
7007 }
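
/* The count computed above mirrors, dword for dword, the packet stream
 * cik_get_csb_buffer() emits below: 2 (preamble begin) + 3 (context
 * control) + (2 + reg_count) per SECT_CONTEXT extent + 4 (the
 * pa_sc_raster_config pair) + 2 (preamble end) + 2 (clear state). */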
7008 
7009 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
7010 {
7011 	u32 count = 0, i;
7012 	const struct cs_section_def *sect = NULL;
7013 	const struct cs_extent_def *ext = NULL;
7014 
7015 	if (rdev->rlc.cs_data == NULL)
7016 		return;
7017 	if (buffer == NULL)
7018 		return;
7019 
7020 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7021 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
7022 
7023 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
7024 	buffer[count++] = cpu_to_le32(0x80000000);
7025 	buffer[count++] = cpu_to_le32(0x80000000);
7026 
7027 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7028 		for (ext = sect->section; ext->extent != NULL; ++ext) {
7029 			if (sect->id == SECT_CONTEXT) {
7030 				buffer[count++] =
7031 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
7032 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
7033 				for (i = 0; i < ext->reg_count; i++)
7034 					buffer[count++] = cpu_to_le32(ext->extent[i]);
7035 			} else {
7036 				return;
7037 			}
7038 		}
7039 	}
7040 
7041 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
7042 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
7043 	switch (rdev->family) {
7044 	case CHIP_BONAIRE:
7045 		buffer[count++] = cpu_to_le32(0x16000012);
7046 		buffer[count++] = cpu_to_le32(0x00000000);
7047 		break;
7048 	case CHIP_KAVERI:
7049 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7050 		buffer[count++] = cpu_to_le32(0x00000000);
7051 		break;
7052 	case CHIP_KABINI:
7053 	case CHIP_MULLINS:
7054 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7055 		buffer[count++] = cpu_to_le32(0x00000000);
7056 		break;
7057 	case CHIP_HAWAII:
7058 		buffer[count++] = cpu_to_le32(0x3a00161a);
7059 		buffer[count++] = cpu_to_le32(0x0000002e);
7060 		break;
7061 	default:
7062 		buffer[count++] = cpu_to_le32(0x00000000);
7063 		buffer[count++] = cpu_to_le32(0x00000000);
7064 		break;
7065 	}
7066 
7067 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7068 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
7069 
7070 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
7071 	buffer[count++] = cpu_to_le32(0);
7072 }
7073 
7074 static void cik_init_pg(struct radeon_device *rdev)
7075 {
7076 	if (rdev->pg_flags) {
7077 		cik_enable_sck_slowdown_on_pu(rdev, true);
7078 		cik_enable_sck_slowdown_on_pd(rdev, true);
7079 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7080 			cik_init_gfx_cgpg(rdev);
7081 			cik_enable_cp_pg(rdev, true);
7082 			cik_enable_gds_pg(rdev, true);
7083 		}
7084 		cik_init_ao_cu_mask(rdev);
7085 		cik_update_gfx_pg(rdev, true);
7086 	}
7087 }
7088 
7089 static void cik_fini_pg(struct radeon_device *rdev)
7090 {
7091 	if (rdev->pg_flags) {
7092 		cik_update_gfx_pg(rdev, false);
7093 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7094 			cik_enable_cp_pg(rdev, false);
7095 			cik_enable_gds_pg(rdev, false);
7096 		}
7097 	}
7098 }
7099 
7100 /*
7101  * Interrupts
7102  * Starting with r6xx, interrupts are handled via a ring buffer.
7103  * Ring buffers are areas of GPU accessible memory that the GPU
7104  * writes interrupt vectors into and the host reads vectors out of.
7105  * There is a rptr (read pointer) that determines where the
7106  * host is currently reading, and a wptr (write pointer)
7107  * which determines where the GPU has written.  When the
7108  * pointers are equal, the ring is idle.  When the GPU
7109  * writes vectors to the ring buffer, it increments the
7110  * wptr.  When there is an interrupt, the host then starts
7111  * fetching commands and processing them until the pointers are
7112  * fetching vectors and processing them until the pointers are
7113  */
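
/* A minimal sketch of the consumption model described above
 * (illustrative only; cik_irq_process() below is the real
 * implementation, and process_vector() is a hypothetical stand-in
 * for its src_id switch):
 *
 *	rptr = rdev->ih.rptr;
 *	wptr = cik_get_ih_wptr(rdev);		(where the GPU last wrote)
 *	while (rptr != wptr) {			(ring not idle)
 *		process_vector(&rdev->ih.ring[rptr / 4]);
 *		rptr = (rptr + 16) & rdev->ih.ptr_mask;	(16 bytes/vector)
 *	}
 *	WREG32(IH_RB_RPTR, rptr);		(tell the GPU we caught up)
 */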
7114 
7115 /**
7116  * cik_enable_interrupts - Enable the interrupt ring buffer
7117  *
7118  * @rdev: radeon_device pointer
7119  *
7120  * Enable the interrupt ring buffer (CIK).
7121  */
7122 static void cik_enable_interrupts(struct radeon_device *rdev)
7123 {
7124 	u32 ih_cntl = RREG32(IH_CNTL);
7125 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7126 
7127 	ih_cntl |= ENABLE_INTR;
7128 	ih_rb_cntl |= IH_RB_ENABLE;
7129 	WREG32(IH_CNTL, ih_cntl);
7130 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7131 	rdev->ih.enabled = true;
7132 }
7133 
7134 /**
7135  * cik_disable_interrupts - Disable the interrupt ring buffer
7136  *
7137  * @rdev: radeon_device pointer
7138  *
7139  * Disable the interrupt ring buffer (CIK).
7140  */
7141 static void cik_disable_interrupts(struct radeon_device *rdev)
7142 {
7143 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7144 	u32 ih_cntl = RREG32(IH_CNTL);
7145 
7146 	ih_rb_cntl &= ~IH_RB_ENABLE;
7147 	ih_cntl &= ~ENABLE_INTR;
7148 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7149 	WREG32(IH_CNTL, ih_cntl);
7150 	/* set rptr, wptr to 0 */
7151 	WREG32(IH_RB_RPTR, 0);
7152 	WREG32(IH_RB_WPTR, 0);
7153 	rdev->ih.enabled = false;
7154 	rdev->ih.rptr = 0;
7155 }
7156 
7157 /**
7158  * cik_disable_interrupt_state - Disable all interrupt sources
7159  *
7160  * @rdev: radeon_device pointer
7161  *
7162  * Clear all interrupt enable bits used by the driver (CIK).
7163  */
7164 static void cik_disable_interrupt_state(struct radeon_device *rdev)
7165 {
7166 	u32 tmp;
7167 
7168 	/* gfx ring */
7169 	tmp = RREG32(CP_INT_CNTL_RING0) &
7170 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7171 	WREG32(CP_INT_CNTL_RING0, tmp);
7172 	/* sdma */
7173 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7174 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
7175 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7176 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
7177 	/* compute queues */
7178 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
7179 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
7180 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
7181 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
7182 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
7183 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
7184 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
7185 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
7186 	/* grbm */
7187 	WREG32(GRBM_INT_CNTL, 0);
7188 	/* vline/vblank, etc. */
7189 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7190 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7191 	if (rdev->num_crtc >= 4) {
7192 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7193 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7194 	}
7195 	if (rdev->num_crtc >= 6) {
7196 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7197 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7198 	}
7199 	/* pflip */
7200 	if (rdev->num_crtc >= 2) {
7201 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7202 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7203 	}
7204 	if (rdev->num_crtc >= 4) {
7205 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7206 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7207 	}
7208 	if (rdev->num_crtc >= 6) {
7209 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7210 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7211 	}
7212 
7213 	/* dac hotplug */
7214 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7215 
7216 	/* digital hotplug */
7217 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7218 	WREG32(DC_HPD1_INT_CONTROL, tmp);
7219 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7220 	WREG32(DC_HPD2_INT_CONTROL, tmp);
7221 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7222 	WREG32(DC_HPD3_INT_CONTROL, tmp);
7223 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7224 	WREG32(DC_HPD4_INT_CONTROL, tmp);
7225 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7226 	WREG32(DC_HPD5_INT_CONTROL, tmp);
7227 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7228 	WREG32(DC_HPD6_INT_CONTROL, tmp);
7229 
7230 }
7231 
7232 /**
7233  * cik_irq_init - init and enable the interrupt ring
7234  *
7235  * @rdev: radeon_device pointer
7236  *
7237  * Allocate a ring buffer for the interrupt controller,
7238  * enable the RLC, disable interrupts, enable the IH
7239  * ring buffer and enable it (CIK).
7240  * Called at device load and resume.
7241  * Returns 0 for success, errors for failure.
7242  */
7243 static int cik_irq_init(struct radeon_device *rdev)
7244 {
7245 	int ret = 0;
7246 	int rb_bufsz;
7247 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7248 
7249 	/* allocate ring */
7250 	ret = r600_ih_ring_alloc(rdev);
7251 	if (ret)
7252 		return ret;
7253 
7254 	/* disable irqs */
7255 	cik_disable_interrupts(rdev);
7256 
7257 	/* init rlc */
7258 	ret = cik_rlc_resume(rdev);
7259 	if (ret) {
7260 		r600_ih_ring_fini(rdev);
7261 		return ret;
7262 	}
7263 
7264 	/* setup interrupt control */
7265 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
7266 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7267 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
7268 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7269 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7270 	 */
7271 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7272 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7273 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7274 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
7275 
7276 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7277 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
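	/* e.g. with an (assumed) 64KB IH ring: ring_size / 4 = 16384
	 * dwords, so rb_bufsz = order_base_2(16384) = 14 */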
7278 
7279 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7280 		      IH_WPTR_OVERFLOW_CLEAR |
7281 		      (rb_bufsz << 1));
7282 
7283 	if (rdev->wb.enabled)
7284 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7285 
7286 	/* set the writeback address whether it's enabled or not */
7287 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7288 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7289 
7290 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7291 
7292 	/* set rptr, wptr to 0 */
7293 	WREG32(IH_RB_RPTR, 0);
7294 	WREG32(IH_RB_WPTR, 0);
7295 
7296 	/* Default settings for IH_CNTL (disabled at first) */
7297 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7298 	/* RPTR_REARM only works if msi's are enabled */
7299 	if (rdev->msi_enabled)
7300 		ih_cntl |= RPTR_REARM;
7301 	WREG32(IH_CNTL, ih_cntl);
7302 
7303 	/* force the active interrupt state to all disabled */
7304 	cik_disable_interrupt_state(rdev);
7305 
7306 	pci_set_master(rdev->pdev);
7307 
7308 	/* enable irqs */
7309 	cik_enable_interrupts(rdev);
7310 
7311 	return ret;
7312 }
7313 
7314 /**
7315  * cik_irq_set - enable/disable interrupt sources
7316  *
7317  * @rdev: radeon_device pointer
7318  *
7319  * Enable interrupt sources on the GPU (vblanks, hpd,
7320  * etc.) (CIK).
7321  * Returns 0 for success, errors for failure.
7322  */
7323 int cik_irq_set(struct radeon_device *rdev)
7324 {
7325 	u32 cp_int_cntl;
7326 	u32 cp_m1p0;
7327 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7328 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7329 	u32 grbm_int_cntl = 0;
7330 	u32 dma_cntl, dma_cntl1;
7331 	u32 thermal_int;
7332 
7333 	if (!rdev->irq.installed) {
7334 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7335 		return -EINVAL;
7336 	}
7337 	/* don't enable anything if the ih is disabled */
7338 	if (!rdev->ih.enabled) {
7339 		cik_disable_interrupts(rdev);
7340 		/* force the active interrupt state to all disabled */
7341 		cik_disable_interrupt_state(rdev);
7342 		return 0;
7343 	}
7344 
7345 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7346 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7347 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7348 
7349 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
7350 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
7351 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
7352 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
7353 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
7354 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
7355 
7356 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7357 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7358 
7359 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7360 
7361 	if (rdev->flags & RADEON_IS_IGP)
7362 		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
7363 			~(THERM_INTH_MASK | THERM_INTL_MASK);
7364 	else
7365 		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
7366 			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
7367 
7368 	/* enable CP interrupts on all rings */
7369 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7370 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7371 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7372 	}
7373 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7374 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7375 		DRM_DEBUG("cik_irq_set: sw int cp1\n");
7376 		if (ring->me == 1) {
7377 			switch (ring->pipe) {
7378 			case 0:
7379 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7380 				break;
7381 			default:
7382 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7383 				break;
7384 			}
7385 		} else {
7386 			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
7387 		}
7388 	}
7389 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7390 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7391 		DRM_DEBUG("cik_irq_set: sw int cp2\n");
7392 		if (ring->me == 1) {
7393 			switch (ring->pipe) {
7394 			case 0:
7395 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7396 				break;
7397 			default:
7398 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7399 				break;
7400 			}
7401 		} else {
7402 			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
7403 		}
7404 	}
7405 
7406 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7407 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7408 		dma_cntl |= TRAP_ENABLE;
7409 	}
7410 
7411 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7412 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7413 		dma_cntl1 |= TRAP_ENABLE;
7414 	}
7415 
7416 	if (rdev->irq.crtc_vblank_int[0] ||
7417 	    atomic_read(&rdev->irq.pflip[0])) {
7418 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7419 		crtc1 |= VBLANK_INTERRUPT_MASK;
7420 	}
7421 	if (rdev->irq.crtc_vblank_int[1] ||
7422 	    atomic_read(&rdev->irq.pflip[1])) {
7423 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7424 		crtc2 |= VBLANK_INTERRUPT_MASK;
7425 	}
7426 	if (rdev->irq.crtc_vblank_int[2] ||
7427 	    atomic_read(&rdev->irq.pflip[2])) {
7428 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7429 		crtc3 |= VBLANK_INTERRUPT_MASK;
7430 	}
7431 	if (rdev->irq.crtc_vblank_int[3] ||
7432 	    atomic_read(&rdev->irq.pflip[3])) {
7433 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7434 		crtc4 |= VBLANK_INTERRUPT_MASK;
7435 	}
7436 	if (rdev->irq.crtc_vblank_int[4] ||
7437 	    atomic_read(&rdev->irq.pflip[4])) {
7438 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7439 		crtc5 |= VBLANK_INTERRUPT_MASK;
7440 	}
7441 	if (rdev->irq.crtc_vblank_int[5] ||
7442 	    atomic_read(&rdev->irq.pflip[5])) {
7443 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7444 		crtc6 |= VBLANK_INTERRUPT_MASK;
7445 	}
7446 	if (rdev->irq.hpd[0]) {
7447 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7448 		hpd1 |= DC_HPDx_INT_EN;
7449 	}
7450 	if (rdev->irq.hpd[1]) {
7451 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7452 		hpd2 |= DC_HPDx_INT_EN;
7453 	}
7454 	if (rdev->irq.hpd[2]) {
7455 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7456 		hpd3 |= DC_HPDx_INT_EN;
7457 	}
7458 	if (rdev->irq.hpd[3]) {
7459 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7460 		hpd4 |= DC_HPDx_INT_EN;
7461 	}
7462 	if (rdev->irq.hpd[4]) {
7463 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7464 		hpd5 |= DC_HPDx_INT_EN;
7465 	}
7466 	if (rdev->irq.hpd[5]) {
7467 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7468 		hpd6 |= DC_HPDx_INT_EN;
7469 	}
7470 
7471 	if (rdev->irq.dpm_thermal) {
7472 		DRM_DEBUG("dpm thermal\n");
7473 		if (rdev->flags & RADEON_IS_IGP)
7474 			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
7475 		else
7476 			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
7477 	}
7478 
7479 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7480 
7481 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7482 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7483 
7484 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7485 
7486 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7487 
7488 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7489 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7490 	if (rdev->num_crtc >= 4) {
7491 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7492 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7493 	}
7494 	if (rdev->num_crtc >= 6) {
7495 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7496 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7497 	}
7498 
7499 	if (rdev->num_crtc >= 2) {
7500 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7501 		       GRPH_PFLIP_INT_MASK);
7502 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7503 		       GRPH_PFLIP_INT_MASK);
7504 	}
7505 	if (rdev->num_crtc >= 4) {
7506 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7507 		       GRPH_PFLIP_INT_MASK);
7508 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7509 		       GRPH_PFLIP_INT_MASK);
7510 	}
7511 	if (rdev->num_crtc >= 6) {
7512 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7513 		       GRPH_PFLIP_INT_MASK);
7514 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7515 		       GRPH_PFLIP_INT_MASK);
7516 	}
7517 
7518 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7519 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7520 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7521 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7522 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7523 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7524 
7525 	if (rdev->flags & RADEON_IS_IGP)
7526 		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7527 	else
7528 		WREG32_SMC(CG_THERMAL_INT, thermal_int);
7529 
7530 	return 0;
7531 }
7532 
7533 /**
7534  * cik_irq_ack - ack interrupt sources
7535  *
7536  * @rdev: radeon_device pointer
7537  *
7538  * Ack interrupt sources on the GPU (vblanks, hpd,
7539  * etc.) (CIK).  Certain interrupt sources are sw
7540  * generated and do not require an explicit ack.
7541  */
7542 static inline void cik_irq_ack(struct radeon_device *rdev)
7543 {
7544 	u32 tmp;
7545 
7546 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7547 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7548 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7549 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7550 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7551 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7552 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7553 
7554 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7555 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7556 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7557 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7558 	if (rdev->num_crtc >= 4) {
7559 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7560 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7561 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7562 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7563 	}
7564 	if (rdev->num_crtc >= 6) {
7565 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7566 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7567 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7568 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7569 	}
7570 
7571 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7572 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7573 		       GRPH_PFLIP_INT_CLEAR);
7574 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7575 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7576 		       GRPH_PFLIP_INT_CLEAR);
7577 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7578 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7579 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7580 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7581 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7582 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7583 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7584 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7585 
7586 	if (rdev->num_crtc >= 4) {
7587 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7588 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7589 			       GRPH_PFLIP_INT_CLEAR);
7590 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7591 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7592 			       GRPH_PFLIP_INT_CLEAR);
7593 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7594 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7595 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7596 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7597 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7598 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7599 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7600 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7601 	}
7602 
7603 	if (rdev->num_crtc >= 6) {
7604 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7605 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7606 			       GRPH_PFLIP_INT_CLEAR);
7607 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7608 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7609 			       GRPH_PFLIP_INT_CLEAR);
7610 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7611 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7612 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7613 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7614 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7615 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7616 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7617 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7618 	}
7619 
7620 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7621 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7622 		tmp |= DC_HPDx_INT_ACK;
7623 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7624 	}
7625 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7626 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7627 		tmp |= DC_HPDx_INT_ACK;
7628 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7629 	}
7630 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7631 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7632 		tmp |= DC_HPDx_INT_ACK;
7633 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7634 	}
7635 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7636 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7637 		tmp |= DC_HPDx_INT_ACK;
7638 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7639 	}
7640 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7641 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7642 		tmp |= DC_HPDx_INT_ACK;
7643 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7644 	}
7645 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7646 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7647 		tmp |= DC_HPDx_INT_ACK;
7648 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7649 	}
7650 }
7651 
7652 /**
7653  * cik_irq_disable - disable interrupts
7654  *
7655  * @rdev: radeon_device pointer
7656  *
7657  * Disable interrupts on the hw (CIK).
7658  */
7659 static void cik_irq_disable(struct radeon_device *rdev)
7660 {
7661 	cik_disable_interrupts(rdev);
7662 	/* Wait and acknowledge irq */
7663 	mdelay(1);
7664 	cik_irq_ack(rdev);
7665 	cik_disable_interrupt_state(rdev);
7666 }
7667 
7668 /**
7669  * cik_irq_suspend - disable interrupts for suspend
7670  *
7671  * @rdev: radeon_device pointer
7672  *
7673  * Disable interrupts and stop the RLC (CIK).
7674  * Used for suspend.
7675  */
7676 static void cik_irq_suspend(struct radeon_device *rdev)
7677 {
7678 	cik_irq_disable(rdev);
7679 	cik_rlc_stop(rdev);
7680 }
7681 
7682 /**
7683  * cik_irq_fini - tear down interrupt support
7684  *
7685  * @rdev: radeon_device pointer
7686  *
7687  * Disable interrupts on the hw and free the IH ring
7688  * buffer (CIK).
7689  * Used for driver unload.
7690  */
7691 static void cik_irq_fini(struct radeon_device *rdev)
7692 {
7693 	cik_irq_suspend(rdev);
7694 	r600_ih_ring_fini(rdev);
7695 }
7696 
7697 /**
7698  * cik_get_ih_wptr - get the IH ring buffer wptr
7699  *
7700  * @rdev: radeon_device pointer
7701  *
7702  * Get the IH ring buffer wptr from either the register
7703  * or the writeback memory buffer (CIK).  Also check for
7704  * ring buffer overflow and deal with it.
7705  * Used by cik_irq_process().
7706  * Returns the value of the wptr.
7707  */
7708 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7709 {
7710 	u32 wptr, tmp;
7711 
7712 	if (rdev->wb.enabled)
7713 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7714 	else
7715 		wptr = RREG32(IH_RB_WPTR);
7716 
7717 	if (wptr & RB_OVERFLOW) {
7718 		wptr &= ~RB_OVERFLOW;
7719 		/* When a ring buffer overflow happens, start parsing interrupts
7720 		 * from the last not-overwritten vector (wptr + 16). Hopefully
7721 		 * this should allow us to catch up.
7722 		 */
7723 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7724 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7725 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7726 		tmp = RREG32(IH_RB_CNTL);
7727 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7728 		WREG32(IH_RB_CNTL, tmp);
7729 	}
7730 	return (wptr & rdev->ih.ptr_mask);
7731 }
7732 
7733 /* CIK IV Ring
7734  * Each IV ring entry is 128 bits:
7735  * [7:0]    - interrupt source id
7736  * [31:8]   - reserved
7737  * [59:32]  - interrupt source data
7738  * [63:60]  - reserved
7739  * [71:64]  - RINGID
7740  *            CP:
7741  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7742  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7743  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7744  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7745  *            PIPE_ID - ME0 0=3D
7746  *                    - ME1&2 compute dispatcher (4 pipes each)
7747  *            SDMA:
7748  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7749  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7750  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7751  * [79:72]  - VMID
7752  * [95:80]  - PASID
7753  * [127:96] - reserved
7754  */
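
/* A sketch of decoding one 16-byte IV entry per the layout above
 * (illustrative only; it mirrors the masks cik_irq_process() uses):
 *
 *	u32 dw0 = le32_to_cpu(rdev->ih.ring[ring_index + 0]);
 *	u32 dw1 = le32_to_cpu(rdev->ih.ring[ring_index + 1]);
 *	u32 dw2 = le32_to_cpu(rdev->ih.ring[ring_index + 2]);
 *	u8  src_id   = dw0 & 0xff;		([7:0])
 *	u32 src_data = dw1 & 0xfffffff;		([59:32])
 *	u8  ring_id  = dw2 & 0xff;		([71:64] RINGID)
 *	u8  vmid     = (dw2 >> 8) & 0xff;	([79:72])
 *	u16 pasid    = (dw2 >> 16) & 0xffff;	([95:80])
 */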
7755 /**
7756  * cik_irq_process - interrupt handler
7757  *
7758  * @rdev: radeon_device pointer
7759  *
7760  * Interrupt handler (CIK).  Walk the IH ring,
7761  * ack interrupts and schedule work to handle
7762  * interrupt events.
7763  * Returns irq process return code.
7764  */
7765 int cik_irq_process(struct radeon_device *rdev)
7766 {
7767 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7768 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7769 	u32 wptr;
7770 	u32 rptr;
7771 	u32 src_id, src_data, ring_id;
7772 	u8 me_id, pipe_id, queue_id;
7773 	u32 ring_index;
7774 	bool queue_hotplug = false;
7775 	bool queue_reset = false;
7776 	u32 addr, status, mc_client;
7777 	bool queue_thermal = false;
7778 
7779 	if (!rdev->ih.enabled || rdev->shutdown)
7780 		return IRQ_NONE;
7781 
7782 	wptr = cik_get_ih_wptr(rdev);
7783 
7784 restart_ih:
7785 	/* is somebody else already processing irqs? */
7786 	if (atomic_xchg(&rdev->ih.lock, 1))
7787 		return IRQ_NONE;
7788 
7789 	rptr = rdev->ih.rptr;
7790 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7791 
7792 	/* Order reading of wptr vs. reading of IH ring data */
7793 	rmb();
7794 
7795 	/* display interrupts */
7796 	cik_irq_ack(rdev);
7797 
7798 	while (rptr != wptr) {
7799 		/* wptr/rptr are in bytes! */
7800 		ring_index = rptr / 4;
7801 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7802 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7803 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7804 
7805 		switch (src_id) {
7806 		case 1: /* D1 vblank/vline */
7807 			switch (src_data) {
7808 			case 0: /* D1 vblank */
7809 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7810 					if (rdev->irq.crtc_vblank_int[0]) {
7811 						drm_handle_vblank(rdev->ddev, 0);
7812 						rdev->pm.vblank_sync = true;
7813 						wake_up(&rdev->irq.vblank_queue);
7814 					}
7815 					if (atomic_read(&rdev->irq.pflip[0]))
7816 						radeon_crtc_handle_vblank(rdev, 0);
7817 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7818 					DRM_DEBUG("IH: D1 vblank\n");
7819 				}
7820 				break;
7821 			case 1: /* D1 vline */
7822 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7823 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7824 					DRM_DEBUG("IH: D1 vline\n");
7825 				}
7826 				break;
7827 			default:
7828 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7829 				break;
7830 			}
7831 			break;
7832 		case 2: /* D2 vblank/vline */
7833 			switch (src_data) {
7834 			case 0: /* D2 vblank */
7835 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7836 					if (rdev->irq.crtc_vblank_int[1]) {
7837 						drm_handle_vblank(rdev->ddev, 1);
7838 						rdev->pm.vblank_sync = true;
7839 						wake_up(&rdev->irq.vblank_queue);
7840 					}
7841 					if (atomic_read(&rdev->irq.pflip[1]))
7842 						radeon_crtc_handle_vblank(rdev, 1);
7843 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7844 					DRM_DEBUG("IH: D2 vblank\n");
7845 				}
7846 				break;
7847 			case 1: /* D2 vline */
7848 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7849 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7850 					DRM_DEBUG("IH: D2 vline\n");
7851 				}
7852 				break;
7853 			default:
7854 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7855 				break;
7856 			}
7857 			break;
7858 		case 3: /* D3 vblank/vline */
7859 			switch (src_data) {
7860 			case 0: /* D3 vblank */
7861 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7862 					if (rdev->irq.crtc_vblank_int[2]) {
7863 						drm_handle_vblank(rdev->ddev, 2);
7864 						rdev->pm.vblank_sync = true;
7865 						wake_up(&rdev->irq.vblank_queue);
7866 					}
7867 					if (atomic_read(&rdev->irq.pflip[2]))
7868 						radeon_crtc_handle_vblank(rdev, 2);
7869 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7870 					DRM_DEBUG("IH: D3 vblank\n");
7871 				}
7872 				break;
7873 			case 1: /* D3 vline */
7874 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7875 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7876 					DRM_DEBUG("IH: D3 vline\n");
7877 				}
7878 				break;
7879 			default:
7880 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7881 				break;
7882 			}
7883 			break;
7884 		case 4: /* D4 vblank/vline */
7885 			switch (src_data) {
7886 			case 0: /* D4 vblank */
7887 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7888 					if (rdev->irq.crtc_vblank_int[3]) {
7889 						drm_handle_vblank(rdev->ddev, 3);
7890 						rdev->pm.vblank_sync = true;
7891 						wake_up(&rdev->irq.vblank_queue);
7892 					}
7893 					if (atomic_read(&rdev->irq.pflip[3]))
7894 						radeon_crtc_handle_vblank(rdev, 3);
7895 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7896 					DRM_DEBUG("IH: D4 vblank\n");
7897 				}
7898 				break;
7899 			case 1: /* D4 vline */
7900 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7901 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7902 					DRM_DEBUG("IH: D4 vline\n");
7903 				}
7904 				break;
7905 			default:
7906 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7907 				break;
7908 			}
7909 			break;
7910 		case 5: /* D5 vblank/vline */
7911 			switch (src_data) {
7912 			case 0: /* D5 vblank */
7913 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7914 					if (rdev->irq.crtc_vblank_int[4]) {
7915 						drm_handle_vblank(rdev->ddev, 4);
7916 						rdev->pm.vblank_sync = true;
7917 						wake_up(&rdev->irq.vblank_queue);
7918 					}
7919 					if (atomic_read(&rdev->irq.pflip[4]))
7920 						radeon_crtc_handle_vblank(rdev, 4);
7921 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7922 					DRM_DEBUG("IH: D5 vblank\n");
7923 				}
7924 				break;
7925 			case 1: /* D5 vline */
7926 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7927 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7928 					DRM_DEBUG("IH: D5 vline\n");
7929 				}
7930 				break;
7931 			default:
7932 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7933 				break;
7934 			}
7935 			break;
7936 		case 6: /* D6 vblank/vline */
7937 			switch (src_data) {
7938 			case 0: /* D6 vblank */
7939 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7940 					if (rdev->irq.crtc_vblank_int[5]) {
7941 						drm_handle_vblank(rdev->ddev, 5);
7942 						rdev->pm.vblank_sync = true;
7943 						wake_up(&rdev->irq.vblank_queue);
7944 					}
7945 					if (atomic_read(&rdev->irq.pflip[5]))
7946 						radeon_crtc_handle_vblank(rdev, 5);
7947 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7948 					DRM_DEBUG("IH: D6 vblank\n");
7949 				}
7950 				break;
7951 			case 1: /* D6 vline */
7952 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7953 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7954 					DRM_DEBUG("IH: D6 vline\n");
7955 				}
7956 				break;
7957 			default:
7958 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7959 				break;
7960 			}
7961 			break;
7962 		case 8: /* D1 page flip */
7963 		case 10: /* D2 page flip */
7964 		case 12: /* D3 page flip */
7965 		case 14: /* D4 page flip */
7966 		case 16: /* D5 page flip */
7967 		case 18: /* D6 page flip */
7968 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7969 			if (radeon_use_pflipirq > 0)
7970 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7971 			break;
7972 		case 42: /* HPD hotplug */
7973 			switch (src_data) {
7974 			case 0:
7975 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7976 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7977 					queue_hotplug = true;
7978 					DRM_DEBUG("IH: HPD1\n");
7979 				}
7980 				break;
7981 			case 1:
7982 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7983 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7984 					queue_hotplug = true;
7985 					DRM_DEBUG("IH: HPD2\n");
7986 				}
7987 				break;
7988 			case 2:
7989 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7990 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7991 					queue_hotplug = true;
7992 					DRM_DEBUG("IH: HPD3\n");
7993 				}
7994 				break;
7995 			case 3:
7996 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7997 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7998 					queue_hotplug = true;
7999 					DRM_DEBUG("IH: HPD4\n");
8000 				}
8001 				break;
8002 			case 4:
8003 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
8004 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8005 					queue_hotplug = true;
8006 					DRM_DEBUG("IH: HPD5\n");
8007 				}
8008 				break;
8009 			case 5:
8010 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
8011 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8012 					queue_hotplug = true;
8013 					DRM_DEBUG("IH: HPD6\n");
8014 				}
8015 				break;
8016 			default:
8017 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8018 				break;
8019 			}
8020 			break;
8021 		case 124: /* UVD */
8022 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8023 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8024 			break;
8025 		case 146:
8026 		case 147:
8027 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8028 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8029 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8030 			/* reset addr and status */
8031 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8032 			if (addr == 0x0 && status == 0x0)
8033 				break;
8034 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8035 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8036 				addr);
8037 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8038 				status);
8039 			cik_vm_decode_fault(rdev, status, addr, mc_client);
8040 			break;
8041 		case 167: /* VCE */
8042 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8043 			switch (src_data) {
8044 			case 0:
8045 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8046 				break;
8047 			case 1:
8048 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8049 				break;
8050 			default:
8051 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8052 				break;
8053 			}
8054 			break;
8055 		case 176: /* GFX RB CP_INT */
8056 		case 177: /* GFX IB CP_INT */
8057 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8058 			break;
8059 		case 181: /* CP EOP event */
8060 			DRM_DEBUG("IH: CP EOP\n");
8061 			/* XXX check the bitfield order! */
8062 			me_id = (ring_id & 0x60) >> 5;
8063 			pipe_id = (ring_id & 0x18) >> 3;
8064 			queue_id = (ring_id & 0x7) >> 0;
8065 			switch (me_id) {
8066 			case 0:
8067 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8068 				break;
8069 			case 1:
8070 			case 2:
8071 				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
8072 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8073 				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
8074 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8075 				break;
8076 			}
8077 			break;
8078 		case 184: /* CP Privileged reg access */
8079 			DRM_ERROR("Illegal register access in command stream\n");
8080 			/* XXX check the bitfield order! */
8081 			me_id = (ring_id & 0x60) >> 5;
8082 			pipe_id = (ring_id & 0x18) >> 3;
8083 			queue_id = (ring_id & 0x7) >> 0;
8084 			switch (me_id) {
8085 			case 0:
8086 				/* This results in a full GPU reset, but all we need to do is soft
8087 				 * reset the CP for gfx
8088 				 */
8089 				queue_reset = true;
8090 				break;
8091 			case 1:
8092 				/* XXX compute */
8093 				queue_reset = true;
8094 				break;
8095 			case 2:
8096 				/* XXX compute */
8097 				queue_reset = true;
8098 				break;
8099 			}
8100 			break;
8101 		case 185: /* CP Privileged inst */
8102 			DRM_ERROR("Illegal instruction in command stream\n");
8103 			/* XXX check the bitfield order! */
8104 			me_id = (ring_id & 0x60) >> 5;
8105 			pipe_id = (ring_id & 0x18) >> 3;
8106 			queue_id = (ring_id & 0x7) >> 0;
8107 			switch (me_id) {
8108 			case 0:
8109 				/* This results in a full GPU reset, but all we need to do is soft
8110 				 * reset the CP for gfx
8111 				 */
8112 				queue_reset = true;
8113 				break;
8114 			case 1:
8115 				/* XXX compute */
8116 				queue_reset = true;
8117 				break;
8118 			case 2:
8119 				/* XXX compute */
8120 				queue_reset = true;
8121 				break;
8122 			}
8123 			break;
8124 		case 224: /* SDMA trap event */
8125 			/* XXX check the bitfield order! */
8126 			me_id = (ring_id & 0x3) >> 0;
8127 			queue_id = (ring_id & 0xc) >> 2;
8128 			DRM_DEBUG("IH: SDMA trap\n");
8129 			switch (me_id) {
8130 			case 0:
8131 				switch (queue_id) {
8132 				case 0:
8133 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8134 					break;
8135 				case 1:
8136 					/* XXX compute */
8137 					break;
8138 				case 2:
8139 					/* XXX compute */
8140 					break;
8141 				}
8142 				break;
8143 			case 1:
8144 				switch (queue_id) {
8145 				case 0:
8146 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8147 					break;
8148 				case 1:
8149 					/* XXX compute */
8150 					break;
8151 				case 2:
8152 					/* XXX compute */
8153 					break;
8154 				}
8155 				break;
8156 			}
8157 			break;
8158 		case 230: /* thermal low to high */
8159 			DRM_DEBUG("IH: thermal low to high\n");
8160 			rdev->pm.dpm.thermal.high_to_low = false;
8161 			queue_thermal = true;
8162 			break;
8163 		case 231: /* thermal high to low */
8164 			DRM_DEBUG("IH: thermal high to low\n");
8165 			rdev->pm.dpm.thermal.high_to_low = true;
8166 			queue_thermal = true;
8167 			break;
8168 		case 233: /* GUI IDLE */
8169 			DRM_DEBUG("IH: GUI idle\n");
8170 			break;
8171 		case 241: /* SDMA Privileged inst */
8172 		case 247: /* SDMA Privileged inst */
8173 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8174 			/* XXX check the bitfield order! */
8175 			me_id = (ring_id & 0x3) >> 0;
8176 			queue_id = (ring_id & 0xc) >> 2;
8177 			switch (me_id) {
8178 			case 0:
8179 				switch (queue_id) {
8180 				case 0:
8181 					queue_reset = true;
8182 					break;
8183 				case 1:
8184 					/* XXX compute */
8185 					queue_reset = true;
8186 					break;
8187 				case 2:
8188 					/* XXX compute */
8189 					queue_reset = true;
8190 					break;
8191 				}
8192 				break;
8193 			case 1:
8194 				switch (queue_id) {
8195 				case 0:
8196 					queue_reset = true;
8197 					break;
8198 				case 1:
8199 					/* XXX compute */
8200 					queue_reset = true;
8201 					break;
8202 				case 2:
8203 					/* XXX compute */
8204 					queue_reset = true;
8205 					break;
8206 				}
8207 				break;
8208 			}
8209 			break;
8210 		default:
8211 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8212 			break;
8213 		}
8214 
8215 		/* wptr/rptr are in bytes! */
8216 		rptr += 16;
8217 		rptr &= rdev->ih.ptr_mask;
8218 		WREG32(IH_RB_RPTR, rptr);
8219 	}
8220 	if (queue_hotplug)
8221 		schedule_work(&rdev->hotplug_work);
8222 	if (queue_reset) {
8223 		rdev->needs_reset = true;
8224 		wake_up_all(&rdev->fence_queue);
8225 	}
8226 	if (queue_thermal)
8227 		schedule_work(&rdev->pm.dpm.thermal.work);
8228 	rdev->ih.rptr = rptr;
8229 	atomic_set(&rdev->ih.lock, 0);
8230 
8231 	/* make sure wptr hasn't changed while processing */
8232 	wptr = cik_get_ih_wptr(rdev);
8233 	if (wptr != rptr)
8234 		goto restart_ih;
8235 
8236 	return IRQ_HANDLED;
8237 }
8238 
8239 /*
8240  * startup/shutdown callbacks
8241  */
8242 /**
8243  * cik_startup - program the asic to a functional state
8244  *
8245  * @rdev: radeon_device pointer
8246  *
8247  * Programs the asic to a functional state (CIK).
8248  * Called by cik_init() and cik_resume().
8249  * Returns 0 for success, error for failure.
8250  */
8251 static int cik_startup(struct radeon_device *rdev)
8252 {
8253 	struct radeon_ring *ring;
8254 	u32 nop;
8255 	int r;
8256 
8257 	/* enable pcie gen2/3 link */
8258 	cik_pcie_gen3_enable(rdev);
8259 	/* enable aspm */
8260 	cik_program_aspm(rdev);
8261 
8262 	/* scratch needs to be initialized before MC */
8263 	r = r600_vram_scratch_init(rdev);
8264 	if (r)
8265 		return r;
8266 
8267 	cik_mc_program(rdev);
8268 
8269 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8270 		r = ci_mc_load_microcode(rdev);
8271 		if (r) {
8272 			DRM_ERROR("Failed to load MC firmware!\n");
8273 			return r;
8274 		}
8275 	}
8276 
8277 	r = cik_pcie_gart_enable(rdev);
8278 	if (r)
8279 		return r;
8280 	cik_gpu_init(rdev);
8281 
8282 	/* allocate rlc buffers */
8283 	if (rdev->flags & RADEON_IS_IGP) {
8284 		if (rdev->family == CHIP_KAVERI) {
8285 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8286 			rdev->rlc.reg_list_size =
8287 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8288 		} else {
8289 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8290 			rdev->rlc.reg_list_size =
8291 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8292 		}
8293 	}
8294 	rdev->rlc.cs_data = ci_cs_data;
8295 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8296 	r = sumo_rlc_init(rdev);
8297 	if (r) {
8298 		DRM_ERROR("Failed to init rlc BOs!\n");
8299 		return r;
8300 	}
8301 
8302 	/* allocate wb buffer */
8303 	r = radeon_wb_init(rdev);
8304 	if (r)
8305 		return r;
8306 
8307 	/* allocate mec buffers */
8308 	r = cik_mec_init(rdev);
8309 	if (r) {
8310 		DRM_ERROR("Failed to init MEC BOs!\n");
8311 		return r;
8312 	}
8313 
8314 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8315 	if (r) {
8316 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8317 		return r;
8318 	}
8319 
8320 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8321 	if (r) {
8322 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8323 		return r;
8324 	}
8325 
8326 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8327 	if (r) {
8328 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8329 		return r;
8330 	}
8331 
8332 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8333 	if (r) {
8334 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8335 		return r;
8336 	}
8337 
8338 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8339 	if (r) {
8340 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8341 		return r;
8342 	}
8343 
8344 	r = radeon_uvd_resume(rdev);
8345 	if (!r) {
8346 		r = uvd_v4_2_resume(rdev);
8347 		if (!r) {
8348 			r = radeon_fence_driver_start_ring(rdev,
8349 							   R600_RING_TYPE_UVD_INDEX);
8350 			if (r)
8351 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8352 		}
8353 	}
8354 	if (r)
8355 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8356 
8357 	r = radeon_vce_resume(rdev);
8358 	if (!r) {
8359 		r = vce_v2_0_resume(rdev);
8360 		if (!r)
8361 			r = radeon_fence_driver_start_ring(rdev,
8362 							   TN_RING_TYPE_VCE1_INDEX);
8363 		if (!r)
8364 			r = radeon_fence_driver_start_ring(rdev,
8365 							   TN_RING_TYPE_VCE2_INDEX);
8366 	}
8367 	if (r) {
8368 		dev_err(rdev->dev, "VCE init error (%d).\n", r);
8369 		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8370 		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8371 	}
8372 
8373 	/* Enable IRQ */
8374 	if (!rdev->irq.installed) {
8375 		r = radeon_irq_kms_init(rdev);
8376 		if (r)
8377 			return r;
8378 	}
8379 
8380 	r = cik_irq_init(rdev);
8381 	if (r) {
8382 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8383 		radeon_irq_kms_fini(rdev);
8384 		return r;
8385 	}
8386 	cik_irq_set(rdev);
8387 
8388 	if (rdev->family == CHIP_HAWAII) {
8389 		if (rdev->new_fw)
8390 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8391 		else
8392 			nop = RADEON_CP_PACKET2;
8393 	} else {
8394 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8395 	}
8396 
8397 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8398 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8399 			     nop);
8400 	if (r)
8401 		return r;
8402 
8403 	/* set up the compute queues */
8404 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8405 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8406 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8407 			     nop);
8408 	if (r)
8409 		return r;
8410 	ring->me = 1; /* first MEC */
8411 	ring->pipe = 0; /* first pipe */
8412 	ring->queue = 0; /* first queue */
8413 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8414 
8415 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8416 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8417 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8418 			     nop);
8419 	if (r)
8420 		return r;
8421 	/* dGPUs have only 1 MEC */
8422 	ring->me = 1; /* first MEC */
8423 	ring->pipe = 0; /* first pipe */
8424 	ring->queue = 1; /* second queue */
8425 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
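	/* Compute rings are addressed by an (me, pipe, queue) triple.  On
	 * CIK each MEC exposes 4 pipes with 8 queues per pipe, so the two
	 * rings above simply occupy the first two queues of MEC1 pipe 0
	 * (topology as set up by cik_mec_init()).
	 */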
8426 
8427 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8428 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8429 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8430 	if (r)
8431 		return r;
8432 
8433 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8434 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8435 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8436 	if (r)
8437 		return r;
8438 
8439 	r = cik_cp_resume(rdev);
8440 	if (r)
8441 		return r;
8442 
8443 	r = cik_sdma_resume(rdev);
8444 	if (r)
8445 		return r;
8446 
8447 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8448 	if (ring->ring_size) {
8449 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8450 				     RADEON_CP_PACKET2);
8451 		if (!r)
8452 			r = uvd_v1_0_init(rdev);
8453 		if (r)
8454 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8455 	}
8456 
8457 	r = -ENOENT;
8458 
8459 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8460 	if (ring->ring_size)
8461 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8462 				     VCE_CMD_NO_OP);
8463 
8464 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8465 	if (ring->ring_size)
8466 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8467 				     VCE_CMD_NO_OP);
8468 
8469 	if (!r)
8470 		r = vce_v1_0_init(rdev);
8471 	else if (r != -ENOENT)
8472 		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8473 
8474 	r = radeon_ib_pool_init(rdev);
8475 	if (r) {
8476 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8477 		return r;
8478 	}
8479 
8480 	r = radeon_vm_manager_init(rdev);
8481 	if (r) {
8482 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8483 		return r;
8484 	}
8485 
8486 	r = dce6_audio_init(rdev);
8487 	if (r)
8488 		return r;
8489 
8490 	return 0;
8491 }
8492 
8493 /**
8494  * cik_resume - resume the asic to a functional state
8495  *
8496  * @rdev: radeon_device pointer
8497  *
8498  * Programs the asic to a functional state (CIK).
8499  * Called at resume.
8500  * Returns 0 for success, error for failure.
8501  */
8502 int cik_resume(struct radeon_device *rdev)
8503 {
8504 	int r;
8505 
8506 	/* post card */
8507 	atom_asic_init(rdev->mode_info.atom_context);
8508 
8509 	/* init golden registers */
8510 	cik_init_golden_registers(rdev);
8511 
8512 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8513 		radeon_pm_resume(rdev);
8514 
8515 	rdev->accel_working = true;
8516 	r = cik_startup(rdev);
8517 	if (r) {
8518 		DRM_ERROR("cik startup failed on resume\n");
8519 		rdev->accel_working = false;
8520 		return r;
8521 	}
8522 
8523 	return r;
8525 }
8526 
8527 /**
8528  * cik_suspend - suspend the asic
8529  *
8530  * @rdev: radeon_device pointer
8531  *
8532  * Bring the chip into a state suitable for suspend (CIK).
8533  * Called at suspend.
8534  * Returns 0 for success.
8535  */
8536 int cik_suspend(struct radeon_device *rdev)
8537 {
8538 	radeon_pm_suspend(rdev);
8539 	dce6_audio_fini(rdev);
8540 	radeon_vm_manager_fini(rdev);
8541 	cik_cp_enable(rdev, false);
8542 	cik_sdma_enable(rdev, false);
8543 	uvd_v1_0_fini(rdev);
8544 	radeon_uvd_suspend(rdev);
8545 	radeon_vce_suspend(rdev);
8546 	cik_fini_pg(rdev);
8547 	cik_fini_cg(rdev);
8548 	cik_irq_suspend(rdev);
8549 	radeon_wb_disable(rdev);
8550 	cik_pcie_gart_disable(rdev);
8551 	return 0;
8552 }
8553 
8554 	/* The plan is to move initialization into this function and use
8555  * helper functions so that radeon_device_init does little more
8556  * than call asic specific functions. This should also allow us
8557  * to remove a bunch of callback functions
8558  * like vram_info.
8559  */
8560 /**
8561  * cik_init - asic specific driver and hw init
8562  *
8563  * @rdev: radeon_device pointer
8564  *
8565  * Setup asic specific driver variables and program the hw
8566  * to a functional state (CIK).
8567  * Called at driver startup.
8568  * Returns 0 for success, errors for failure.
8569  */
8570 int cik_init(struct radeon_device *rdev)
8571 {
8572 	struct radeon_ring *ring;
8573 	int r;
8574 
8575 	/* Read BIOS */
8576 	if (!radeon_get_bios(rdev)) {
8577 		if (ASIC_IS_AVIVO(rdev))
8578 			return -EINVAL;
8579 	}
8580 	/* Must be an ATOMBIOS */
8581 	if (!rdev->is_atom_bios) {
8582 		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
8583 		return -EINVAL;
8584 	}
8585 	r = radeon_atombios_init(rdev);
8586 	if (r)
8587 		return r;
8588 
8589 	/* Post card if necessary */
8590 	if (!radeon_card_posted(rdev)) {
8591 		if (!rdev->bios) {
8592 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8593 			return -EINVAL;
8594 		}
8595 		DRM_INFO("GPU not posted. posting now...\n");
8596 		atom_asic_init(rdev->mode_info.atom_context);
8597 	}
8598 	/* init golden registers */
8599 	cik_init_golden_registers(rdev);
8600 	/* Initialize scratch registers */
8601 	cik_scratch_init(rdev);
8602 	/* Initialize surface registers */
8603 	radeon_surface_init(rdev);
8604 	/* Initialize clocks */
8605 	radeon_get_clock_info(rdev->ddev);
8606 
8607 	/* Fence driver */
8608 	r = radeon_fence_driver_init(rdev);
8609 	if (r)
8610 		return r;
8611 
8612 	/* initialize memory controller */
8613 	r = cik_mc_init(rdev);
8614 	if (r)
8615 		return r;
8616 	/* Memory manager */
8617 	r = radeon_bo_init(rdev);
8618 	if (r)
8619 		return r;
8620 
8621 	if (rdev->flags & RADEON_IS_IGP) {
8622 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8623 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8624 			r = cik_init_microcode(rdev);
8625 			if (r) {
8626 				DRM_ERROR("Failed to load firmware!\n");
8627 				return r;
8628 			}
8629 		}
8630 	} else {
8631 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8632 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8633 		    !rdev->mc_fw) {
8634 			r = cik_init_microcode(rdev);
8635 			if (r) {
8636 				DRM_ERROR("Failed to load firmware!\n");
8637 				return r;
8638 			}
8639 		}
8640 	}
8641 
8642 	/* Initialize power management */
8643 	radeon_pm_init(rdev);
8644 
8645 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8646 	ring->ring_obj = NULL;
8647 	r600_ring_init(rdev, ring, 1024 * 1024);
8648 
8649 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8650 	ring->ring_obj = NULL;
8651 	r600_ring_init(rdev, ring, 1024 * 1024);
8652 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8653 	if (r)
8654 		return r;
8655 
8656 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8657 	ring->ring_obj = NULL;
8658 	r600_ring_init(rdev, ring, 1024 * 1024);
8659 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8660 	if (r)
8661 		return r;
8662 
8663 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8664 	ring->ring_obj = NULL;
8665 	r600_ring_init(rdev, ring, 256 * 1024);
8666 
8667 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8668 	ring->ring_obj = NULL;
8669 	r600_ring_init(rdev, ring, 256 * 1024);
8670 
8671 	r = radeon_uvd_init(rdev);
8672 	if (!r) {
8673 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8674 		ring->ring_obj = NULL;
8675 		r600_ring_init(rdev, ring, 4096);
8676 	}
8677 
8678 	r = radeon_vce_init(rdev);
8679 	if (!r) {
8680 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8681 		ring->ring_obj = NULL;
8682 		r600_ring_init(rdev, ring, 4096);
8683 
8684 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8685 		ring->ring_obj = NULL;
8686 		r600_ring_init(rdev, ring, 4096);
8687 	}
8688 
8689 	rdev->ih.ring_obj = NULL;
8690 	r600_ih_ring_init(rdev, 64 * 1024);
8691 
8692 	r = r600_pcie_gart_init(rdev);
8693 	if (r)
8694 		return r;
8695 
8696 	rdev->accel_working = true;
8697 	r = cik_startup(rdev);
8698 	if (r) {
8699 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8700 		cik_cp_fini(rdev);
8701 		cik_sdma_fini(rdev);
8702 		cik_irq_fini(rdev);
8703 		sumo_rlc_fini(rdev);
8704 		cik_mec_fini(rdev);
8705 		radeon_wb_fini(rdev);
8706 		radeon_ib_pool_fini(rdev);
8707 		radeon_vm_manager_fini(rdev);
8708 		radeon_irq_kms_fini(rdev);
8709 		cik_pcie_gart_fini(rdev);
8710 		rdev->accel_working = false;
8711 	}
8712 
8713 	/* Don't start up if the MC ucode is missing.
8714 	 * The default clocks and voltages before the MC ucode
8715 	 * is loaded are not sufficient for advanced operations.
8716 	 */
8717 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8718 		DRM_ERROR("radeon: MC ucode required for CIK+.\n");
8719 		return -EINVAL;
8720 	}
8721 
8722 	return 0;
8723 }
8724 
8725 /**
8726  * cik_fini - asic specific driver and hw fini
8727  *
8728  * @rdev: radeon_device pointer
8729  *
8730  * Tear down the asic specific driver variables and program the hw
8731  * to an idle state (CIK).
8732  * Called at driver unload.
8733  */
8734 void cik_fini(struct radeon_device *rdev)
8735 {
8736 	radeon_pm_fini(rdev);
8737 	cik_cp_fini(rdev);
8738 	cik_sdma_fini(rdev);
8739 	cik_fini_pg(rdev);
8740 	cik_fini_cg(rdev);
8741 	cik_irq_fini(rdev);
8742 	sumo_rlc_fini(rdev);
8743 	cik_mec_fini(rdev);
8744 	radeon_wb_fini(rdev);
8745 	radeon_vm_manager_fini(rdev);
8746 	radeon_ib_pool_fini(rdev);
8747 	radeon_irq_kms_fini(rdev);
8748 	uvd_v1_0_fini(rdev);
8749 	radeon_uvd_fini(rdev);
8750 	radeon_vce_fini(rdev);
8751 	cik_pcie_gart_fini(rdev);
8752 	r600_vram_scratch_fini(rdev);
8753 	radeon_gem_fini(rdev);
8754 	radeon_fence_driver_fini(rdev);
8755 	radeon_bo_fini(rdev);
8756 	radeon_atombios_fini(rdev);
8757 	kfree(rdev->bios);
8758 	rdev->bios = NULL;
8759 }
8760 
8761 void dce8_program_fmt(struct drm_encoder *encoder)
8762 {
8763 	struct drm_device *dev = encoder->dev;
8764 	struct radeon_device *rdev = dev->dev_private;
8765 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8766 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8767 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8768 	int bpc = 0;
8769 	u32 tmp = 0;
8770 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8771 
8772 	if (connector) {
8773 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8774 		bpc = radeon_get_monitor_bpc(connector);
8775 		dither = radeon_connector->dither;
8776 	}
8777 
8778 	/* LVDS/eDP FMT is set up by atom */
8779 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8780 		return;
8781 
8782 	/* not needed for analog */
8783 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8784 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8785 		return;
8786 
8787 	if (bpc == 0)
8788 		return;
8789 
8790 	switch (bpc) {
8791 	case 6:
8792 		if (dither == RADEON_FMT_DITHER_ENABLE)
8793 			/* XXX sort out optimal dither settings */
8794 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8795 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8796 		else
8797 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8798 		break;
8799 	case 8:
8800 		if (dither == RADEON_FMT_DITHER_ENABLE)
8801 			/* XXX sort out optimal dither settings */
8802 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8803 				FMT_RGB_RANDOM_ENABLE |
8804 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8805 		else
8806 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8807 		break;
8808 	case 10:
8809 		if (dither == RADEON_FMT_DITHER_ENABLE)
8810 			/* XXX sort out optimal dither settings */
8811 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8812 				FMT_RGB_RANDOM_ENABLE |
8813 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8814 		else
8815 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8816 		break;
8817 	default:
8818 		/* not needed */
8819 		break;
8820 	}
8821 
8822 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8823 }
8824 
8825 /* display watermark setup */
8826 /**
8827  * dce8_line_buffer_adjust - Set up the line buffer
8828  *
8829  * @rdev: radeon_device pointer
8830  * @radeon_crtc: the selected display controller
8831  * @mode: the current display mode on the selected display
8832  * controller
8833  *
8834  * Set up the line buffer allocation for
8835  * the selected display controller (CIK).
8836  * Returns the line buffer size in pixels.
8837  */
8838 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8839 				   struct radeon_crtc *radeon_crtc,
8840 				   struct drm_display_mode *mode)
8841 {
8842 	u32 tmp, buffer_alloc, i;
8843 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8844 	/*
8845 	 * Line Buffer Setup
8846 	 * There are 6 line buffers, one for each display controllers.
8847 	 * There are 6 line buffers, one for each display controller.
8848 	 * There are 3 partitions per LB. Select the number of partitions
8849 	 * to enable based on the display width.  For display widths larger
8850 	 * than 4096, you need to use 2 display controllers and combine
8851 	 */
8852 	if (radeon_crtc->base.enabled && mode) {
8853 		if (mode->crtc_hdisplay < 1920) {
8854 			tmp = 1;
8855 			buffer_alloc = 2;
8856 		} else if (mode->crtc_hdisplay < 2560) {
8857 			tmp = 2;
8858 			buffer_alloc = 2;
8859 		} else if (mode->crtc_hdisplay < 4096) {
8860 			tmp = 0;
8861 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8862 		} else {
8863 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8864 			tmp = 0;
8865 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8866 		}
8867 	} else {
8868 		tmp = 1;
8869 		buffer_alloc = 0;
8870 	}
8871 
8872 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8873 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8874 
8875 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8876 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8877 	for (i = 0; i < rdev->usec_timeout; i++) {
8878 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8879 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8880 			break;
8881 		udelay(1);
8882 	}
8883 
8884 	if (radeon_crtc->base.enabled && mode) {
8885 		switch (tmp) {
8886 		case 0:
8887 		default:
8888 			return 4096 * 2;
8889 		case 1:
8890 			return 1920 * 2;
8891 		case 2:
8892 			return 2560 * 2;
8893 		}
8894 	}
8895 
8896 	/* controller not enabled, so no lb used */
8897 	return 0;
8898 }
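/* Worked example: a 1920x1080 head has crtc_hdisplay == 1920, which
 * falls into the second bucket above (tmp = 2, buffer_alloc = 2), so
 * the pipe is granted a 2560 * 2 pixel line buffer and two DMIF
 * buffers.
 */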
8899 
8900 /**
8901  * cik_get_number_of_dram_channels - get the number of dram channels
8902  *
8903  * @rdev: radeon_device pointer
8904  *
8905  * Look up the number of video ram channels (CIK).
8906  * Used for display watermark bandwidth calculations
8907  * Returns the number of dram channels
8908  */
8909 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8910 {
8911 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8912 
8913 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8914 	case 0:
8915 	default:
8916 		return 1;
8917 	case 1:
8918 		return 2;
8919 	case 2:
8920 		return 4;
8921 	case 3:
8922 		return 8;
8923 	case 4:
8924 		return 3;
8925 	case 5:
8926 		return 6;
8927 	case 6:
8928 		return 10;
8929 	case 7:
8930 		return 12;
8931 	case 8:
8932 		return 16;
8933 	}
8934 }
8935 
8936 struct dce8_wm_params {
8937 	u32 dram_channels; /* number of dram channels */
8938 	u32 yclk;          /* bandwidth per dram data pin in kHz */
8939 	u32 sclk;          /* engine clock in kHz */
8940 	u32 disp_clk;      /* display clock in kHz */
8941 	u32 src_width;     /* viewport width */
8942 	u32 active_time;   /* active display time in ns */
8943 	u32 blank_time;    /* blank time in ns */
8944 	bool interlaced;    /* mode is interlaced */
8945 	fixed20_12 vsc;    /* vertical scale ratio */
8946 	u32 num_heads;     /* number of active crtcs */
8947 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8948 	u32 lb_size;       /* line buffer allocated to pipe */
8949 	u32 vtaps;         /* vertical scaler taps */
8950 };
8951 
8952 /**
8953  * dce8_dram_bandwidth - get the dram bandwidth
8954  *
8955  * @wm: watermark calculation data
8956  *
8957  * Calculate the raw dram bandwidth (CIK).
8958  * Used for display watermark bandwidth calculations
8959  * Returns the dram bandwidth in MBytes/s
8960  */
8961 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8962 {
8963 	/* Calculate raw DRAM Bandwidth */
8964 	fixed20_12 dram_efficiency; /* 0.7 */
8965 	fixed20_12 yclk, dram_channels, bandwidth;
8966 	fixed20_12 a;
8967 
8968 	a.full = dfixed_const(1000);
8969 	yclk.full = dfixed_const(wm->yclk);
8970 	yclk.full = dfixed_div(yclk, a);
8971 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8972 	a.full = dfixed_const(10);
8973 	dram_efficiency.full = dfixed_const(7);
8974 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8975 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8976 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8977 
8978 	return dfixed_trunc(bandwidth);
8979 }
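/* In plain terms: bandwidth = (yclk / 1000) * (dram_channels * 4) * 0.7.
 * With illustrative numbers, yclk = 1000000 (1 GHz per pin) and 4
 * channels give 1000 * 16 * 0.7 = 11200 MBytes/s.
 */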
8980 
8981 /**
8982  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8983  *
8984  * @wm: watermark calculation data
8985  *
8986  * Calculate the dram bandwidth used for display (CIK).
8987  * Used for display watermark bandwidth calculations
8988  * Returns the dram bandwidth for display in MBytes/s
8989  */
8990 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8991 {
8992 	/* Calculate DRAM Bandwidth and the part allocated to display. */
8993 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8994 	fixed20_12 yclk, dram_channels, bandwidth;
8995 	fixed20_12 a;
8996 
8997 	a.full = dfixed_const(1000);
8998 	yclk.full = dfixed_const(wm->yclk);
8999 	yclk.full = dfixed_div(yclk, a);
9000 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9001 	a.full = dfixed_const(10);
9002 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
9003 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9004 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9005 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9006 
9007 	return dfixed_trunc(bandwidth);
9008 }
9009 
9010 /**
9011  * dce8_data_return_bandwidth - get the data return bandwidth
9012  *
9013  * @wm: watermark calculation data
9014  *
9015  * Calculate the data return bandwidth used for display (CIK).
9016  * Used for display watermark bandwidth calculations
9017  * Returns the data return bandwidth in MBytes/s
9018  */
9019 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9020 {
9021 	/* Calculate the display Data return Bandwidth */
9022 	fixed20_12 return_efficiency; /* 0.8 */
9023 	fixed20_12 sclk, bandwidth;
9024 	fixed20_12 a;
9025 
9026 	a.full = dfixed_const(1000);
9027 	sclk.full = dfixed_const(wm->sclk);
9028 	sclk.full = dfixed_div(sclk, a);
9029 	a.full = dfixed_const(10);
9030 	return_efficiency.full = dfixed_const(8);
9031 	return_efficiency.full = dfixed_div(return_efficiency, a);
9032 	a.full = dfixed_const(32);
9033 	bandwidth.full = dfixed_mul(a, sclk);
9034 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9035 
9036 	return dfixed_trunc(bandwidth);
9037 }
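/* In plain terms: bandwidth = (sclk / 1000) * 32 * 0.8, i.e. 32 bytes
 * returned per engine clock at 80% efficiency.  An illustrative
 * sclk = 800000 (800 MHz) yields 800 * 32 * 0.8 = 20480 MBytes/s.
 */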
9038 
9039 /**
9040  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9041  *
9042  * @wm: watermark calculation data
9043  *
9044  * Calculate the dmif bandwidth used for display (CIK).
9045  * Used for display watermark bandwidth calculations
9046  * Returns the dmif bandwidth in MBytes/s
9047  */
9048 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9049 {
9050 	/* Calculate the DMIF Request Bandwidth */
9051 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9052 	fixed20_12 disp_clk, bandwidth;
9053 	fixed20_12 a, b;
9054 
9055 	a.full = dfixed_const(1000);
9056 	disp_clk.full = dfixed_const(wm->disp_clk);
9057 	disp_clk.full = dfixed_div(disp_clk, a);
9058 	a.full = dfixed_const(32);
9059 	b.full = dfixed_mul(a, disp_clk);
9060 
9061 	a.full = dfixed_const(10);
9062 	disp_clk_request_efficiency.full = dfixed_const(8);
9063 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9064 
9065 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9066 
9067 	return dfixed_trunc(bandwidth);
9068 }
9069 
9070 /**
9071  * dce8_available_bandwidth - get the min available bandwidth
9072  *
9073  * @wm: watermark calculation data
9074  *
9075  * Calculate the min available bandwidth used for display (CIK).
9076  * Used for display watermark bandwidth calculations
9077  * Returns the min available bandwidth in MBytes/s
9078  */
9079 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9080 {
9081 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
9082 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9083 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9084 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9085 
9086 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9087 }
9088 
9089 /**
9090  * dce8_average_bandwidth - get the average available bandwidth
9091  *
9092  * @wm: watermark calculation data
9093  *
9094  * Calculate the average available bandwidth used for display (CIK).
9095  * Used for display watermark bandwidth calculations
9096  * Returns the average available bandwidth in MBytes/s
9097  */
9098 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9099 {
9100 	/* Calculate the display mode Average Bandwidth
9101 	 * DisplayMode should contain the source and destination dimensions,
9102 	 * timing, etc.
9103 	 */
9104 	fixed20_12 bpp;
9105 	fixed20_12 line_time;
9106 	fixed20_12 src_width;
9107 	fixed20_12 bandwidth;
9108 	fixed20_12 a;
9109 
9110 	a.full = dfixed_const(1000);
9111 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9112 	line_time.full = dfixed_div(line_time, a);
9113 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9114 	src_width.full = dfixed_const(wm->src_width);
9115 	bandwidth.full = dfixed_mul(src_width, bpp);
9116 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9117 	bandwidth.full = dfixed_div(bandwidth, line_time);
9118 
9119 	return dfixed_trunc(bandwidth);
9120 }
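/* In plain terms: bandwidth = src_width * bytes_per_pixel * vsc /
 * line_time, with line_time converted from ns to us.  Illustratively,
 * a 1920 pixel wide, 4 Bpp, unscaled surface on a 13.2 us line needs
 * 7680 / 13.2, roughly 580 MBytes/s.
 */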
9121 
9122 /**
9123  * dce8_latency_watermark - get the latency watermark
9124  *
9125  * @wm: watermark calculation data
9126  *
9127  * Calculate the latency watermark (CIK).
9128  * Used for display watermark bandwidth calculations
9129  * Returns the latency watermark in ns
9130  */
9131 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9132 {
9133 	/* First calculate the latency in ns */
9134 	u32 mc_latency = 2000; /* 2000 ns. */
9135 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9136 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9137 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9138 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9139 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9140 		(wm->num_heads * cursor_line_pair_return_time);
9141 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9142 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9143 	u32 tmp, dmif_size = 12288;
9144 	fixed20_12 a, b, c;
9145 
9146 	if (wm->num_heads == 0)
9147 		return 0;
9148 
9149 	a.full = dfixed_const(2);
9150 	b.full = dfixed_const(1);
9151 	if ((wm->vsc.full > a.full) ||
9152 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9153 	    (wm->vtaps >= 5) ||
9154 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9155 		max_src_lines_per_dst_line = 4;
9156 	else
9157 		max_src_lines_per_dst_line = 2;
9158 
9159 	a.full = dfixed_const(available_bandwidth);
9160 	b.full = dfixed_const(wm->num_heads);
9161 	a.full = dfixed_div(a, b);
9162 
9163 	b.full = dfixed_const(mc_latency + 512);
9164 	c.full = dfixed_const(wm->disp_clk);
9165 	b.full = dfixed_div(b, c);
9166 
9167 	c.full = dfixed_const(dmif_size);
9168 	b.full = dfixed_div(c, b);
9169 
9170 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9171 
9172 	b.full = dfixed_const(1000);
9173 	c.full = dfixed_const(wm->disp_clk);
9174 	b.full = dfixed_div(c, b);
9175 	c.full = dfixed_const(wm->bytes_per_pixel);
9176 	b.full = dfixed_mul(b, c);
9177 
9178 	lb_fill_bw = min(tmp, dfixed_trunc(b));
9179 
9180 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9181 	b.full = dfixed_const(1000);
9182 	c.full = dfixed_const(lb_fill_bw);
9183 	b.full = dfixed_div(c, b);
9184 	a.full = dfixed_div(a, b);
9185 	line_fill_time = dfixed_trunc(a);
9186 
9187 	if (line_fill_time < wm->active_time)
9188 		return latency;
9189 	else
9190 		return latency + (line_fill_time - wm->active_time);
9192 }
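/* Summing up: the watermark is the memory latency plus the dc pipe
 * latency plus the worst-case time spent on the other heads' chunk and
 * cursor returns, extended by any shortfall when the line buffer
 * cannot be refilled within the active display period.
 */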
9193 
9194 /**
9195  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9196  * average and available dram bandwidth
9197  *
9198  * @wm: watermark calculation data
9199  *
9200  * Check if the display average bandwidth fits in the display
9201  * dram bandwidth (CIK).
9202  * Used for display watermark bandwidth calculations
9203  * Returns true if the display fits, false if not.
9204  */
9205 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9206 {
9207 	if (dce8_average_bandwidth(wm) <=
9208 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9209 		return true;
9210 	else
9211 		return false;
9212 }
9213 
9214 /**
9215  * dce8_average_bandwidth_vs_available_bandwidth - check
9216  * average and available bandwidth
9217  *
9218  * @wm: watermark calculation data
9219  *
9220  * Check if the display average bandwidth fits in the display
9221  * available bandwidth (CIK).
9222  * Used for display watermark bandwidth calculations
9223  * Returns true if the display fits, false if not.
9224  */
9225 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9226 {
9227 	if (dce8_average_bandwidth(wm) <=
9228 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9229 		return true;
9230 	else
9231 		return false;
9232 }
9233 
9234 /**
9235  * dce8_check_latency_hiding - check latency hiding
9236  *
9237  * @wm: watermark calculation data
9238  *
9239  * Check latency hiding (CIK).
9240  * Used for display watermark bandwidth calculations
9241  * Returns true if the display fits, false if not.
9242  */
9243 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9244 {
9245 	u32 lb_partitions = wm->lb_size / wm->src_width;
9246 	u32 line_time = wm->active_time + wm->blank_time;
9247 	u32 latency_tolerant_lines;
9248 	u32 latency_hiding;
9249 	fixed20_12 a;
9250 
9251 	a.full = dfixed_const(1);
9252 	if (wm->vsc.full > a.full)
9253 		latency_tolerant_lines = 1;
9254 	else {
9255 		if (lb_partitions <= (wm->vtaps + 1))
9256 			latency_tolerant_lines = 1;
9257 		else
9258 			latency_tolerant_lines = 2;
9259 	}
9260 
9261 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9262 
9263 	if (dce8_latency_watermark(wm) <= latency_hiding)
9264 		return true;
9265 	else
9266 		return false;
9267 }
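/* Illustratively: a 3840 pixel wide surface in a 4096 * 2 pixel line
 * buffer gives lb_partitions = 2; with vtaps = 1 that is <= vtaps + 1,
 * so only one latency tolerant line is assumed.
 */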
9268 
9269 /**
9270  * dce8_program_watermarks - program display watermarks
9271  *
9272  * @rdev: radeon_device pointer
9273  * @radeon_crtc: the selected display controller
9274  * @lb_size: line buffer size
9275  * @num_heads: number of display controllers in use
9276  *
9277  * Calculate and program the display watermarks for the
9278  * selected display controller (CIK).
9279  */
9280 static void dce8_program_watermarks(struct radeon_device *rdev,
9281 				    struct radeon_crtc *radeon_crtc,
9282 				    u32 lb_size, u32 num_heads)
9283 {
9284 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9285 	struct dce8_wm_params wm_low, wm_high;
9286 	u32 pixel_period;
9287 	u32 line_time = 0;
9288 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9289 	u32 tmp, wm_mask;
9290 
9291 	if (radeon_crtc->base.enabled && num_heads && mode) {
9292 		pixel_period = 1000000 / (u32)mode->clock;
9293 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9294 
9295 		/* watermark for high clocks */
9296 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9297 		    rdev->pm.dpm_enabled) {
9298 			wm_high.yclk =
9299 				radeon_dpm_get_mclk(rdev, false) * 10;
9300 			wm_high.sclk =
9301 				radeon_dpm_get_sclk(rdev, false) * 10;
9302 		} else {
9303 			wm_high.yclk = rdev->pm.current_mclk * 10;
9304 			wm_high.sclk = rdev->pm.current_sclk * 10;
9305 		}
9306 
9307 		wm_high.disp_clk = mode->clock;
9308 		wm_high.src_width = mode->crtc_hdisplay;
9309 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9310 		wm_high.blank_time = line_time - wm_high.active_time;
9311 		wm_high.interlaced = false;
9312 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9313 			wm_high.interlaced = true;
9314 		wm_high.vsc = radeon_crtc->vsc;
9315 		wm_high.vtaps = 1;
9316 		if (radeon_crtc->rmx_type != RMX_OFF)
9317 			wm_high.vtaps = 2;
9318 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9319 		wm_high.lb_size = lb_size;
9320 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9321 		wm_high.num_heads = num_heads;
9322 
9323 		/* set for high clocks */
9324 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9325 
9326 		/* possibly force display priority to high */
9327 		/* should really do this at mode validation time... */
9328 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9329 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9330 		    !dce8_check_latency_hiding(&wm_high) ||
9331 		    (rdev->disp_priority == 2)) {
9332 			DRM_DEBUG_KMS("force priority to high\n");
9333 		}
9334 
9335 		/* watermark for low clocks */
9336 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9337 		    rdev->pm.dpm_enabled) {
9338 			wm_low.yclk =
9339 				radeon_dpm_get_mclk(rdev, true) * 10;
9340 			wm_low.sclk =
9341 				radeon_dpm_get_sclk(rdev, true) * 10;
9342 		} else {
9343 			wm_low.yclk = rdev->pm.current_mclk * 10;
9344 			wm_low.sclk = rdev->pm.current_sclk * 10;
9345 		}
9346 
9347 		wm_low.disp_clk = mode->clock;
9348 		wm_low.src_width = mode->crtc_hdisplay;
9349 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9350 		wm_low.blank_time = line_time - wm_low.active_time;
9351 		wm_low.interlaced = false;
9352 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9353 			wm_low.interlaced = true;
9354 		wm_low.vsc = radeon_crtc->vsc;
9355 		wm_low.vtaps = 1;
9356 		if (radeon_crtc->rmx_type != RMX_OFF)
9357 			wm_low.vtaps = 2;
9358 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9359 		wm_low.lb_size = lb_size;
9360 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9361 		wm_low.num_heads = num_heads;
9362 
9363 		/* set for low clocks */
9364 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9365 
9366 		/* possibly force display priority to high */
9367 		/* should really do this at mode validation time... */
9368 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9369 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9370 		    !dce8_check_latency_hiding(&wm_low) ||
9371 		    (rdev->disp_priority == 2)) {
9372 			DRM_DEBUG_KMS("force priority to high\n");
9373 		}
9374 	}
9375 
9376 	/* select wm A */
9377 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9378 	tmp = wm_mask;
9379 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9380 	tmp |= LATENCY_WATERMARK_MASK(1);
9381 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9382 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9383 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9384 		LATENCY_HIGH_WATERMARK(line_time)));
9385 	/* select wm B */
9386 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9387 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9388 	tmp |= LATENCY_WATERMARK_MASK(2);
9389 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9390 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9391 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9392 		LATENCY_HIGH_WATERMARK(line_time)));
9393 	/* restore original selection */
9394 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9395 
9396 	/* save values for DPM */
9397 	radeon_crtc->line_time = line_time;
9398 	radeon_crtc->wm_high = latency_watermark_a;
9399 	radeon_crtc->wm_low = latency_watermark_b;
9400 }
9401 
9402 /**
9403  * dce8_bandwidth_update - program display watermarks
9404  *
9405  * @rdev: radeon_device pointer
9406  *
9407  * Calculate and program the display watermarks and line
9408  * buffer allocation (CIK).
9409  */
9410 void dce8_bandwidth_update(struct radeon_device *rdev)
9411 {
9412 	struct drm_display_mode *mode = NULL;
9413 	u32 num_heads = 0, lb_size;
9414 	int i;
9415 
9416 	radeon_update_display_priority(rdev);
9417 
9418 	for (i = 0; i < rdev->num_crtc; i++) {
9419 		if (rdev->mode_info.crtcs[i]->base.enabled)
9420 			num_heads++;
9421 	}
9422 	for (i = 0; i < rdev->num_crtc; i++) {
9423 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9424 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9425 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9426 	}
9427 }
9428 
9429 /**
9430  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9431  *
9432  * @rdev: radeon_device pointer
9433  *
9434  * Fetches a GPU clock counter snapshot (CIK).
9435  * Returns the 64 bit clock counter snapshot.
9436  */
9437 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9438 {
9439 	uint64_t clock;
9440 
9441 	mutex_lock(&rdev->gpu_clock_mutex);
9442 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9443 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9444 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9445 	mutex_unlock(&rdev->gpu_clock_mutex);
9446 	return clock;
9447 }
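/* The write to RLC_CAPTURE_GPU_CLOCK_COUNT latches the free-running
 * counter so that the LSB and MSB halves read back as one coherent
 * value; gpu_clock_mutex keeps concurrent snapshots from racing the
 * latch.
 */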
9448 
9449 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9450                               u32 cntl_reg, u32 status_reg)
9451 {
9452 	int r, i;
9453 	struct atom_clock_dividers dividers;
9454 	uint32_t tmp;
9455 
9456 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9457 					   clock, false, &dividers);
9458 	if (r)
9459 		return r;
9460 
9461 	tmp = RREG32_SMC(cntl_reg);
9462 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9463 	tmp |= dividers.post_divider;
9464 	WREG32_SMC(cntl_reg, tmp);
9465 
9466 	for (i = 0; i < 100; i++) {
9467 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9468 			break;
9469 		mdelay(10);
9470 	}
9471 	if (i == 100)
9472 		return -ETIMEDOUT;
9473 
9474 	return 0;
9475 }
9476 
9477 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9478 {
9479 	int r = 0;
9480 
9481 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9482 	if (r)
9483 		return r;
9484 
9485 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9486 	return r;
9487 }
9488 
9489 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9490 {
9491 	int r, i;
9492 	struct atom_clock_dividers dividers;
9493 	u32 tmp;
9494 
9495 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9496 					   ecclk, false, &dividers);
9497 	if (r)
9498 		return r;
9499 
9500 	for (i = 0; i < 100; i++) {
9501 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9502 			break;
9503 		mdelay(10);
9504 	}
9505 	if (i == 100)
9506 		return -ETIMEDOUT;
9507 
9508 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9509 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9510 	tmp |= dividers.post_divider;
9511 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9512 
9513 	for (i = 0; i < 100; i++) {
9514 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9515 			break;
9516 		mdelay(10);
9517 	}
9518 	if (i == 100)
9519 		return -ETIMEDOUT;
9520 
9521 	return 0;
9522 }
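/* Note the double ECLK_STATUS poll above: the clock must report stable
 * both before the post divider is switched and again afterwards, each
 * wait bounded by 100 * 10ms before giving up with -ETIMEDOUT.
 */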
9523 
9524 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9525 {
9526 	struct pci_dev *root = rdev->pdev->bus->self;
9527 	int bridge_pos, gpu_pos;
9528 	u32 speed_cntl, mask, current_data_rate;
9529 	int ret, i;
9530 	u16 tmp16;
9531 
9532 	if (pci_is_root_bus(rdev->pdev->bus))
9533 		return;
9534 
9535 	if (radeon_pcie_gen2 == 0)
9536 		return;
9537 
9538 	if (rdev->flags & RADEON_IS_IGP)
9539 		return;
9540 
9541 	if (!(rdev->flags & RADEON_IS_PCIE))
9542 		return;
9543 
9544 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9545 	if (ret != 0)
9546 		return;
9547 
9548 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9549 		return;
9550 
9551 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9552 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9553 		LC_CURRENT_DATA_RATE_SHIFT;
9554 	if (mask & DRM_PCIE_SPEED_80) {
9555 		if (current_data_rate == 2) {
9556 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9557 			return;
9558 		}
9559 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9560 	} else if (mask & DRM_PCIE_SPEED_50) {
9561 		if (current_data_rate == 1) {
9562 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9563 			return;
9564 		}
9565 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9566 	}
9567 
9568 	bridge_pos = pci_pcie_cap(root);
9569 	if (!bridge_pos)
9570 		return;
9571 
9572 	gpu_pos = pci_pcie_cap(rdev->pdev);
9573 	if (!gpu_pos)
9574 		return;
9575 
9576 	if (mask & DRM_PCIE_SPEED_80) {
9577 		/* re-try equalization if gen3 is not already enabled */
9578 		if (current_data_rate != 2) {
9579 			u16 bridge_cfg, gpu_cfg;
9580 			u16 bridge_cfg2, gpu_cfg2;
9581 			u32 max_lw, current_lw, tmp;
9582 
9583 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9584 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9585 
9586 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9587 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9588 
9589 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9590 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9591 
9592 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9593 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9594 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9595 
9596 			if (current_lw < max_lw) {
9597 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9598 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9599 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9600 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9601 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9602 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9603 				}
9604 			}
9605 
9606 			for (i = 0; i < 10; i++) {
9607 				/* check status */
9608 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9609 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9610 					break;
9611 
9612 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9613 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9614 
9615 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9616 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9617 
9618 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9619 				tmp |= LC_SET_QUIESCE;
9620 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9621 
9622 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9623 				tmp |= LC_REDO_EQ;
9624 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9625 
9626 				mdelay(100);
9627 
9628 				/* linkctl */
9629 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9630 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9631 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9632 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9633 
9634 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9635 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9636 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9637 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9638 
9639 				/* linkctl2 */
9640 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9641 				tmp16 &= ~((1 << 4) | (7 << 9));
9642 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9643 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9644 
9645 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9646 				tmp16 &= ~((1 << 4) | (7 << 9));
9647 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9648 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9649 
9650 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9651 				tmp &= ~LC_SET_QUIESCE;
9652 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9653 			}
9654 		}
9655 	}
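	/* The retraining loop above snapshots the bridge and GPU LNKCTL /
	 * LNKCTL2 settings, toggles LC_SET_QUIESCE and LC_REDO_EQ for up
	 * to 10 passes, restoring the HAWD and compliance-related bits on
	 * each pass, and bails out early if the GPU reports transactions
	 * pending.
	 */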
9656 
9657 	/* set the link speed */
9658 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9659 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9660 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9661 
9662 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9663 	tmp16 &= ~0xf;
9664 	if (mask & DRM_PCIE_SPEED_80)
9665 		tmp16 |= 3; /* gen3 */
9666 	else if (mask & DRM_PCIE_SPEED_50)
9667 		tmp16 |= 2; /* gen2 */
9668 	else
9669 		tmp16 |= 1; /* gen1 */
9670 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9671 
9672 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9673 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9674 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9675 
9676 	for (i = 0; i < rdev->usec_timeout; i++) {
9677 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9678 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9679 			break;
9680 		udelay(1);
9681 	}
9682 }
9683 
9684 static void cik_program_aspm(struct radeon_device *rdev)
9685 {
9686 	u32 data, orig;
9687 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9688 	bool disable_clkreq = false;
9689 
9690 	if (radeon_aspm == 0)
9691 		return;
9692 
9693 	/* XXX double check IGPs */
9694 	if (rdev->flags & RADEON_IS_IGP)
9695 		return;
9696 
9697 	if (!(rdev->flags & RADEON_IS_PCIE))
9698 		return;
9699 
9700 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9701 	data &= ~LC_XMIT_N_FTS_MASK;
9702 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9703 	if (orig != data)
9704 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9705 
9706 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9707 	data |= LC_GO_TO_RECOVERY;
9708 	if (orig != data)
9709 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9710 
9711 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9712 	data |= P_IGNORE_EDB_ERR;
9713 	if (orig != data)
9714 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9715 
9716 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9717 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9718 	data |= LC_PMI_TO_L1_DIS;
9719 	if (!disable_l0s)
9720 		data |= LC_L0S_INACTIVITY(7);
9721 
9722 	if (!disable_l1) {
9723 		data |= LC_L1_INACTIVITY(7);
9724 		data &= ~LC_PMI_TO_L1_DIS;
9725 		if (orig != data)
9726 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9727 
9728 		if (!disable_plloff_in_l1) {
9729 			bool clk_req_support;
9730 
9731 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9732 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9733 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9734 			if (orig != data)
9735 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9736 
9737 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9738 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9739 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9740 			if (orig != data)
9741 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9742 
9743 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9744 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9745 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9746 			if (orig != data)
9747 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9748 
9749 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9750 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9751 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9752 			if (orig != data)
9753 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9754 
9755 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9756 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9757 			data |= LC_DYN_LANES_PWR_STATE(3);
9758 			if (orig != data)
9759 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9760 
9761 			if (!disable_clkreq &&
9762 			    !pci_is_root_bus(rdev->pdev->bus)) {
9763 				struct pci_dev *root = rdev->pdev->bus->self;
9764 				u32 lnkcap;
9765 
9766 				clk_req_support = false;
9767 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9768 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9769 					clk_req_support = true;
9770 			} else {
9771 				clk_req_support = false;
9772 			}
9773 
9774 			if (clk_req_support) {
9775 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9776 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9777 				if (orig != data)
9778 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9779 
9780 				orig = data = RREG32_SMC(THM_CLK_CNTL);
9781 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9782 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9783 				if (orig != data)
9784 					WREG32_SMC(THM_CLK_CNTL, data);
9785 
9786 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9787 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9788 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9789 				if (orig != data)
9790 					WREG32_SMC(MISC_CLK_CTRL, data);
9791 
9792 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9793 				data &= ~BCLK_AS_XCLK;
9794 				if (orig != data)
9795 					WREG32_SMC(CG_CLKPIN_CNTL, data);
9796 
9797 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9798 				data &= ~FORCE_BIF_REFCLK_EN;
9799 				if (orig != data)
9800 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9801 
9802 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9803 				data &= ~MPLL_CLKOUT_SEL_MASK;
9804 				data |= MPLL_CLKOUT_SEL(4);
9805 				if (orig != data)
9806 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9807 			}
9808 		}
9809 	} else {
9810 		if (orig != data)
9811 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9812 	}
9813 
9814 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9815 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9816 	if (orig != data)
9817 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9818 
9819 	if (!disable_l0s) {
9820 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9821 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9822 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9823 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9824 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9825 				data &= ~LC_L0S_INACTIVITY_MASK;
9826 				if (orig != data)
9827 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9828 			}
9829 		}
9830 	}
9831 }
9832