xref: /openbmc/linux/drivers/gpu/drm/radeon/cik.c (revision 275876e2)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35 
36 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
45 
46 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
47 MODULE_FIRMWARE("radeon/bonaire_me.bin");
48 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
49 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
50 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
51 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
52 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
53 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
54 
55 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
56 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
57 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
58 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
59 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
60 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
61 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
63 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
64 
65 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
66 MODULE_FIRMWARE("radeon/hawaii_me.bin");
67 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
68 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
69 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
70 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
71 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
72 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
73 
74 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
75 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
76 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
77 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
78 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
79 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
80 
81 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
82 MODULE_FIRMWARE("radeon/kaveri_me.bin");
83 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
84 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
85 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
86 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
87 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
88 
89 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
90 MODULE_FIRMWARE("radeon/KABINI_me.bin");
91 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
92 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
93 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
94 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
95 
96 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
97 MODULE_FIRMWARE("radeon/kabini_me.bin");
98 MODULE_FIRMWARE("radeon/kabini_ce.bin");
99 MODULE_FIRMWARE("radeon/kabini_mec.bin");
100 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
101 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
102 
103 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
104 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
105 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
106 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
107 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
108 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
109 
110 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
111 MODULE_FIRMWARE("radeon/mullins_me.bin");
112 MODULE_FIRMWARE("radeon/mullins_ce.bin");
113 MODULE_FIRMWARE("radeon/mullins_mec.bin");
114 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
115 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
116 
117 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
118 extern void r600_ih_ring_fini(struct radeon_device *rdev);
119 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
120 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
121 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
122 extern void sumo_rlc_fini(struct radeon_device *rdev);
123 extern int sumo_rlc_init(struct radeon_device *rdev);
124 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
125 extern void si_rlc_reset(struct radeon_device *rdev);
126 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
127 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
128 extern int cik_sdma_resume(struct radeon_device *rdev);
129 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
130 extern void cik_sdma_fini(struct radeon_device *rdev);
131 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
132 static void cik_rlc_stop(struct radeon_device *rdev);
133 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
134 static void cik_program_aspm(struct radeon_device *rdev);
135 static void cik_init_pg(struct radeon_device *rdev);
136 static void cik_init_cg(struct radeon_device *rdev);
137 static void cik_fini_pg(struct radeon_device *rdev);
138 static void cik_fini_cg(struct radeon_device *rdev);
139 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
140 					  bool enable);
141 
142 /* get temperature in millidegrees */
143 int ci_get_temp(struct radeon_device *rdev)
144 {
145 	u32 temp;
146 	int actual_temp = 0;
147 
148 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
149 		CTF_TEMP_SHIFT;
150 
151 	if (temp & 0x200)
152 		actual_temp = 255;
153 	else
154 		actual_temp = temp & 0x1ff;
155 
156 	actual_temp = actual_temp * 1000;
157 
158 	return actual_temp;
159 }
160 
161 /* get temperature in millidegrees */
162 int kv_get_temp(struct radeon_device *rdev)
163 {
164 	u32 temp;
165 	int actual_temp = 0;
166 
167 	temp = RREG32_SMC(0xC0300E0C);
168 
169 	if (temp)
170 		actual_temp = (temp / 8) - 49;
171 	else
172 		actual_temp = 0;
173 
174 	actual_temp = actual_temp * 1000;
175 
176 	return actual_temp;
177 }
178 
/*
 * Indirect register accessors
 */
182 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
183 {
184 	unsigned long flags;
185 	u32 r;
186 
187 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
188 	WREG32(PCIE_INDEX, reg);
189 	(void)RREG32(PCIE_INDEX);
190 	r = RREG32(PCIE_DATA);
191 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
192 	return r;
193 }
194 
195 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
196 {
197 	unsigned long flags;
198 
199 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
200 	WREG32(PCIE_INDEX, reg);
201 	(void)RREG32(PCIE_INDEX);
202 	WREG32(PCIE_DATA, v);
203 	(void)RREG32(PCIE_DATA);
204 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
205 }
206 
207 static const u32 spectre_rlc_save_restore_register_list[] =
208 {
209 	(0x0e00 << 16) | (0xc12c >> 2),
210 	0x00000000,
211 	(0x0e00 << 16) | (0xc140 >> 2),
212 	0x00000000,
213 	(0x0e00 << 16) | (0xc150 >> 2),
214 	0x00000000,
215 	(0x0e00 << 16) | (0xc15c >> 2),
216 	0x00000000,
217 	(0x0e00 << 16) | (0xc168 >> 2),
218 	0x00000000,
219 	(0x0e00 << 16) | (0xc170 >> 2),
220 	0x00000000,
221 	(0x0e00 << 16) | (0xc178 >> 2),
222 	0x00000000,
223 	(0x0e00 << 16) | (0xc204 >> 2),
224 	0x00000000,
225 	(0x0e00 << 16) | (0xc2b4 >> 2),
226 	0x00000000,
227 	(0x0e00 << 16) | (0xc2b8 >> 2),
228 	0x00000000,
229 	(0x0e00 << 16) | (0xc2bc >> 2),
230 	0x00000000,
231 	(0x0e00 << 16) | (0xc2c0 >> 2),
232 	0x00000000,
233 	(0x0e00 << 16) | (0x8228 >> 2),
234 	0x00000000,
235 	(0x0e00 << 16) | (0x829c >> 2),
236 	0x00000000,
237 	(0x0e00 << 16) | (0x869c >> 2),
238 	0x00000000,
239 	(0x0600 << 16) | (0x98f4 >> 2),
240 	0x00000000,
241 	(0x0e00 << 16) | (0x98f8 >> 2),
242 	0x00000000,
243 	(0x0e00 << 16) | (0x9900 >> 2),
244 	0x00000000,
245 	(0x0e00 << 16) | (0xc260 >> 2),
246 	0x00000000,
247 	(0x0e00 << 16) | (0x90e8 >> 2),
248 	0x00000000,
249 	(0x0e00 << 16) | (0x3c000 >> 2),
250 	0x00000000,
251 	(0x0e00 << 16) | (0x3c00c >> 2),
252 	0x00000000,
253 	(0x0e00 << 16) | (0x8c1c >> 2),
254 	0x00000000,
255 	(0x0e00 << 16) | (0x9700 >> 2),
256 	0x00000000,
257 	(0x0e00 << 16) | (0xcd20 >> 2),
258 	0x00000000,
259 	(0x4e00 << 16) | (0xcd20 >> 2),
260 	0x00000000,
261 	(0x5e00 << 16) | (0xcd20 >> 2),
262 	0x00000000,
263 	(0x6e00 << 16) | (0xcd20 >> 2),
264 	0x00000000,
265 	(0x7e00 << 16) | (0xcd20 >> 2),
266 	0x00000000,
267 	(0x8e00 << 16) | (0xcd20 >> 2),
268 	0x00000000,
269 	(0x9e00 << 16) | (0xcd20 >> 2),
270 	0x00000000,
271 	(0xae00 << 16) | (0xcd20 >> 2),
272 	0x00000000,
273 	(0xbe00 << 16) | (0xcd20 >> 2),
274 	0x00000000,
275 	(0x0e00 << 16) | (0x89bc >> 2),
276 	0x00000000,
277 	(0x0e00 << 16) | (0x8900 >> 2),
278 	0x00000000,
279 	0x3,
280 	(0x0e00 << 16) | (0xc130 >> 2),
281 	0x00000000,
282 	(0x0e00 << 16) | (0xc134 >> 2),
283 	0x00000000,
284 	(0x0e00 << 16) | (0xc1fc >> 2),
285 	0x00000000,
286 	(0x0e00 << 16) | (0xc208 >> 2),
287 	0x00000000,
288 	(0x0e00 << 16) | (0xc264 >> 2),
289 	0x00000000,
290 	(0x0e00 << 16) | (0xc268 >> 2),
291 	0x00000000,
292 	(0x0e00 << 16) | (0xc26c >> 2),
293 	0x00000000,
294 	(0x0e00 << 16) | (0xc270 >> 2),
295 	0x00000000,
296 	(0x0e00 << 16) | (0xc274 >> 2),
297 	0x00000000,
298 	(0x0e00 << 16) | (0xc278 >> 2),
299 	0x00000000,
300 	(0x0e00 << 16) | (0xc27c >> 2),
301 	0x00000000,
302 	(0x0e00 << 16) | (0xc280 >> 2),
303 	0x00000000,
304 	(0x0e00 << 16) | (0xc284 >> 2),
305 	0x00000000,
306 	(0x0e00 << 16) | (0xc288 >> 2),
307 	0x00000000,
308 	(0x0e00 << 16) | (0xc28c >> 2),
309 	0x00000000,
310 	(0x0e00 << 16) | (0xc290 >> 2),
311 	0x00000000,
312 	(0x0e00 << 16) | (0xc294 >> 2),
313 	0x00000000,
314 	(0x0e00 << 16) | (0xc298 >> 2),
315 	0x00000000,
316 	(0x0e00 << 16) | (0xc29c >> 2),
317 	0x00000000,
318 	(0x0e00 << 16) | (0xc2a0 >> 2),
319 	0x00000000,
320 	(0x0e00 << 16) | (0xc2a4 >> 2),
321 	0x00000000,
322 	(0x0e00 << 16) | (0xc2a8 >> 2),
323 	0x00000000,
324 	(0x0e00 << 16) | (0xc2ac  >> 2),
325 	0x00000000,
326 	(0x0e00 << 16) | (0xc2b0 >> 2),
327 	0x00000000,
328 	(0x0e00 << 16) | (0x301d0 >> 2),
329 	0x00000000,
330 	(0x0e00 << 16) | (0x30238 >> 2),
331 	0x00000000,
332 	(0x0e00 << 16) | (0x30250 >> 2),
333 	0x00000000,
334 	(0x0e00 << 16) | (0x30254 >> 2),
335 	0x00000000,
336 	(0x0e00 << 16) | (0x30258 >> 2),
337 	0x00000000,
338 	(0x0e00 << 16) | (0x3025c >> 2),
339 	0x00000000,
340 	(0x4e00 << 16) | (0xc900 >> 2),
341 	0x00000000,
342 	(0x5e00 << 16) | (0xc900 >> 2),
343 	0x00000000,
344 	(0x6e00 << 16) | (0xc900 >> 2),
345 	0x00000000,
346 	(0x7e00 << 16) | (0xc900 >> 2),
347 	0x00000000,
348 	(0x8e00 << 16) | (0xc900 >> 2),
349 	0x00000000,
350 	(0x9e00 << 16) | (0xc900 >> 2),
351 	0x00000000,
352 	(0xae00 << 16) | (0xc900 >> 2),
353 	0x00000000,
354 	(0xbe00 << 16) | (0xc900 >> 2),
355 	0x00000000,
356 	(0x4e00 << 16) | (0xc904 >> 2),
357 	0x00000000,
358 	(0x5e00 << 16) | (0xc904 >> 2),
359 	0x00000000,
360 	(0x6e00 << 16) | (0xc904 >> 2),
361 	0x00000000,
362 	(0x7e00 << 16) | (0xc904 >> 2),
363 	0x00000000,
364 	(0x8e00 << 16) | (0xc904 >> 2),
365 	0x00000000,
366 	(0x9e00 << 16) | (0xc904 >> 2),
367 	0x00000000,
368 	(0xae00 << 16) | (0xc904 >> 2),
369 	0x00000000,
370 	(0xbe00 << 16) | (0xc904 >> 2),
371 	0x00000000,
372 	(0x4e00 << 16) | (0xc908 >> 2),
373 	0x00000000,
374 	(0x5e00 << 16) | (0xc908 >> 2),
375 	0x00000000,
376 	(0x6e00 << 16) | (0xc908 >> 2),
377 	0x00000000,
378 	(0x7e00 << 16) | (0xc908 >> 2),
379 	0x00000000,
380 	(0x8e00 << 16) | (0xc908 >> 2),
381 	0x00000000,
382 	(0x9e00 << 16) | (0xc908 >> 2),
383 	0x00000000,
384 	(0xae00 << 16) | (0xc908 >> 2),
385 	0x00000000,
386 	(0xbe00 << 16) | (0xc908 >> 2),
387 	0x00000000,
388 	(0x4e00 << 16) | (0xc90c >> 2),
389 	0x00000000,
390 	(0x5e00 << 16) | (0xc90c >> 2),
391 	0x00000000,
392 	(0x6e00 << 16) | (0xc90c >> 2),
393 	0x00000000,
394 	(0x7e00 << 16) | (0xc90c >> 2),
395 	0x00000000,
396 	(0x8e00 << 16) | (0xc90c >> 2),
397 	0x00000000,
398 	(0x9e00 << 16) | (0xc90c >> 2),
399 	0x00000000,
400 	(0xae00 << 16) | (0xc90c >> 2),
401 	0x00000000,
402 	(0xbe00 << 16) | (0xc90c >> 2),
403 	0x00000000,
404 	(0x4e00 << 16) | (0xc910 >> 2),
405 	0x00000000,
406 	(0x5e00 << 16) | (0xc910 >> 2),
407 	0x00000000,
408 	(0x6e00 << 16) | (0xc910 >> 2),
409 	0x00000000,
410 	(0x7e00 << 16) | (0xc910 >> 2),
411 	0x00000000,
412 	(0x8e00 << 16) | (0xc910 >> 2),
413 	0x00000000,
414 	(0x9e00 << 16) | (0xc910 >> 2),
415 	0x00000000,
416 	(0xae00 << 16) | (0xc910 >> 2),
417 	0x00000000,
418 	(0xbe00 << 16) | (0xc910 >> 2),
419 	0x00000000,
420 	(0x0e00 << 16) | (0xc99c >> 2),
421 	0x00000000,
422 	(0x0e00 << 16) | (0x9834 >> 2),
423 	0x00000000,
424 	(0x0000 << 16) | (0x30f00 >> 2),
425 	0x00000000,
426 	(0x0001 << 16) | (0x30f00 >> 2),
427 	0x00000000,
428 	(0x0000 << 16) | (0x30f04 >> 2),
429 	0x00000000,
430 	(0x0001 << 16) | (0x30f04 >> 2),
431 	0x00000000,
432 	(0x0000 << 16) | (0x30f08 >> 2),
433 	0x00000000,
434 	(0x0001 << 16) | (0x30f08 >> 2),
435 	0x00000000,
436 	(0x0000 << 16) | (0x30f0c >> 2),
437 	0x00000000,
438 	(0x0001 << 16) | (0x30f0c >> 2),
439 	0x00000000,
440 	(0x0600 << 16) | (0x9b7c >> 2),
441 	0x00000000,
442 	(0x0e00 << 16) | (0x8a14 >> 2),
443 	0x00000000,
444 	(0x0e00 << 16) | (0x8a18 >> 2),
445 	0x00000000,
446 	(0x0600 << 16) | (0x30a00 >> 2),
447 	0x00000000,
448 	(0x0e00 << 16) | (0x8bf0 >> 2),
449 	0x00000000,
450 	(0x0e00 << 16) | (0x8bcc >> 2),
451 	0x00000000,
452 	(0x0e00 << 16) | (0x8b24 >> 2),
453 	0x00000000,
454 	(0x0e00 << 16) | (0x30a04 >> 2),
455 	0x00000000,
456 	(0x0600 << 16) | (0x30a10 >> 2),
457 	0x00000000,
458 	(0x0600 << 16) | (0x30a14 >> 2),
459 	0x00000000,
460 	(0x0600 << 16) | (0x30a18 >> 2),
461 	0x00000000,
462 	(0x0600 << 16) | (0x30a2c >> 2),
463 	0x00000000,
464 	(0x0e00 << 16) | (0xc700 >> 2),
465 	0x00000000,
466 	(0x0e00 << 16) | (0xc704 >> 2),
467 	0x00000000,
468 	(0x0e00 << 16) | (0xc708 >> 2),
469 	0x00000000,
470 	(0x0e00 << 16) | (0xc768 >> 2),
471 	0x00000000,
472 	(0x0400 << 16) | (0xc770 >> 2),
473 	0x00000000,
474 	(0x0400 << 16) | (0xc774 >> 2),
475 	0x00000000,
476 	(0x0400 << 16) | (0xc778 >> 2),
477 	0x00000000,
478 	(0x0400 << 16) | (0xc77c >> 2),
479 	0x00000000,
480 	(0x0400 << 16) | (0xc780 >> 2),
481 	0x00000000,
482 	(0x0400 << 16) | (0xc784 >> 2),
483 	0x00000000,
484 	(0x0400 << 16) | (0xc788 >> 2),
485 	0x00000000,
486 	(0x0400 << 16) | (0xc78c >> 2),
487 	0x00000000,
488 	(0x0400 << 16) | (0xc798 >> 2),
489 	0x00000000,
490 	(0x0400 << 16) | (0xc79c >> 2),
491 	0x00000000,
492 	(0x0400 << 16) | (0xc7a0 >> 2),
493 	0x00000000,
494 	(0x0400 << 16) | (0xc7a4 >> 2),
495 	0x00000000,
496 	(0x0400 << 16) | (0xc7a8 >> 2),
497 	0x00000000,
498 	(0x0400 << 16) | (0xc7ac >> 2),
499 	0x00000000,
500 	(0x0400 << 16) | (0xc7b0 >> 2),
501 	0x00000000,
502 	(0x0400 << 16) | (0xc7b4 >> 2),
503 	0x00000000,
504 	(0x0e00 << 16) | (0x9100 >> 2),
505 	0x00000000,
506 	(0x0e00 << 16) | (0x3c010 >> 2),
507 	0x00000000,
508 	(0x0e00 << 16) | (0x92a8 >> 2),
509 	0x00000000,
510 	(0x0e00 << 16) | (0x92ac >> 2),
511 	0x00000000,
512 	(0x0e00 << 16) | (0x92b4 >> 2),
513 	0x00000000,
514 	(0x0e00 << 16) | (0x92b8 >> 2),
515 	0x00000000,
516 	(0x0e00 << 16) | (0x92bc >> 2),
517 	0x00000000,
518 	(0x0e00 << 16) | (0x92c0 >> 2),
519 	0x00000000,
520 	(0x0e00 << 16) | (0x92c4 >> 2),
521 	0x00000000,
522 	(0x0e00 << 16) | (0x92c8 >> 2),
523 	0x00000000,
524 	(0x0e00 << 16) | (0x92cc >> 2),
525 	0x00000000,
526 	(0x0e00 << 16) | (0x92d0 >> 2),
527 	0x00000000,
528 	(0x0e00 << 16) | (0x8c00 >> 2),
529 	0x00000000,
530 	(0x0e00 << 16) | (0x8c04 >> 2),
531 	0x00000000,
532 	(0x0e00 << 16) | (0x8c20 >> 2),
533 	0x00000000,
534 	(0x0e00 << 16) | (0x8c38 >> 2),
535 	0x00000000,
536 	(0x0e00 << 16) | (0x8c3c >> 2),
537 	0x00000000,
538 	(0x0e00 << 16) | (0xae00 >> 2),
539 	0x00000000,
540 	(0x0e00 << 16) | (0x9604 >> 2),
541 	0x00000000,
542 	(0x0e00 << 16) | (0xac08 >> 2),
543 	0x00000000,
544 	(0x0e00 << 16) | (0xac0c >> 2),
545 	0x00000000,
546 	(0x0e00 << 16) | (0xac10 >> 2),
547 	0x00000000,
548 	(0x0e00 << 16) | (0xac14 >> 2),
549 	0x00000000,
550 	(0x0e00 << 16) | (0xac58 >> 2),
551 	0x00000000,
552 	(0x0e00 << 16) | (0xac68 >> 2),
553 	0x00000000,
554 	(0x0e00 << 16) | (0xac6c >> 2),
555 	0x00000000,
556 	(0x0e00 << 16) | (0xac70 >> 2),
557 	0x00000000,
558 	(0x0e00 << 16) | (0xac74 >> 2),
559 	0x00000000,
560 	(0x0e00 << 16) | (0xac78 >> 2),
561 	0x00000000,
562 	(0x0e00 << 16) | (0xac7c >> 2),
563 	0x00000000,
564 	(0x0e00 << 16) | (0xac80 >> 2),
565 	0x00000000,
566 	(0x0e00 << 16) | (0xac84 >> 2),
567 	0x00000000,
568 	(0x0e00 << 16) | (0xac88 >> 2),
569 	0x00000000,
570 	(0x0e00 << 16) | (0xac8c >> 2),
571 	0x00000000,
572 	(0x0e00 << 16) | (0x970c >> 2),
573 	0x00000000,
574 	(0x0e00 << 16) | (0x9714 >> 2),
575 	0x00000000,
576 	(0x0e00 << 16) | (0x9718 >> 2),
577 	0x00000000,
578 	(0x0e00 << 16) | (0x971c >> 2),
579 	0x00000000,
580 	(0x0e00 << 16) | (0x31068 >> 2),
581 	0x00000000,
582 	(0x4e00 << 16) | (0x31068 >> 2),
583 	0x00000000,
584 	(0x5e00 << 16) | (0x31068 >> 2),
585 	0x00000000,
586 	(0x6e00 << 16) | (0x31068 >> 2),
587 	0x00000000,
588 	(0x7e00 << 16) | (0x31068 >> 2),
589 	0x00000000,
590 	(0x8e00 << 16) | (0x31068 >> 2),
591 	0x00000000,
592 	(0x9e00 << 16) | (0x31068 >> 2),
593 	0x00000000,
594 	(0xae00 << 16) | (0x31068 >> 2),
595 	0x00000000,
596 	(0xbe00 << 16) | (0x31068 >> 2),
597 	0x00000000,
598 	(0x0e00 << 16) | (0xcd10 >> 2),
599 	0x00000000,
600 	(0x0e00 << 16) | (0xcd14 >> 2),
601 	0x00000000,
602 	(0x0e00 << 16) | (0x88b0 >> 2),
603 	0x00000000,
604 	(0x0e00 << 16) | (0x88b4 >> 2),
605 	0x00000000,
606 	(0x0e00 << 16) | (0x88b8 >> 2),
607 	0x00000000,
608 	(0x0e00 << 16) | (0x88bc >> 2),
609 	0x00000000,
610 	(0x0400 << 16) | (0x89c0 >> 2),
611 	0x00000000,
612 	(0x0e00 << 16) | (0x88c4 >> 2),
613 	0x00000000,
614 	(0x0e00 << 16) | (0x88c8 >> 2),
615 	0x00000000,
616 	(0x0e00 << 16) | (0x88d0 >> 2),
617 	0x00000000,
618 	(0x0e00 << 16) | (0x88d4 >> 2),
619 	0x00000000,
620 	(0x0e00 << 16) | (0x88d8 >> 2),
621 	0x00000000,
622 	(0x0e00 << 16) | (0x8980 >> 2),
623 	0x00000000,
624 	(0x0e00 << 16) | (0x30938 >> 2),
625 	0x00000000,
626 	(0x0e00 << 16) | (0x3093c >> 2),
627 	0x00000000,
628 	(0x0e00 << 16) | (0x30940 >> 2),
629 	0x00000000,
630 	(0x0e00 << 16) | (0x89a0 >> 2),
631 	0x00000000,
632 	(0x0e00 << 16) | (0x30900 >> 2),
633 	0x00000000,
634 	(0x0e00 << 16) | (0x30904 >> 2),
635 	0x00000000,
636 	(0x0e00 << 16) | (0x89b4 >> 2),
637 	0x00000000,
638 	(0x0e00 << 16) | (0x3c210 >> 2),
639 	0x00000000,
640 	(0x0e00 << 16) | (0x3c214 >> 2),
641 	0x00000000,
642 	(0x0e00 << 16) | (0x3c218 >> 2),
643 	0x00000000,
644 	(0x0e00 << 16) | (0x8904 >> 2),
645 	0x00000000,
646 	0x5,
647 	(0x0e00 << 16) | (0x8c28 >> 2),
648 	(0x0e00 << 16) | (0x8c2c >> 2),
649 	(0x0e00 << 16) | (0x8c30 >> 2),
650 	(0x0e00 << 16) | (0x8c34 >> 2),
651 	(0x0e00 << 16) | (0x9600 >> 2),
652 };
653 
654 static const u32 kalindi_rlc_save_restore_register_list[] =
655 {
656 	(0x0e00 << 16) | (0xc12c >> 2),
657 	0x00000000,
658 	(0x0e00 << 16) | (0xc140 >> 2),
659 	0x00000000,
660 	(0x0e00 << 16) | (0xc150 >> 2),
661 	0x00000000,
662 	(0x0e00 << 16) | (0xc15c >> 2),
663 	0x00000000,
664 	(0x0e00 << 16) | (0xc168 >> 2),
665 	0x00000000,
666 	(0x0e00 << 16) | (0xc170 >> 2),
667 	0x00000000,
668 	(0x0e00 << 16) | (0xc204 >> 2),
669 	0x00000000,
670 	(0x0e00 << 16) | (0xc2b4 >> 2),
671 	0x00000000,
672 	(0x0e00 << 16) | (0xc2b8 >> 2),
673 	0x00000000,
674 	(0x0e00 << 16) | (0xc2bc >> 2),
675 	0x00000000,
676 	(0x0e00 << 16) | (0xc2c0 >> 2),
677 	0x00000000,
678 	(0x0e00 << 16) | (0x8228 >> 2),
679 	0x00000000,
680 	(0x0e00 << 16) | (0x829c >> 2),
681 	0x00000000,
682 	(0x0e00 << 16) | (0x869c >> 2),
683 	0x00000000,
684 	(0x0600 << 16) | (0x98f4 >> 2),
685 	0x00000000,
686 	(0x0e00 << 16) | (0x98f8 >> 2),
687 	0x00000000,
688 	(0x0e00 << 16) | (0x9900 >> 2),
689 	0x00000000,
690 	(0x0e00 << 16) | (0xc260 >> 2),
691 	0x00000000,
692 	(0x0e00 << 16) | (0x90e8 >> 2),
693 	0x00000000,
694 	(0x0e00 << 16) | (0x3c000 >> 2),
695 	0x00000000,
696 	(0x0e00 << 16) | (0x3c00c >> 2),
697 	0x00000000,
698 	(0x0e00 << 16) | (0x8c1c >> 2),
699 	0x00000000,
700 	(0x0e00 << 16) | (0x9700 >> 2),
701 	0x00000000,
702 	(0x0e00 << 16) | (0xcd20 >> 2),
703 	0x00000000,
704 	(0x4e00 << 16) | (0xcd20 >> 2),
705 	0x00000000,
706 	(0x5e00 << 16) | (0xcd20 >> 2),
707 	0x00000000,
708 	(0x6e00 << 16) | (0xcd20 >> 2),
709 	0x00000000,
710 	(0x7e00 << 16) | (0xcd20 >> 2),
711 	0x00000000,
712 	(0x0e00 << 16) | (0x89bc >> 2),
713 	0x00000000,
714 	(0x0e00 << 16) | (0x8900 >> 2),
715 	0x00000000,
716 	0x3,
717 	(0x0e00 << 16) | (0xc130 >> 2),
718 	0x00000000,
719 	(0x0e00 << 16) | (0xc134 >> 2),
720 	0x00000000,
721 	(0x0e00 << 16) | (0xc1fc >> 2),
722 	0x00000000,
723 	(0x0e00 << 16) | (0xc208 >> 2),
724 	0x00000000,
725 	(0x0e00 << 16) | (0xc264 >> 2),
726 	0x00000000,
727 	(0x0e00 << 16) | (0xc268 >> 2),
728 	0x00000000,
729 	(0x0e00 << 16) | (0xc26c >> 2),
730 	0x00000000,
731 	(0x0e00 << 16) | (0xc270 >> 2),
732 	0x00000000,
733 	(0x0e00 << 16) | (0xc274 >> 2),
734 	0x00000000,
735 	(0x0e00 << 16) | (0xc28c >> 2),
736 	0x00000000,
737 	(0x0e00 << 16) | (0xc290 >> 2),
738 	0x00000000,
739 	(0x0e00 << 16) | (0xc294 >> 2),
740 	0x00000000,
741 	(0x0e00 << 16) | (0xc298 >> 2),
742 	0x00000000,
743 	(0x0e00 << 16) | (0xc2a0 >> 2),
744 	0x00000000,
745 	(0x0e00 << 16) | (0xc2a4 >> 2),
746 	0x00000000,
747 	(0x0e00 << 16) | (0xc2a8 >> 2),
748 	0x00000000,
749 	(0x0e00 << 16) | (0xc2ac >> 2),
750 	0x00000000,
751 	(0x0e00 << 16) | (0x301d0 >> 2),
752 	0x00000000,
753 	(0x0e00 << 16) | (0x30238 >> 2),
754 	0x00000000,
755 	(0x0e00 << 16) | (0x30250 >> 2),
756 	0x00000000,
757 	(0x0e00 << 16) | (0x30254 >> 2),
758 	0x00000000,
759 	(0x0e00 << 16) | (0x30258 >> 2),
760 	0x00000000,
761 	(0x0e00 << 16) | (0x3025c >> 2),
762 	0x00000000,
763 	(0x4e00 << 16) | (0xc900 >> 2),
764 	0x00000000,
765 	(0x5e00 << 16) | (0xc900 >> 2),
766 	0x00000000,
767 	(0x6e00 << 16) | (0xc900 >> 2),
768 	0x00000000,
769 	(0x7e00 << 16) | (0xc900 >> 2),
770 	0x00000000,
771 	(0x4e00 << 16) | (0xc904 >> 2),
772 	0x00000000,
773 	(0x5e00 << 16) | (0xc904 >> 2),
774 	0x00000000,
775 	(0x6e00 << 16) | (0xc904 >> 2),
776 	0x00000000,
777 	(0x7e00 << 16) | (0xc904 >> 2),
778 	0x00000000,
779 	(0x4e00 << 16) | (0xc908 >> 2),
780 	0x00000000,
781 	(0x5e00 << 16) | (0xc908 >> 2),
782 	0x00000000,
783 	(0x6e00 << 16) | (0xc908 >> 2),
784 	0x00000000,
785 	(0x7e00 << 16) | (0xc908 >> 2),
786 	0x00000000,
787 	(0x4e00 << 16) | (0xc90c >> 2),
788 	0x00000000,
789 	(0x5e00 << 16) | (0xc90c >> 2),
790 	0x00000000,
791 	(0x6e00 << 16) | (0xc90c >> 2),
792 	0x00000000,
793 	(0x7e00 << 16) | (0xc90c >> 2),
794 	0x00000000,
795 	(0x4e00 << 16) | (0xc910 >> 2),
796 	0x00000000,
797 	(0x5e00 << 16) | (0xc910 >> 2),
798 	0x00000000,
799 	(0x6e00 << 16) | (0xc910 >> 2),
800 	0x00000000,
801 	(0x7e00 << 16) | (0xc910 >> 2),
802 	0x00000000,
803 	(0x0e00 << 16) | (0xc99c >> 2),
804 	0x00000000,
805 	(0x0e00 << 16) | (0x9834 >> 2),
806 	0x00000000,
807 	(0x0000 << 16) | (0x30f00 >> 2),
808 	0x00000000,
809 	(0x0000 << 16) | (0x30f04 >> 2),
810 	0x00000000,
811 	(0x0000 << 16) | (0x30f08 >> 2),
812 	0x00000000,
813 	(0x0000 << 16) | (0x30f0c >> 2),
814 	0x00000000,
815 	(0x0600 << 16) | (0x9b7c >> 2),
816 	0x00000000,
817 	(0x0e00 << 16) | (0x8a14 >> 2),
818 	0x00000000,
819 	(0x0e00 << 16) | (0x8a18 >> 2),
820 	0x00000000,
821 	(0x0600 << 16) | (0x30a00 >> 2),
822 	0x00000000,
823 	(0x0e00 << 16) | (0x8bf0 >> 2),
824 	0x00000000,
825 	(0x0e00 << 16) | (0x8bcc >> 2),
826 	0x00000000,
827 	(0x0e00 << 16) | (0x8b24 >> 2),
828 	0x00000000,
829 	(0x0e00 << 16) | (0x30a04 >> 2),
830 	0x00000000,
831 	(0x0600 << 16) | (0x30a10 >> 2),
832 	0x00000000,
833 	(0x0600 << 16) | (0x30a14 >> 2),
834 	0x00000000,
835 	(0x0600 << 16) | (0x30a18 >> 2),
836 	0x00000000,
837 	(0x0600 << 16) | (0x30a2c >> 2),
838 	0x00000000,
839 	(0x0e00 << 16) | (0xc700 >> 2),
840 	0x00000000,
841 	(0x0e00 << 16) | (0xc704 >> 2),
842 	0x00000000,
843 	(0x0e00 << 16) | (0xc708 >> 2),
844 	0x00000000,
845 	(0x0e00 << 16) | (0xc768 >> 2),
846 	0x00000000,
847 	(0x0400 << 16) | (0xc770 >> 2),
848 	0x00000000,
849 	(0x0400 << 16) | (0xc774 >> 2),
850 	0x00000000,
851 	(0x0400 << 16) | (0xc798 >> 2),
852 	0x00000000,
853 	(0x0400 << 16) | (0xc79c >> 2),
854 	0x00000000,
855 	(0x0e00 << 16) | (0x9100 >> 2),
856 	0x00000000,
857 	(0x0e00 << 16) | (0x3c010 >> 2),
858 	0x00000000,
859 	(0x0e00 << 16) | (0x8c00 >> 2),
860 	0x00000000,
861 	(0x0e00 << 16) | (0x8c04 >> 2),
862 	0x00000000,
863 	(0x0e00 << 16) | (0x8c20 >> 2),
864 	0x00000000,
865 	(0x0e00 << 16) | (0x8c38 >> 2),
866 	0x00000000,
867 	(0x0e00 << 16) | (0x8c3c >> 2),
868 	0x00000000,
869 	(0x0e00 << 16) | (0xae00 >> 2),
870 	0x00000000,
871 	(0x0e00 << 16) | (0x9604 >> 2),
872 	0x00000000,
873 	(0x0e00 << 16) | (0xac08 >> 2),
874 	0x00000000,
875 	(0x0e00 << 16) | (0xac0c >> 2),
876 	0x00000000,
877 	(0x0e00 << 16) | (0xac10 >> 2),
878 	0x00000000,
879 	(0x0e00 << 16) | (0xac14 >> 2),
880 	0x00000000,
881 	(0x0e00 << 16) | (0xac58 >> 2),
882 	0x00000000,
883 	(0x0e00 << 16) | (0xac68 >> 2),
884 	0x00000000,
885 	(0x0e00 << 16) | (0xac6c >> 2),
886 	0x00000000,
887 	(0x0e00 << 16) | (0xac70 >> 2),
888 	0x00000000,
889 	(0x0e00 << 16) | (0xac74 >> 2),
890 	0x00000000,
891 	(0x0e00 << 16) | (0xac78 >> 2),
892 	0x00000000,
893 	(0x0e00 << 16) | (0xac7c >> 2),
894 	0x00000000,
895 	(0x0e00 << 16) | (0xac80 >> 2),
896 	0x00000000,
897 	(0x0e00 << 16) | (0xac84 >> 2),
898 	0x00000000,
899 	(0x0e00 << 16) | (0xac88 >> 2),
900 	0x00000000,
901 	(0x0e00 << 16) | (0xac8c >> 2),
902 	0x00000000,
903 	(0x0e00 << 16) | (0x970c >> 2),
904 	0x00000000,
905 	(0x0e00 << 16) | (0x9714 >> 2),
906 	0x00000000,
907 	(0x0e00 << 16) | (0x9718 >> 2),
908 	0x00000000,
909 	(0x0e00 << 16) | (0x971c >> 2),
910 	0x00000000,
911 	(0x0e00 << 16) | (0x31068 >> 2),
912 	0x00000000,
913 	(0x4e00 << 16) | (0x31068 >> 2),
914 	0x00000000,
915 	(0x5e00 << 16) | (0x31068 >> 2),
916 	0x00000000,
917 	(0x6e00 << 16) | (0x31068 >> 2),
918 	0x00000000,
919 	(0x7e00 << 16) | (0x31068 >> 2),
920 	0x00000000,
921 	(0x0e00 << 16) | (0xcd10 >> 2),
922 	0x00000000,
923 	(0x0e00 << 16) | (0xcd14 >> 2),
924 	0x00000000,
925 	(0x0e00 << 16) | (0x88b0 >> 2),
926 	0x00000000,
927 	(0x0e00 << 16) | (0x88b4 >> 2),
928 	0x00000000,
929 	(0x0e00 << 16) | (0x88b8 >> 2),
930 	0x00000000,
931 	(0x0e00 << 16) | (0x88bc >> 2),
932 	0x00000000,
933 	(0x0400 << 16) | (0x89c0 >> 2),
934 	0x00000000,
935 	(0x0e00 << 16) | (0x88c4 >> 2),
936 	0x00000000,
937 	(0x0e00 << 16) | (0x88c8 >> 2),
938 	0x00000000,
939 	(0x0e00 << 16) | (0x88d0 >> 2),
940 	0x00000000,
941 	(0x0e00 << 16) | (0x88d4 >> 2),
942 	0x00000000,
943 	(0x0e00 << 16) | (0x88d8 >> 2),
944 	0x00000000,
945 	(0x0e00 << 16) | (0x8980 >> 2),
946 	0x00000000,
947 	(0x0e00 << 16) | (0x30938 >> 2),
948 	0x00000000,
949 	(0x0e00 << 16) | (0x3093c >> 2),
950 	0x00000000,
951 	(0x0e00 << 16) | (0x30940 >> 2),
952 	0x00000000,
953 	(0x0e00 << 16) | (0x89a0 >> 2),
954 	0x00000000,
955 	(0x0e00 << 16) | (0x30900 >> 2),
956 	0x00000000,
957 	(0x0e00 << 16) | (0x30904 >> 2),
958 	0x00000000,
959 	(0x0e00 << 16) | (0x89b4 >> 2),
960 	0x00000000,
961 	(0x0e00 << 16) | (0x3e1fc >> 2),
962 	0x00000000,
963 	(0x0e00 << 16) | (0x3c210 >> 2),
964 	0x00000000,
965 	(0x0e00 << 16) | (0x3c214 >> 2),
966 	0x00000000,
967 	(0x0e00 << 16) | (0x3c218 >> 2),
968 	0x00000000,
969 	(0x0e00 << 16) | (0x8904 >> 2),
970 	0x00000000,
971 	0x5,
972 	(0x0e00 << 16) | (0x8c28 >> 2),
973 	(0x0e00 << 16) | (0x8c2c >> 2),
974 	(0x0e00 << 16) | (0x8c30 >> 2),
975 	(0x0e00 << 16) | (0x8c34 >> 2),
976 	(0x0e00 << 16) | (0x9600 >> 2),
977 };
978 
979 static const u32 bonaire_golden_spm_registers[] =
980 {
981 	0x30800, 0xe0ffffff, 0xe0000000
982 };
983 
984 static const u32 bonaire_golden_common_registers[] =
985 {
986 	0xc770, 0xffffffff, 0x00000800,
987 	0xc774, 0xffffffff, 0x00000800,
988 	0xc798, 0xffffffff, 0x00007fbf,
989 	0xc79c, 0xffffffff, 0x00007faf
990 };
991 
992 static const u32 bonaire_golden_registers[] =
993 {
994 	0x3354, 0x00000333, 0x00000333,
995 	0x3350, 0x000c0fc0, 0x00040200,
996 	0x9a10, 0x00010000, 0x00058208,
997 	0x3c000, 0xffff1fff, 0x00140000,
998 	0x3c200, 0xfdfc0fff, 0x00000100,
999 	0x3c234, 0x40000000, 0x40000200,
1000 	0x9830, 0xffffffff, 0x00000000,
1001 	0x9834, 0xf00fffff, 0x00000400,
1002 	0x9838, 0x0002021c, 0x00020200,
1003 	0xc78, 0x00000080, 0x00000000,
1004 	0x5bb0, 0x000000f0, 0x00000070,
1005 	0x5bc0, 0xf0311fff, 0x80300000,
1006 	0x98f8, 0x73773777, 0x12010001,
1007 	0x350c, 0x00810000, 0x408af000,
1008 	0x7030, 0x31000111, 0x00000011,
1009 	0x2f48, 0x73773777, 0x12010001,
1010 	0x220c, 0x00007fb6, 0x0021a1b1,
1011 	0x2210, 0x00007fb6, 0x002021b1,
1012 	0x2180, 0x00007fb6, 0x00002191,
1013 	0x2218, 0x00007fb6, 0x002121b1,
1014 	0x221c, 0x00007fb6, 0x002021b1,
1015 	0x21dc, 0x00007fb6, 0x00002191,
1016 	0x21e0, 0x00007fb6, 0x00002191,
1017 	0x3628, 0x0000003f, 0x0000000a,
1018 	0x362c, 0x0000003f, 0x0000000a,
1019 	0x2ae4, 0x00073ffe, 0x000022a2,
1020 	0x240c, 0x000007ff, 0x00000000,
1021 	0x8a14, 0xf000003f, 0x00000007,
1022 	0x8bf0, 0x00002001, 0x00000001,
1023 	0x8b24, 0xffffffff, 0x00ffffff,
1024 	0x30a04, 0x0000ff0f, 0x00000000,
1025 	0x28a4c, 0x07ffffff, 0x06000000,
1026 	0x4d8, 0x00000fff, 0x00000100,
1027 	0x3e78, 0x00000001, 0x00000002,
1028 	0x9100, 0x03000000, 0x0362c688,
1029 	0x8c00, 0x000000ff, 0x00000001,
1030 	0xe40, 0x00001fff, 0x00001fff,
1031 	0x9060, 0x0000007f, 0x00000020,
1032 	0x9508, 0x00010000, 0x00010000,
1033 	0xac14, 0x000003ff, 0x000000f3,
1034 	0xac0c, 0xffffffff, 0x00001032
1035 };
1036 
/*
 * First register sequence handed to radeon_program_register_sequence()
 * by cik_init_golden_registers() for CHIP_BONAIRE.
 * Laid out three u32 values per line; presumably
 * {register offset, mask, value} triplets - confirm against
 * radeon_program_register_sequence().
 * Note that 0x30800 and 0x55e4 each appear twice in the sequence.
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	/* 0x3c020..0x3c0a8: the same five values repeated seven times */
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1122 
/*
 * Last register sequence programmed by cik_init_golden_registers() for
 * CHIP_KAVERI.  Presumably a single {register offset, mask, value}
 * triplet - confirm against radeon_program_register_sequence().
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1127 
/*
 * Third register sequence programmed by cik_init_golden_registers() for
 * CHIP_KAVERI.  Three u32 values per line; presumably
 * {register offset, mask, value} - confirm against
 * radeon_program_register_sequence().
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1135 
/*
 * Second register sequence programmed by cik_init_golden_registers() for
 * CHIP_KAVERI.  Three u32 values per line; presumably
 * {register offset, mask, value} - confirm against
 * radeon_program_register_sequence().
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1164 
/*
 * First register sequence handed to radeon_program_register_sequence()
 * by cik_init_golden_registers() for CHIP_KAVERI.
 * Laid out three u32 values per line; presumably
 * {register offset, mask, value} triplets - confirm against
 * radeon_program_register_sequence().
 * Note that 0x30800 and 0x55e4 each appear twice in the sequence.
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	/* 0x3c020..0x3c0bc: the same five values repeated eight times */
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1255 
/*
 * Last register sequence programmed by cik_init_golden_registers() for
 * both CHIP_KABINI and CHIP_MULLINS.  Presumably a single
 * {register offset, mask, value} triplet - confirm against
 * radeon_program_register_sequence().
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1260 
/*
 * Third register sequence programmed by cik_init_golden_registers() for
 * both CHIP_KABINI and CHIP_MULLINS.  Three u32 values per line;
 * presumably {register offset, mask, value} - confirm against
 * radeon_program_register_sequence().
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1268 
/*
 * Second register sequence programmed by cik_init_golden_registers() for
 * CHIP_KABINI (CHIP_MULLINS uses godavari_golden_registers instead).
 * Three u32 values per line; presumably {register offset, mask, value}
 * - confirm against radeon_program_register_sequence().
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1302 
/*
 * First register sequence handed to radeon_program_register_sequence()
 * by cik_init_golden_registers() for both CHIP_KABINI and CHIP_MULLINS.
 * Laid out three u32 values per line; presumably
 * {register offset, mask, value} triplets - confirm against
 * radeon_program_register_sequence().
 * Note that 0x30800 and 0x55e4 each appear twice in the sequence.
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	/* 0x3c020..0x3c044: the same five values repeated twice */
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1361 
/*
 * Last register sequence programmed by cik_init_golden_registers() for
 * CHIP_HAWAII.  Presumably a single {register offset, mask, value}
 * triplet - confirm against radeon_program_register_sequence().
 */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1366 
/*
 * Third register sequence programmed by cik_init_golden_registers() for
 * CHIP_HAWAII.  Three u32 values per line; presumably
 * {register offset, mask, value} - confirm against
 * radeon_program_register_sequence().
 */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1375 
/*
 * Second register sequence programmed by cik_init_golden_registers() for
 * CHIP_HAWAII.  Three u32 values per line; presumably
 * {register offset, mask, value} - confirm against
 * radeon_program_register_sequence().
 */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1415 
/*
 * First register sequence handed to radeon_program_register_sequence()
 * by cik_init_golden_registers() for CHIP_HAWAII.
 * Laid out three u32 values per line; presumably
 * {register offset, mask, value} triplets - confirm against
 * radeon_program_register_sequence().
 * Note that 0x30800 and 0x55e4 each appear twice in the sequence.
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	/* 0x3c020..0x3c0f8: the same five values repeated eleven times */
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1526 
1527 static const u32 godavari_golden_registers[] =
1528 {
1529 	0x55e4, 0xff607fff, 0xfc000100,
1530 	0x6ed8, 0x00010101, 0x00010000,
1531 	0x9830, 0xffffffff, 0x00000000,
1532 	0x98302, 0xf00fffff, 0x00000400,
1533 	0x6130, 0xffffffff, 0x00010000,
1534 	0x5bb0, 0x000000f0, 0x00000070,
1535 	0x5bc0, 0xf0311fff, 0x80300000,
1536 	0x98f8, 0x73773777, 0x12010001,
1537 	0x98fc, 0xffffffff, 0x00000010,
1538 	0x8030, 0x00001f0f, 0x0000100a,
1539 	0x2f48, 0x73773777, 0x12010001,
1540 	0x2408, 0x000fffff, 0x000c007f,
1541 	0x8a14, 0xf000003f, 0x00000007,
1542 	0x8b24, 0xffffffff, 0x00ff0fff,
1543 	0x30a04, 0x0000ff0f, 0x00000000,
1544 	0x28a4c, 0x07ffffff, 0x06000000,
1545 	0x4d8, 0x00000fff, 0x00000100,
1546 	0xd014, 0x00010000, 0x00810001,
1547 	0xd814, 0x00010000, 0x00810001,
1548 	0x3e78, 0x00000001, 0x00000002,
1549 	0xc768, 0x00000008, 0x00000008,
1550 	0xc770, 0x00000f00, 0x00000800,
1551 	0xc774, 0x00000f00, 0x00000800,
1552 	0xc798, 0x00ffffff, 0x00ff7fbf,
1553 	0xc79c, 0x00ffffff, 0x00ff7faf,
1554 	0x8c00, 0x000000ff, 0x00000001,
1555 	0x214f8, 0x01ff01ff, 0x00000002,
1556 	0x21498, 0x007ff800, 0x00200000,
1557 	0x2015c, 0xffffffff, 0x00000f40,
1558 	0x88c4, 0x001f3ae3, 0x00000082,
1559 	0x88d4, 0x0000001f, 0x00000010,
1560 	0x30934, 0xffffffff, 0x00000000
1561 };
1562 
1563 
1564 static void cik_init_golden_registers(struct radeon_device *rdev)
1565 {
1566 	switch (rdev->family) {
1567 	case CHIP_BONAIRE:
1568 		radeon_program_register_sequence(rdev,
1569 						 bonaire_mgcg_cgcg_init,
1570 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1571 		radeon_program_register_sequence(rdev,
1572 						 bonaire_golden_registers,
1573 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1574 		radeon_program_register_sequence(rdev,
1575 						 bonaire_golden_common_registers,
1576 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1577 		radeon_program_register_sequence(rdev,
1578 						 bonaire_golden_spm_registers,
1579 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1580 		break;
1581 	case CHIP_KABINI:
1582 		radeon_program_register_sequence(rdev,
1583 						 kalindi_mgcg_cgcg_init,
1584 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1585 		radeon_program_register_sequence(rdev,
1586 						 kalindi_golden_registers,
1587 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1588 		radeon_program_register_sequence(rdev,
1589 						 kalindi_golden_common_registers,
1590 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1591 		radeon_program_register_sequence(rdev,
1592 						 kalindi_golden_spm_registers,
1593 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1594 		break;
1595 	case CHIP_MULLINS:
1596 		radeon_program_register_sequence(rdev,
1597 						 kalindi_mgcg_cgcg_init,
1598 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1599 		radeon_program_register_sequence(rdev,
1600 						 godavari_golden_registers,
1601 						 (const u32)ARRAY_SIZE(godavari_golden_registers));
1602 		radeon_program_register_sequence(rdev,
1603 						 kalindi_golden_common_registers,
1604 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1605 		radeon_program_register_sequence(rdev,
1606 						 kalindi_golden_spm_registers,
1607 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1608 		break;
1609 	case CHIP_KAVERI:
1610 		radeon_program_register_sequence(rdev,
1611 						 spectre_mgcg_cgcg_init,
1612 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1613 		radeon_program_register_sequence(rdev,
1614 						 spectre_golden_registers,
1615 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1616 		radeon_program_register_sequence(rdev,
1617 						 spectre_golden_common_registers,
1618 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1619 		radeon_program_register_sequence(rdev,
1620 						 spectre_golden_spm_registers,
1621 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1622 		break;
1623 	case CHIP_HAWAII:
1624 		radeon_program_register_sequence(rdev,
1625 						 hawaii_mgcg_cgcg_init,
1626 						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1627 		radeon_program_register_sequence(rdev,
1628 						 hawaii_golden_registers,
1629 						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1630 		radeon_program_register_sequence(rdev,
1631 						 hawaii_golden_common_registers,
1632 						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1633 		radeon_program_register_sequence(rdev,
1634 						 hawaii_golden_spm_registers,
1635 						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1636 		break;
1637 	default:
1638 		break;
1639 	}
1640 }
1641 
1642 /**
1643  * cik_get_xclk - get the xclk
1644  *
1645  * @rdev: radeon_device pointer
1646  *
1647  * Returns the reference clock used by the gfx engine
1648  * (CIK).
1649  */
1650 u32 cik_get_xclk(struct radeon_device *rdev)
1651 {
1652         u32 reference_clock = rdev->clock.spll.reference_freq;
1653 
1654 	if (rdev->flags & RADEON_IS_IGP) {
1655 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1656 			return reference_clock / 2;
1657 	} else {
1658 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1659 			return reference_clock / 4;
1660 	}
1661 	return reference_clock;
1662 }
1663 
1664 /**
1665  * cik_mm_rdoorbell - read a doorbell dword
1666  *
1667  * @rdev: radeon_device pointer
1668  * @index: doorbell index
1669  *
1670  * Returns the value in the doorbell aperture at the
1671  * requested doorbell index (CIK).
1672  */
1673 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1674 {
1675 	if (index < rdev->doorbell.num_doorbells) {
1676 		return readl(rdev->doorbell.ptr + index);
1677 	} else {
1678 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1679 		return 0;
1680 	}
1681 }
1682 
1683 /**
1684  * cik_mm_wdoorbell - write a doorbell dword
1685  *
1686  * @rdev: radeon_device pointer
1687  * @index: doorbell index
1688  * @v: value to write
1689  *
1690  * Writes @v to the doorbell aperture at the
1691  * requested doorbell index (CIK).
1692  */
1693 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1694 {
1695 	if (index < rdev->doorbell.num_doorbells) {
1696 		writel(v, rdev->doorbell.ptr + index);
1697 	} else {
1698 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1699 	}
1700 }
1701 
/* Number of two-value rows in bonaire_io_mc_regs. */
#define BONAIRE_IO_MC_REGS_SIZE 36

/*
 * Built-in MC IO settings for CHIP_BONAIRE, used by
 * ci_mc_load_microcode() when the legacy (non new_fw) firmware is in use.
 * For each row the first value is written to MC_SEQ_IO_DEBUG_INDEX and
 * the second to MC_SEQ_IO_DEBUG_DATA.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1743 
/* Number of two-value rows in hawaii_io_mc_regs. */
#define HAWAII_IO_MC_REGS_SIZE 22

/*
 * Built-in MC IO settings for CHIP_HAWAII, used by
 * ci_mc_load_microcode() when the legacy (non new_fw) firmware is in use.
 * For each row the first value is written to MC_SEQ_IO_DEBUG_INDEX and
 * the second to MC_SEQ_IO_DEBUG_DATA.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1771 
1772 
1773 /**
1774  * cik_srbm_select - select specific register instances
1775  *
1776  * @rdev: radeon_device pointer
1777  * @me: selected ME (micro engine)
1778  * @pipe: pipe
1779  * @queue: queue
1780  * @vmid: VMID
1781  *
1782  * Switches the currently active registers instances.  Some
1783  * registers are instanced per VMID, others are instanced per
1784  * me/pipe/queue combination.
1785  */
1786 static void cik_srbm_select(struct radeon_device *rdev,
1787 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1788 {
1789 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1790 			     MEID(me & 0x3) |
1791 			     VMID(vmid & 0xf) |
1792 			     QUEUEID(queue & 0x7));
1793 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1794 }
1795 
1796 /* ucode loading */
1797 /**
1798  * ci_mc_load_microcode - load MC ucode into the hw
1799  *
1800  * @rdev: radeon_device pointer
1801  *
1802  * Load the GDDR MC ucode into the hw (CIK).
1803  * Returns 0 on success, error on failure.
1804  */
1805 int ci_mc_load_microcode(struct radeon_device *rdev)
1806 {
1807 	const __be32 *fw_data = NULL;
1808 	const __le32 *new_fw_data = NULL;
1809 	u32 running, blackout = 0;
1810 	u32 *io_mc_regs = NULL;
1811 	const __le32 *new_io_mc_regs = NULL;
1812 	int i, regs_size, ucode_size;
1813 
1814 	if (!rdev->mc_fw)
1815 		return -EINVAL;
1816 
1817 	if (rdev->new_fw) {
1818 		const struct mc_firmware_header_v1_0 *hdr =
1819 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1820 
1821 		radeon_ucode_print_mc_hdr(&hdr->header);
1822 
1823 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1824 		new_io_mc_regs = (const __le32 *)
1825 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1826 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1827 		new_fw_data = (const __le32 *)
1828 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1829 	} else {
1830 		ucode_size = rdev->mc_fw->size / 4;
1831 
1832 		switch (rdev->family) {
1833 		case CHIP_BONAIRE:
1834 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1835 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1836 			break;
1837 		case CHIP_HAWAII:
1838 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1839 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1840 			break;
1841 		default:
1842 			return -EINVAL;
1843 		}
1844 		fw_data = (const __be32 *)rdev->mc_fw->data;
1845 	}
1846 
1847 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1848 
1849 	if (running == 0) {
1850 		if (running) {
1851 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1852 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1853 		}
1854 
1855 		/* reset the engine and set to writable */
1856 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1857 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1858 
1859 		/* load mc io regs */
1860 		for (i = 0; i < regs_size; i++) {
1861 			if (rdev->new_fw) {
1862 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1863 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1864 			} else {
1865 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1866 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1867 			}
1868 		}
1869 		/* load the MC ucode */
1870 		for (i = 0; i < ucode_size; i++) {
1871 			if (rdev->new_fw)
1872 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1873 			else
1874 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1875 		}
1876 
1877 		/* put the engine back into the active state */
1878 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1879 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1880 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1881 
1882 		/* wait for training to complete */
1883 		for (i = 0; i < rdev->usec_timeout; i++) {
1884 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1885 				break;
1886 			udelay(1);
1887 		}
1888 		for (i = 0; i < rdev->usec_timeout; i++) {
1889 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1890 				break;
1891 			udelay(1);
1892 		}
1893 
1894 		if (running)
1895 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1896 	}
1897 
1898 	return 0;
1899 }
1900 
1901 /**
1902  * cik_init_microcode - load ucode images from disk
1903  *
1904  * @rdev: radeon_device pointer
1905  *
1906  * Use the firmware interface to load the ucode images into
1907  * the driver (not loaded into hw).
1908  * Returns 0 on success, error on failure.
1909  */
1910 static int cik_init_microcode(struct radeon_device *rdev)
1911 {
1912 	const char *chip_name;
1913 	const char *new_chip_name;
1914 	size_t pfp_req_size, me_req_size, ce_req_size,
1915 		mec_req_size, rlc_req_size, mc_req_size = 0,
1916 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1917 	char fw_name[30];
1918 	int new_fw = 0;
1919 	int err;
1920 	int num_fw;
1921 
1922 	DRM_DEBUG("\n");
1923 
1924 	switch (rdev->family) {
1925 	case CHIP_BONAIRE:
1926 		chip_name = "BONAIRE";
1927 		new_chip_name = "bonaire";
1928 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1929 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1930 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1931 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1932 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1933 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1934 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1935 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1936 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1937 		num_fw = 8;
1938 		break;
1939 	case CHIP_HAWAII:
1940 		chip_name = "HAWAII";
1941 		new_chip_name = "hawaii";
1942 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1943 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1944 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1945 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1946 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1947 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1948 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1949 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1950 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1951 		num_fw = 8;
1952 		break;
1953 	case CHIP_KAVERI:
1954 		chip_name = "KAVERI";
1955 		new_chip_name = "kaveri";
1956 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1957 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1958 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1959 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1960 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1961 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1962 		num_fw = 7;
1963 		break;
1964 	case CHIP_KABINI:
1965 		chip_name = "KABINI";
1966 		new_chip_name = "kabini";
1967 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1968 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1969 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1970 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1971 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1972 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1973 		num_fw = 6;
1974 		break;
1975 	case CHIP_MULLINS:
1976 		chip_name = "MULLINS";
1977 		new_chip_name = "mullins";
1978 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1979 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1980 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1981 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1982 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
1983 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1984 		num_fw = 6;
1985 		break;
1986 	default: BUG();
1987 	}
1988 
1989 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
1990 
1991 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1992 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1993 	if (err) {
1994 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1995 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1996 		if (err)
1997 			goto out;
1998 		if (rdev->pfp_fw->size != pfp_req_size) {
1999 			printk(KERN_ERR
2000 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2001 			       rdev->pfp_fw->size, fw_name);
2002 			err = -EINVAL;
2003 			goto out;
2004 		}
2005 	} else {
2006 		err = radeon_ucode_validate(rdev->pfp_fw);
2007 		if (err) {
2008 			printk(KERN_ERR
2009 			       "cik_fw: validation failed for firmware \"%s\"\n",
2010 			       fw_name);
2011 			goto out;
2012 		} else {
2013 			new_fw++;
2014 		}
2015 	}
2016 
2017 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2018 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2019 	if (err) {
2020 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2021 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2022 		if (err)
2023 			goto out;
2024 		if (rdev->me_fw->size != me_req_size) {
2025 			printk(KERN_ERR
2026 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2027 			       rdev->me_fw->size, fw_name);
2028 			err = -EINVAL;
2029 		}
2030 	} else {
2031 		err = radeon_ucode_validate(rdev->me_fw);
2032 		if (err) {
2033 			printk(KERN_ERR
2034 			       "cik_fw: validation failed for firmware \"%s\"\n",
2035 			       fw_name);
2036 			goto out;
2037 		} else {
2038 			new_fw++;
2039 		}
2040 	}
2041 
2042 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2043 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2044 	if (err) {
2045 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2046 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2047 		if (err)
2048 			goto out;
2049 		if (rdev->ce_fw->size != ce_req_size) {
2050 			printk(KERN_ERR
2051 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2052 			       rdev->ce_fw->size, fw_name);
2053 			err = -EINVAL;
2054 		}
2055 	} else {
2056 		err = radeon_ucode_validate(rdev->ce_fw);
2057 		if (err) {
2058 			printk(KERN_ERR
2059 			       "cik_fw: validation failed for firmware \"%s\"\n",
2060 			       fw_name);
2061 			goto out;
2062 		} else {
2063 			new_fw++;
2064 		}
2065 	}
2066 
2067 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2068 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2069 	if (err) {
2070 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2071 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2072 		if (err)
2073 			goto out;
2074 		if (rdev->mec_fw->size != mec_req_size) {
2075 			printk(KERN_ERR
2076 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2077 			       rdev->mec_fw->size, fw_name);
2078 			err = -EINVAL;
2079 		}
2080 	} else {
2081 		err = radeon_ucode_validate(rdev->mec_fw);
2082 		if (err) {
2083 			printk(KERN_ERR
2084 			       "cik_fw: validation failed for firmware \"%s\"\n",
2085 			       fw_name);
2086 			goto out;
2087 		} else {
2088 			new_fw++;
2089 		}
2090 	}
2091 
2092 	if (rdev->family == CHIP_KAVERI) {
2093 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2094 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2095 		if (err) {
2096 			goto out;
2097 		} else {
2098 			err = radeon_ucode_validate(rdev->mec2_fw);
2099 			if (err) {
2100 				goto out;
2101 			} else {
2102 				new_fw++;
2103 			}
2104 		}
2105 	}
2106 
2107 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2108 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2109 	if (err) {
2110 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2111 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2112 		if (err)
2113 			goto out;
2114 		if (rdev->rlc_fw->size != rlc_req_size) {
2115 			printk(KERN_ERR
2116 			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2117 			       rdev->rlc_fw->size, fw_name);
2118 			err = -EINVAL;
2119 		}
2120 	} else {
2121 		err = radeon_ucode_validate(rdev->rlc_fw);
2122 		if (err) {
2123 			printk(KERN_ERR
2124 			       "cik_fw: validation failed for firmware \"%s\"\n",
2125 			       fw_name);
2126 			goto out;
2127 		} else {
2128 			new_fw++;
2129 		}
2130 	}
2131 
2132 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2133 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2134 	if (err) {
2135 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2136 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2137 		if (err)
2138 			goto out;
2139 		if (rdev->sdma_fw->size != sdma_req_size) {
2140 			printk(KERN_ERR
2141 			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2142 			       rdev->sdma_fw->size, fw_name);
2143 			err = -EINVAL;
2144 		}
2145 	} else {
2146 		err = radeon_ucode_validate(rdev->sdma_fw);
2147 		if (err) {
2148 			printk(KERN_ERR
2149 			       "cik_fw: validation failed for firmware \"%s\"\n",
2150 			       fw_name);
2151 			goto out;
2152 		} else {
2153 			new_fw++;
2154 		}
2155 	}
2156 
2157 	/* No SMC, MC ucode on APUs */
2158 	if (!(rdev->flags & RADEON_IS_IGP)) {
2159 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2160 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2161 		if (err) {
2162 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2163 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2164 			if (err) {
2165 				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2166 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2167 				if (err)
2168 					goto out;
2169 			}
2170 			if ((rdev->mc_fw->size != mc_req_size) &&
2171 			    (rdev->mc_fw->size != mc2_req_size)){
2172 				printk(KERN_ERR
2173 				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2174 				       rdev->mc_fw->size, fw_name);
2175 				err = -EINVAL;
2176 			}
2177 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2178 		} else {
2179 			err = radeon_ucode_validate(rdev->mc_fw);
2180 			if (err) {
2181 				printk(KERN_ERR
2182 				       "cik_fw: validation failed for firmware \"%s\"\n",
2183 				       fw_name);
2184 				goto out;
2185 			} else {
2186 				new_fw++;
2187 			}
2188 		}
2189 
2190 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2191 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2192 		if (err) {
2193 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2194 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2195 			if (err) {
2196 				printk(KERN_ERR
2197 				       "smc: error loading firmware \"%s\"\n",
2198 				       fw_name);
2199 				release_firmware(rdev->smc_fw);
2200 				rdev->smc_fw = NULL;
2201 				err = 0;
2202 			} else if (rdev->smc_fw->size != smc_req_size) {
2203 				printk(KERN_ERR
2204 				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2205 				       rdev->smc_fw->size, fw_name);
2206 				err = -EINVAL;
2207 			}
2208 		} else {
2209 			err = radeon_ucode_validate(rdev->smc_fw);
2210 			if (err) {
2211 				printk(KERN_ERR
2212 				       "cik_fw: validation failed for firmware \"%s\"\n",
2213 				       fw_name);
2214 				goto out;
2215 			} else {
2216 				new_fw++;
2217 			}
2218 		}
2219 	}
2220 
2221 	if (new_fw == 0) {
2222 		rdev->new_fw = false;
2223 	} else if (new_fw < num_fw) {
2224 		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2225 		err = -EINVAL;
2226 	} else {
2227 		rdev->new_fw = true;
2228 	}
2229 
2230 out:
2231 	if (err) {
2232 		if (err != -EINVAL)
2233 			printk(KERN_ERR
2234 			       "cik_cp: Failed to load firmware \"%s\"\n",
2235 			       fw_name);
2236 		release_firmware(rdev->pfp_fw);
2237 		rdev->pfp_fw = NULL;
2238 		release_firmware(rdev->me_fw);
2239 		rdev->me_fw = NULL;
2240 		release_firmware(rdev->ce_fw);
2241 		rdev->ce_fw = NULL;
2242 		release_firmware(rdev->mec_fw);
2243 		rdev->mec_fw = NULL;
2244 		release_firmware(rdev->mec2_fw);
2245 		rdev->mec2_fw = NULL;
2246 		release_firmware(rdev->rlc_fw);
2247 		rdev->rlc_fw = NULL;
2248 		release_firmware(rdev->sdma_fw);
2249 		rdev->sdma_fw = NULL;
2250 		release_firmware(rdev->mc_fw);
2251 		rdev->mc_fw = NULL;
2252 		release_firmware(rdev->smc_fw);
2253 		rdev->smc_fw = NULL;
2254 	}
2255 	return err;
2256 }
2257 
2258 /*
2259  * Core functions
2260  */
2261 /**
2262  * cik_tiling_mode_table_init - init the hw tiling table
2263  *
2264  * @rdev: radeon_device pointer
2265  *
2266  * Starting with SI, the tiling setup is done globally in a set of
2267  * 32 tiling modes (plus, here, 16 secondary macrotile modes).  Rather
2268  * than selecting each set of parameters per surface as on older asics,
2269  * we just select which index in the tiling table we want to use, and
2270  * the surface uses those parameters (CIK).
2271  */
2272 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2273 {
2274 	const u32 num_tile_mode_states = 32;
2275 	const u32 num_secondary_tile_mode_states = 16;
2276 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2277 	u32 num_pipe_configs;
2278 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2279 		rdev->config.cik.max_shader_engines;
2280 
2281 	switch (rdev->config.cik.mem_row_size_in_kb) {
2282 	case 1:
2283 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2284 		break;
2285 	case 2:
2286 	default:
2287 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2288 		break;
2289 	case 4:
2290 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2291 		break;
2292 	}
2293 
2294 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2295 	if (num_pipe_configs > 8)
2296 		num_pipe_configs = 16;
2297 
2298 	if (num_pipe_configs == 16) {
2299 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2300 			switch (reg_offset) {
2301 			case 0:
2302 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2303 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2304 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2305 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2306 				break;
2307 			case 1:
2308 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2309 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2310 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2311 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2312 				break;
2313 			case 2:
2314 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2315 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2316 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2317 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2318 				break;
2319 			case 3:
2320 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2321 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2322 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2323 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2324 				break;
2325 			case 4:
2326 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2327 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2328 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329 						 TILE_SPLIT(split_equal_to_row_size));
2330 				break;
2331 			case 5:
2332 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2333 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2334 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2335 				break;
2336 			case 6:
2337 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2338 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2339 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2340 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2341 				break;
2342 			case 7:
2343 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2344 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2345 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2346 						 TILE_SPLIT(split_equal_to_row_size));
2347 				break;
2348 			case 8:
2349 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2350 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2351 				break;
2352 			case 9:
2353 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2354 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2355 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2356 				break;
2357 			case 10:
2358 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2359 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2360 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2361 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2362 				break;
2363 			case 11:
2364 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2365 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2366 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2367 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2368 				break;
2369 			case 12:
2370 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2371 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2372 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2374 				break;
2375 			case 13:
2376 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2377 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2379 				break;
2380 			case 14:
2381 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2382 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2383 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2384 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2385 				break;
2386 			case 16:
2387 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2388 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2389 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2390 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2391 				break;
2392 			case 17:
2393 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2394 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2395 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2397 				break;
2398 			case 27:
2399 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2400 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2402 				break;
2403 			case 28:
2404 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2405 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2406 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2408 				break;
2409 			case 29:
2410 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2411 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2412 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2413 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2414 				break;
2415 			case 30:
2416 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2417 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2418 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2420 				break;
2421 			default:
2422 				gb_tile_moden = 0;
2423 				break;
2424 			}
2425 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2426 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2427 		}
2428 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2429 			switch (reg_offset) {
2430 			case 0:
2431 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2432 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2433 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2434 						 NUM_BANKS(ADDR_SURF_16_BANK));
2435 				break;
2436 			case 1:
2437 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2439 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2440 						 NUM_BANKS(ADDR_SURF_16_BANK));
2441 				break;
2442 			case 2:
2443 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2444 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2445 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2446 						 NUM_BANKS(ADDR_SURF_16_BANK));
2447 				break;
2448 			case 3:
2449 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2451 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2452 						 NUM_BANKS(ADDR_SURF_16_BANK));
2453 				break;
2454 			case 4:
2455 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2456 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2457 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2458 						 NUM_BANKS(ADDR_SURF_8_BANK));
2459 				break;
2460 			case 5:
2461 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2464 						 NUM_BANKS(ADDR_SURF_4_BANK));
2465 				break;
2466 			case 6:
2467 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2468 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2469 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2470 						 NUM_BANKS(ADDR_SURF_2_BANK));
2471 				break;
2472 			case 8:
2473 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2475 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2476 						 NUM_BANKS(ADDR_SURF_16_BANK));
2477 				break;
2478 			case 9:
2479 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2480 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2481 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2482 						 NUM_BANKS(ADDR_SURF_16_BANK));
2483 				break;
2484 			case 10:
2485 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2487 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2488 						 NUM_BANKS(ADDR_SURF_16_BANK));
2489 				break;
2490 			case 11:
2491 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2492 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2493 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2494 						 NUM_BANKS(ADDR_SURF_8_BANK));
2495 				break;
2496 			case 12:
2497 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2500 						 NUM_BANKS(ADDR_SURF_4_BANK));
2501 				break;
2502 			case 13:
2503 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2504 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2505 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2506 						 NUM_BANKS(ADDR_SURF_2_BANK));
2507 				break;
2508 			case 14:
2509 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2511 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2512 						 NUM_BANKS(ADDR_SURF_2_BANK));
2513 				break;
2514 			default:
2515 				gb_tile_moden = 0;
2516 				break;
2517 			}
2518 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2519 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2520 		}
2521 	} else if (num_pipe_configs == 8) {
2522 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2523 			switch (reg_offset) {
2524 			case 0:
2525 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2527 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2528 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2529 				break;
2530 			case 1:
2531 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2532 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2533 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2534 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2535 				break;
2536 			case 2:
2537 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2538 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2539 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2540 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2541 				break;
2542 			case 3:
2543 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2544 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2545 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2546 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2547 				break;
2548 			case 4:
2549 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2551 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552 						 TILE_SPLIT(split_equal_to_row_size));
2553 				break;
2554 			case 5:
2555 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2556 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2557 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2558 				break;
2559 			case 6:
2560 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2561 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2562 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2564 				break;
2565 			case 7:
2566 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2567 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2568 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2569 						 TILE_SPLIT(split_equal_to_row_size));
2570 				break;
2571 			case 8:
2572 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2573 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2574 				break;
2575 			case 9:
2576 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2577 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2579 				break;
2580 			case 10:
2581 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2582 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2583 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2584 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2585 				break;
2586 			case 11:
2587 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2588 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2589 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2590 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2591 				break;
2592 			case 12:
2593 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2594 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2595 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2596 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2597 				break;
2598 			case 13:
2599 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2600 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2601 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2602 				break;
2603 			case 14:
2604 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2605 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2606 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2607 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2608 				break;
2609 			case 16:
2610 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2611 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2612 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2613 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2614 				break;
2615 			case 17:
2616 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2617 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2618 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2619 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2620 				break;
2621 			case 27:
2622 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2623 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2624 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2625 				break;
2626 			case 28:
2627 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2628 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2629 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2630 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2631 				break;
2632 			case 29:
2633 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2634 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2635 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2636 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2637 				break;
2638 			case 30:
2639 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2640 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2641 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2642 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2643 				break;
2644 			default:
2645 				gb_tile_moden = 0;
2646 				break;
2647 			}
2648 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2649 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2650 		}
2651 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2652 			switch (reg_offset) {
2653 			case 0:
2654 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2655 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2656 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2657 						 NUM_BANKS(ADDR_SURF_16_BANK));
2658 				break;
2659 			case 1:
2660 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2661 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2662 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2663 						 NUM_BANKS(ADDR_SURF_16_BANK));
2664 				break;
2665 			case 2:
2666 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2667 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2668 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2669 						 NUM_BANKS(ADDR_SURF_16_BANK));
2670 				break;
2671 			case 3:
2672 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2673 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2674 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2675 						 NUM_BANKS(ADDR_SURF_16_BANK));
2676 				break;
2677 			case 4:
2678 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2679 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2680 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2681 						 NUM_BANKS(ADDR_SURF_8_BANK));
2682 				break;
2683 			case 5:
2684 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2685 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2686 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2687 						 NUM_BANKS(ADDR_SURF_4_BANK));
2688 				break;
2689 			case 6:
2690 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2692 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2693 						 NUM_BANKS(ADDR_SURF_2_BANK));
2694 				break;
2695 			case 8:
2696 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2698 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2699 						 NUM_BANKS(ADDR_SURF_16_BANK));
2700 				break;
2701 			case 9:
2702 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2703 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2704 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2705 						 NUM_BANKS(ADDR_SURF_16_BANK));
2706 				break;
2707 			case 10:
2708 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2709 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2710 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2711 						 NUM_BANKS(ADDR_SURF_16_BANK));
2712 				break;
2713 			case 11:
2714 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2715 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2716 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2717 						 NUM_BANKS(ADDR_SURF_16_BANK));
2718 				break;
2719 			case 12:
2720 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2721 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2722 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2723 						 NUM_BANKS(ADDR_SURF_8_BANK));
2724 				break;
2725 			case 13:
2726 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2728 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2729 						 NUM_BANKS(ADDR_SURF_4_BANK));
2730 				break;
2731 			case 14:
2732 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2734 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2735 						 NUM_BANKS(ADDR_SURF_2_BANK));
2736 				break;
2737 			default:
2738 				gb_tile_moden = 0;
2739 				break;
2740 			}
2741 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2742 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2743 		}
2744 	} else if (num_pipe_configs == 4) {
2745 		if (num_rbs == 4) {
2746 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2747 				switch (reg_offset) {
2748 				case 0:
2749 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2750 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2751 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2752 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2753 					break;
2754 				case 1:
2755 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2756 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2757 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2758 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2759 					break;
2760 				case 2:
2761 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2762 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2763 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2764 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2765 					break;
2766 				case 3:
2767 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2768 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2769 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2770 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2771 					break;
2772 				case 4:
2773 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2774 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2775 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2776 							 TILE_SPLIT(split_equal_to_row_size));
2777 					break;
2778 				case 5:
2779 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2780 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2781 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2782 					break;
2783 				case 6:
2784 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2785 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2786 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2787 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2788 					break;
2789 				case 7:
2790 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2791 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2792 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2793 							 TILE_SPLIT(split_equal_to_row_size));
2794 					break;
2795 				case 8:
2796 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2797 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2798 					break;
2799 				case 9:
2800 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2801 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2802 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2803 					break;
2804 				case 10:
2805 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2806 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2807 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2808 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2809 					break;
2810 				case 11:
2811 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2812 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2813 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815 					break;
2816 				case 12:
2817 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2818 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2819 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2820 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2821 					break;
2822 				case 13:
2823 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2824 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2825 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2826 					break;
2827 				case 14:
2828 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2829 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2830 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2831 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2832 					break;
2833 				case 16:
2834 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2835 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2836 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2837 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2838 					break;
2839 				case 17:
2840 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2841 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2842 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2843 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2844 					break;
2845 				case 27:
2846 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2847 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2848 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2849 					break;
2850 				case 28:
2851 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2852 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2853 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2854 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2855 					break;
2856 				case 29:
2857 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2858 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2859 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2860 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2861 					break;
2862 				case 30:
2863 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2864 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2865 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2866 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2867 					break;
2868 				default:
2869 					gb_tile_moden = 0;
2870 					break;
2871 				}
2872 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2873 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2874 			}
2875 		} else if (num_rbs < 4) {
2876 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2877 				switch (reg_offset) {
2878 				case 0:
2879 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2880 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2881 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2882 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2883 					break;
2884 				case 1:
2885 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2887 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2888 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2889 					break;
2890 				case 2:
2891 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2892 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2893 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2894 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2895 					break;
2896 				case 3:
2897 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2898 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2899 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2900 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2901 					break;
2902 				case 4:
2903 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2904 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2905 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2906 							 TILE_SPLIT(split_equal_to_row_size));
2907 					break;
2908 				case 5:
2909 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2910 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2911 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2912 					break;
2913 				case 6:
2914 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2915 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2916 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2917 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2918 					break;
2919 				case 7:
2920 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2921 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2922 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2923 							 TILE_SPLIT(split_equal_to_row_size));
2924 					break;
2925 				case 8:
2926 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2927 						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2928 					break;
2929 				case 9:
2930 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2931 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2932 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2933 					break;
2934 				case 10:
2935 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2936 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2937 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2938 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2939 					break;
2940 				case 11:
2941 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2942 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2943 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2944 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2945 					break;
2946 				case 12:
2947 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2948 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2949 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2950 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951 					break;
2952 				case 13:
2953 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2954 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2955 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2956 					break;
2957 				case 14:
2958 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2959 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2960 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2961 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2962 					break;
2963 				case 16:
2964 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2965 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2966 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2967 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2968 					break;
2969 				case 17:
2970 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2971 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2972 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2973 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2974 					break;
2975 				case 27:
2976 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2977 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2978 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2979 					break;
2980 				case 28:
2981 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2982 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2983 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2984 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2985 					break;
2986 				case 29:
2987 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2988 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2989 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2990 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2991 					break;
2992 				case 30:
2993 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2994 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2995 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2996 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2997 					break;
2998 				default:
2999 					gb_tile_moden = 0;
3000 					break;
3001 				}
3002 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3003 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3004 			}
3005 		}
3006 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3007 			switch (reg_offset) {
3008 			case 0:
3009 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3010 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3011 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3012 						 NUM_BANKS(ADDR_SURF_16_BANK));
3013 				break;
3014 			case 1:
3015 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3016 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3017 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3018 						 NUM_BANKS(ADDR_SURF_16_BANK));
3019 				break;
3020 			case 2:
3021 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3022 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3023 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3024 						 NUM_BANKS(ADDR_SURF_16_BANK));
3025 				break;
3026 			case 3:
3027 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3028 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3029 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3030 						 NUM_BANKS(ADDR_SURF_16_BANK));
3031 				break;
3032 			case 4:
3033 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3034 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3035 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3036 						 NUM_BANKS(ADDR_SURF_16_BANK));
3037 				break;
3038 			case 5:
3039 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3040 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3041 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3042 						 NUM_BANKS(ADDR_SURF_8_BANK));
3043 				break;
3044 			case 6:
3045 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3046 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3047 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3048 						 NUM_BANKS(ADDR_SURF_4_BANK));
3049 				break;
3050 			case 8:
3051 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3052 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3053 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3054 						 NUM_BANKS(ADDR_SURF_16_BANK));
3055 				break;
3056 			case 9:
3057 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3058 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3059 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3060 						 NUM_BANKS(ADDR_SURF_16_BANK));
3061 				break;
3062 			case 10:
3063 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3064 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3065 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3066 						 NUM_BANKS(ADDR_SURF_16_BANK));
3067 				break;
3068 			case 11:
3069 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3070 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3071 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3072 						 NUM_BANKS(ADDR_SURF_16_BANK));
3073 				break;
3074 			case 12:
3075 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3076 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3077 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3078 						 NUM_BANKS(ADDR_SURF_16_BANK));
3079 				break;
3080 			case 13:
3081 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3082 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3083 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3084 						 NUM_BANKS(ADDR_SURF_8_BANK));
3085 				break;
3086 			case 14:
3087 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3088 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3089 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3090 						 NUM_BANKS(ADDR_SURF_4_BANK));
3091 				break;
3092 			default:
3093 				gb_tile_moden = 0;
3094 				break;
3095 			}
3096 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3097 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3098 		}
3099 	} else if (num_pipe_configs == 2) {
3100 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3101 			switch (reg_offset) {
3102 			case 0:
3103 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3104 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3105 						 PIPE_CONFIG(ADDR_SURF_P2) |
3106 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3107 				break;
3108 			case 1:
3109 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3110 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3111 						 PIPE_CONFIG(ADDR_SURF_P2) |
3112 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3113 				break;
3114 			case 2:
3115 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3116 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3117 						 PIPE_CONFIG(ADDR_SURF_P2) |
3118 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3119 				break;
3120 			case 3:
3121 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3122 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3123 						 PIPE_CONFIG(ADDR_SURF_P2) |
3124 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3125 				break;
3126 			case 4:
3127 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3128 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3129 						 PIPE_CONFIG(ADDR_SURF_P2) |
3130 						 TILE_SPLIT(split_equal_to_row_size));
3131 				break;
3132 			case 5:
3133 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3134 						 PIPE_CONFIG(ADDR_SURF_P2) |
3135 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3136 				break;
3137 			case 6:
3138 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3139 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3140 						 PIPE_CONFIG(ADDR_SURF_P2) |
3141 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3142 				break;
3143 			case 7:
3144 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3145 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3146 						 PIPE_CONFIG(ADDR_SURF_P2) |
3147 						 TILE_SPLIT(split_equal_to_row_size));
3148 				break;
3149 			case 8:
3150 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3151 						PIPE_CONFIG(ADDR_SURF_P2);
3152 				break;
3153 			case 9:
3154 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3155 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3156 						 PIPE_CONFIG(ADDR_SURF_P2));
3157 				break;
3158 			case 10:
3159 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3160 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3161 						 PIPE_CONFIG(ADDR_SURF_P2) |
3162 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3163 				break;
3164 			case 11:
3165 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3166 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3167 						 PIPE_CONFIG(ADDR_SURF_P2) |
3168 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3169 				break;
3170 			case 12:
3171 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3172 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3173 						 PIPE_CONFIG(ADDR_SURF_P2) |
3174 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3175 				break;
3176 			case 13:
3177 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3178 						 PIPE_CONFIG(ADDR_SURF_P2) |
3179 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3180 				break;
3181 			case 14:
3182 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3183 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3184 						 PIPE_CONFIG(ADDR_SURF_P2) |
3185 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3186 				break;
3187 			case 16:
3188 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3189 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3190 						 PIPE_CONFIG(ADDR_SURF_P2) |
3191 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3192 				break;
3193 			case 17:
3194 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3195 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3196 						 PIPE_CONFIG(ADDR_SURF_P2) |
3197 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3198 				break;
3199 			case 27:
3200 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3201 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3202 						 PIPE_CONFIG(ADDR_SURF_P2));
3203 				break;
3204 			case 28:
3205 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3206 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3207 						 PIPE_CONFIG(ADDR_SURF_P2) |
3208 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3209 				break;
3210 			case 29:
3211 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3212 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3213 						 PIPE_CONFIG(ADDR_SURF_P2) |
3214 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3215 				break;
3216 			case 30:
3217 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3218 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3219 						 PIPE_CONFIG(ADDR_SURF_P2) |
3220 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3221 				break;
3222 			default:
3223 				gb_tile_moden = 0;
3224 				break;
3225 			}
3226 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3227 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3228 		}
3229 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3230 			switch (reg_offset) {
3231 			case 0:
3232 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3233 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3234 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3235 						 NUM_BANKS(ADDR_SURF_16_BANK));
3236 				break;
3237 			case 1:
3238 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3239 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3240 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3241 						 NUM_BANKS(ADDR_SURF_16_BANK));
3242 				break;
3243 			case 2:
3244 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3245 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3246 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3247 						 NUM_BANKS(ADDR_SURF_16_BANK));
3248 				break;
3249 			case 3:
3250 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3251 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3252 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3253 						 NUM_BANKS(ADDR_SURF_16_BANK));
3254 				break;
3255 			case 4:
3256 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3257 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3258 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3259 						 NUM_BANKS(ADDR_SURF_16_BANK));
3260 				break;
3261 			case 5:
3262 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3263 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3264 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3265 						 NUM_BANKS(ADDR_SURF_16_BANK));
3266 				break;
3267 			case 6:
3268 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3269 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3270 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3271 						 NUM_BANKS(ADDR_SURF_8_BANK));
3272 				break;
3273 			case 8:
3274 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3275 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3276 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3277 						 NUM_BANKS(ADDR_SURF_16_BANK));
3278 				break;
3279 			case 9:
3280 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3281 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3282 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3283 						 NUM_BANKS(ADDR_SURF_16_BANK));
3284 				break;
3285 			case 10:
3286 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3287 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3288 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3289 						 NUM_BANKS(ADDR_SURF_16_BANK));
3290 				break;
3291 			case 11:
3292 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3293 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3294 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3295 						 NUM_BANKS(ADDR_SURF_16_BANK));
3296 				break;
3297 			case 12:
3298 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3299 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3300 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3301 						 NUM_BANKS(ADDR_SURF_16_BANK));
3302 				break;
3303 			case 13:
3304 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3305 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3306 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307 						 NUM_BANKS(ADDR_SURF_16_BANK));
3308 				break;
3309 			case 14:
3310 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3311 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3312 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3313 						 NUM_BANKS(ADDR_SURF_8_BANK));
3314 				break;
3315 			default:
3316 				gb_tile_moden = 0;
3317 				break;
3318 			}
3319 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3320 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3321 		}
3322 	} else
3323 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3324 }
3325 
3326 /**
3327  * cik_select_se_sh - select which SE, SH to address
3328  *
3329  * @rdev: radeon_device pointer
3330  * @se_num: shader engine to address
3331  * @sh_num: sh block to address
3332  *
3333  * Select which SE, SH combinations to address. Certain
3334  * registers are instanced per SE or SH.  0xffffffff means
3335  * broadcast to all SEs or SHs (CIK).
3336  */
3337 static void cik_select_se_sh(struct radeon_device *rdev,
3338 			     u32 se_num, u32 sh_num)
3339 {
3340 	u32 data = INSTANCE_BROADCAST_WRITES;
3341 
3342 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3343 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3344 	else if (se_num == 0xffffffff)
3345 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3346 	else if (sh_num == 0xffffffff)
3347 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3348 	else
3349 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3350 	WREG32(GRBM_GFX_INDEX, data);
3351 }
3352 
/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * Create a mask with the low @bit_width bits set (CIK).
 * A width of 0 gives 0; a width of 32 or more gives 0xffffffff.
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	/*
	 * Shifting a 32-bit value by 32 or more is undefined, so handle
	 * the "all bits set" case explicitly before using the closed form.
	 */
	if (bit_width >= 32)
		return 0xffffffff;

	return ((u32)1 << bit_width) - 1;
}
3371 
3372 /**
3373  * cik_get_rb_disabled - computes the mask of disabled RBs
3374  *
3375  * @rdev: radeon_device pointer
3376  * @max_rb_num: max RBs (render backends) for the asic
3377  * @se_num: number of SEs (shader engines) for the asic
3378  * @sh_per_se: number of SH blocks per SE for the asic
3379  *
3380  * Calculates the bitmask of disabled RBs (CIK).
3381  * Returns the disabled RB bitmask.
3382  */
3383 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3384 			      u32 max_rb_num_per_se,
3385 			      u32 sh_per_se)
3386 {
3387 	u32 data, mask;
3388 
3389 	data = RREG32(CC_RB_BACKEND_DISABLE);
3390 	if (data & 1)
3391 		data &= BACKEND_DISABLE_MASK;
3392 	else
3393 		data = 0;
3394 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3395 
3396 	data >>= BACKEND_DISABLE_SHIFT;
3397 
3398 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3399 
3400 	return data & mask;
3401 }
3402 
3403 /**
3404  * cik_setup_rb - setup the RBs on the asic
3405  *
3406  * @rdev: radeon_device pointer
3407  * @se_num: number of SEs (shader engines) for the asic
3408  * @sh_per_se: number of SH blocks per SE for the asic
3409  * @max_rb_num: max RBs (render backends) for the asic
3410  *
3411  * Configures per-SE/SH RB registers (CIK).
3412  */
3413 static void cik_setup_rb(struct radeon_device *rdev,
3414 			 u32 se_num, u32 sh_per_se,
3415 			 u32 max_rb_num_per_se)
3416 {
3417 	int i, j;
3418 	u32 data, mask;
3419 	u32 disabled_rbs = 0;
3420 	u32 enabled_rbs = 0;
3421 
3422 	for (i = 0; i < se_num; i++) {
3423 		for (j = 0; j < sh_per_se; j++) {
3424 			cik_select_se_sh(rdev, i, j);
3425 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3426 			if (rdev->family == CHIP_HAWAII)
3427 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3428 			else
3429 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3430 		}
3431 	}
3432 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3433 
3434 	mask = 1;
3435 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3436 		if (!(disabled_rbs & mask))
3437 			enabled_rbs |= mask;
3438 		mask <<= 1;
3439 	}
3440 
3441 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3442 
3443 	for (i = 0; i < se_num; i++) {
3444 		cik_select_se_sh(rdev, i, 0xffffffff);
3445 		data = 0;
3446 		for (j = 0; j < sh_per_se; j++) {
3447 			switch (enabled_rbs & 3) {
3448 			case 0:
3449 				if (j == 0)
3450 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3451 				else
3452 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3453 				break;
3454 			case 1:
3455 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3456 				break;
3457 			case 2:
3458 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3459 				break;
3460 			case 3:
3461 			default:
3462 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3463 				break;
3464 			}
3465 			enabled_rbs >>= 2;
3466 		}
3467 		WREG32(PA_SC_RASTER_CONFIG, data);
3468 	}
3469 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3470 }
3471 
3472 /**
3473  * cik_gpu_init - setup the 3D engine
3474  *
3475  * @rdev: radeon_device pointer
3476  *
3477  * Configures the 3D engine and tiling configuration
3478  * registers so that the 3D engine is usable.
3479  */
3480 static void cik_gpu_init(struct radeon_device *rdev)
3481 {
3482 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3483 	u32 mc_shared_chmap, mc_arb_ramcfg;
3484 	u32 hdp_host_path_cntl;
3485 	u32 tmp;
3486 	int i, j, k;
3487 
3488 	switch (rdev->family) {
3489 	case CHIP_BONAIRE:
3490 		rdev->config.cik.max_shader_engines = 2;
3491 		rdev->config.cik.max_tile_pipes = 4;
3492 		rdev->config.cik.max_cu_per_sh = 7;
3493 		rdev->config.cik.max_sh_per_se = 1;
3494 		rdev->config.cik.max_backends_per_se = 2;
3495 		rdev->config.cik.max_texture_channel_caches = 4;
3496 		rdev->config.cik.max_gprs = 256;
3497 		rdev->config.cik.max_gs_threads = 32;
3498 		rdev->config.cik.max_hw_contexts = 8;
3499 
3500 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3501 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3502 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3503 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3504 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3505 		break;
3506 	case CHIP_HAWAII:
3507 		rdev->config.cik.max_shader_engines = 4;
3508 		rdev->config.cik.max_tile_pipes = 16;
3509 		rdev->config.cik.max_cu_per_sh = 11;
3510 		rdev->config.cik.max_sh_per_se = 1;
3511 		rdev->config.cik.max_backends_per_se = 4;
3512 		rdev->config.cik.max_texture_channel_caches = 16;
3513 		rdev->config.cik.max_gprs = 256;
3514 		rdev->config.cik.max_gs_threads = 32;
3515 		rdev->config.cik.max_hw_contexts = 8;
3516 
3517 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3518 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3519 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3520 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3521 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3522 		break;
3523 	case CHIP_KAVERI:
3524 		rdev->config.cik.max_shader_engines = 1;
3525 		rdev->config.cik.max_tile_pipes = 4;
3526 		if ((rdev->pdev->device == 0x1304) ||
3527 		    (rdev->pdev->device == 0x1305) ||
3528 		    (rdev->pdev->device == 0x130C) ||
3529 		    (rdev->pdev->device == 0x130F) ||
3530 		    (rdev->pdev->device == 0x1310) ||
3531 		    (rdev->pdev->device == 0x1311) ||
3532 		    (rdev->pdev->device == 0x131C)) {
3533 			rdev->config.cik.max_cu_per_sh = 8;
3534 			rdev->config.cik.max_backends_per_se = 2;
3535 		} else if ((rdev->pdev->device == 0x1309) ||
3536 			   (rdev->pdev->device == 0x130A) ||
3537 			   (rdev->pdev->device == 0x130D) ||
3538 			   (rdev->pdev->device == 0x1313) ||
3539 			   (rdev->pdev->device == 0x131D)) {
3540 			rdev->config.cik.max_cu_per_sh = 6;
3541 			rdev->config.cik.max_backends_per_se = 2;
3542 		} else if ((rdev->pdev->device == 0x1306) ||
3543 			   (rdev->pdev->device == 0x1307) ||
3544 			   (rdev->pdev->device == 0x130B) ||
3545 			   (rdev->pdev->device == 0x130E) ||
3546 			   (rdev->pdev->device == 0x1315) ||
3547 			   (rdev->pdev->device == 0x131B)) {
3548 			rdev->config.cik.max_cu_per_sh = 4;
3549 			rdev->config.cik.max_backends_per_se = 1;
3550 		} else {
3551 			rdev->config.cik.max_cu_per_sh = 3;
3552 			rdev->config.cik.max_backends_per_se = 1;
3553 		}
3554 		rdev->config.cik.max_sh_per_se = 1;
3555 		rdev->config.cik.max_texture_channel_caches = 4;
3556 		rdev->config.cik.max_gprs = 256;
3557 		rdev->config.cik.max_gs_threads = 16;
3558 		rdev->config.cik.max_hw_contexts = 8;
3559 
3560 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3561 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3562 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3563 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3564 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3565 		break;
3566 	case CHIP_KABINI:
3567 	case CHIP_MULLINS:
3568 	default:
3569 		rdev->config.cik.max_shader_engines = 1;
3570 		rdev->config.cik.max_tile_pipes = 2;
3571 		rdev->config.cik.max_cu_per_sh = 2;
3572 		rdev->config.cik.max_sh_per_se = 1;
3573 		rdev->config.cik.max_backends_per_se = 1;
3574 		rdev->config.cik.max_texture_channel_caches = 2;
3575 		rdev->config.cik.max_gprs = 256;
3576 		rdev->config.cik.max_gs_threads = 16;
3577 		rdev->config.cik.max_hw_contexts = 8;
3578 
3579 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3580 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3581 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3582 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3583 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3584 		break;
3585 	}
3586 
3587 	/* Initialize HDP */
3588 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3589 		WREG32((0x2c14 + j), 0x00000000);
3590 		WREG32((0x2c18 + j), 0x00000000);
3591 		WREG32((0x2c1c + j), 0x00000000);
3592 		WREG32((0x2c20 + j), 0x00000000);
3593 		WREG32((0x2c24 + j), 0x00000000);
3594 	}
3595 
3596 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3597 
3598 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3599 
3600 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3601 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3602 
3603 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3604 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3605 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3606 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3607 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3608 		rdev->config.cik.mem_row_size_in_kb = 4;
3609 	/* XXX use MC settings? */
3610 	rdev->config.cik.shader_engine_tile_size = 32;
3611 	rdev->config.cik.num_gpus = 1;
3612 	rdev->config.cik.multi_gpu_tile_size = 64;
3613 
3614 	/* fix up row size */
3615 	gb_addr_config &= ~ROW_SIZE_MASK;
3616 	switch (rdev->config.cik.mem_row_size_in_kb) {
3617 	case 1:
3618 	default:
3619 		gb_addr_config |= ROW_SIZE(0);
3620 		break;
3621 	case 2:
3622 		gb_addr_config |= ROW_SIZE(1);
3623 		break;
3624 	case 4:
3625 		gb_addr_config |= ROW_SIZE(2);
3626 		break;
3627 	}
3628 
3629 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3630 	 * not have bank info, so create a custom tiling dword.
3631 	 * bits 3:0   num_pipes
3632 	 * bits 7:4   num_banks
3633 	 * bits 11:8  group_size
3634 	 * bits 15:12 row_size
3635 	 */
3636 	rdev->config.cik.tile_config = 0;
3637 	switch (rdev->config.cik.num_tile_pipes) {
3638 	case 1:
3639 		rdev->config.cik.tile_config |= (0 << 0);
3640 		break;
3641 	case 2:
3642 		rdev->config.cik.tile_config |= (1 << 0);
3643 		break;
3644 	case 4:
3645 		rdev->config.cik.tile_config |= (2 << 0);
3646 		break;
3647 	case 8:
3648 	default:
3649 		/* XXX what about 12? */
3650 		rdev->config.cik.tile_config |= (3 << 0);
3651 		break;
3652 	}
3653 	rdev->config.cik.tile_config |=
3654 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3655 	rdev->config.cik.tile_config |=
3656 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3657 	rdev->config.cik.tile_config |=
3658 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3659 
3660 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3661 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3662 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3663 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3664 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3665 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3666 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3667 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3668 
3669 	cik_tiling_mode_table_init(rdev);
3670 
3671 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3672 		     rdev->config.cik.max_sh_per_se,
3673 		     rdev->config.cik.max_backends_per_se);
3674 
3675 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3676 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3677 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
3678 				rdev->config.cik.active_cus +=
3679 					hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3680 			}
3681 		}
3682 	}
3683 
3684 	/* set HW defaults for 3D engine */
3685 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3686 
3687 	WREG32(SX_DEBUG_1, 0x20);
3688 
3689 	WREG32(TA_CNTL_AUX, 0x00010000);
3690 
3691 	tmp = RREG32(SPI_CONFIG_CNTL);
3692 	tmp |= 0x03000000;
3693 	WREG32(SPI_CONFIG_CNTL, tmp);
3694 
3695 	WREG32(SQ_CONFIG, 1);
3696 
3697 	WREG32(DB_DEBUG, 0);
3698 
3699 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3700 	tmp |= 0x00000400;
3701 	WREG32(DB_DEBUG2, tmp);
3702 
3703 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3704 	tmp |= 0x00020200;
3705 	WREG32(DB_DEBUG3, tmp);
3706 
3707 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3708 	tmp |= 0x00018208;
3709 	WREG32(CB_HW_CONTROL, tmp);
3710 
3711 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3712 
3713 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3714 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3715 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3716 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3717 
3718 	WREG32(VGT_NUM_INSTANCES, 1);
3719 
3720 	WREG32(CP_PERFMON_CNTL, 0);
3721 
3722 	WREG32(SQ_CONFIG, 0);
3723 
3724 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3725 					  FORCE_EOV_MAX_REZ_CNT(255)));
3726 
3727 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3728 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3729 
3730 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3731 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3732 
3733 	tmp = RREG32(HDP_MISC_CNTL);
3734 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3735 	WREG32(HDP_MISC_CNTL, tmp);
3736 
3737 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3738 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3739 
3740 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3741 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3742 
3743 	udelay(50);
3744 }
3745 
3746 /*
 * GPU scratch register helper functions.
3748  */
3749 /**
3750  * cik_scratch_init - setup driver info for CP scratch regs
3751  *
3752  * @rdev: radeon_device pointer
3753  *
3754  * Set up the number and offset of the CP scratch registers.
3755  * NOTE: use of CP scratch registers is a legacy inferface and
3756  * is not used by default on newer asics (r6xx+).  On newer asics,
3757  * memory buffers are used for fences rather than scratch regs.
3758  */
3759 static void cik_scratch_init(struct radeon_device *rdev)
3760 {
3761 	int i;
3762 
3763 	rdev->scratch.num_reg = 7;
3764 	rdev->scratch.reg_base = SCRATCH_REG0;
3765 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3766 		rdev->scratch.free[i] = true;
3767 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3768 	}
3769 }
3770 
3771 /**
3772  * cik_ring_test - basic gfx ring test
3773  *
3774  * @rdev: radeon_device pointer
3775  * @ring: radeon_ring structure holding ring information
3776  *
3777  * Allocate a scratch register and write to it using the gfx ring (CIK).
3778  * Provides a basic gfx ring test to verify that the ring is working.
3779  * Used by cik_cp_gfx_resume();
3780  * Returns 0 on success, error on failure.
3781  */
3782 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3783 {
3784 	uint32_t scratch;
3785 	uint32_t tmp = 0;
3786 	unsigned i;
3787 	int r;
3788 
3789 	r = radeon_scratch_get(rdev, &scratch);
3790 	if (r) {
3791 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3792 		return r;
3793 	}
3794 	WREG32(scratch, 0xCAFEDEAD);
3795 	r = radeon_ring_lock(rdev, ring, 3);
3796 	if (r) {
3797 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3798 		radeon_scratch_free(rdev, scratch);
3799 		return r;
3800 	}
3801 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3802 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3803 	radeon_ring_write(ring, 0xDEADBEEF);
3804 	radeon_ring_unlock_commit(rdev, ring);
3805 
3806 	for (i = 0; i < rdev->usec_timeout; i++) {
3807 		tmp = RREG32(scratch);
3808 		if (tmp == 0xDEADBEEF)
3809 			break;
3810 		DRM_UDELAY(1);
3811 	}
3812 	if (i < rdev->usec_timeout) {
3813 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3814 	} else {
3815 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3816 			  ring->idx, scratch, tmp);
3817 		r = -EINVAL;
3818 	}
3819 	radeon_scratch_free(rdev, scratch);
3820 	return r;
3821 }
3822 
3823 /**
3824  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3825  *
3826  * @rdev: radeon_device pointer
3827  * @ridx: radeon ring index
3828  *
3829  * Emits an hdp flush on the cp.
3830  */
3831 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3832 				       int ridx)
3833 {
3834 	struct radeon_ring *ring = &rdev->ring[ridx];
3835 	u32 ref_and_mask;
3836 
3837 	switch (ring->idx) {
3838 	case CAYMAN_RING_TYPE_CP1_INDEX:
3839 	case CAYMAN_RING_TYPE_CP2_INDEX:
3840 	default:
3841 		switch (ring->me) {
3842 		case 0:
3843 			ref_and_mask = CP2 << ring->pipe;
3844 			break;
3845 		case 1:
3846 			ref_and_mask = CP6 << ring->pipe;
3847 			break;
3848 		default:
3849 			return;
3850 		}
3851 		break;
3852 	case RADEON_RING_TYPE_GFX_INDEX:
3853 		ref_and_mask = CP0;
3854 		break;
3855 	}
3856 
3857 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3858 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3859 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3860 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3861 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3862 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3863 	radeon_ring_write(ring, ref_and_mask);
3864 	radeon_ring_write(ring, ref_and_mask);
3865 	radeon_ring_write(ring, 0x20); /* poll interval */
3866 }
3867 
3868 /**
3869  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3870  *
3871  * @rdev: radeon_device pointer
3872  * @fence: radeon fence object
3873  *
3874  * Emits a fence sequnce number on the gfx ring and flushes
3875  * GPU caches.
3876  */
3877 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3878 			     struct radeon_fence *fence)
3879 {
3880 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3881 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3882 
3883 	/* EVENT_WRITE_EOP - flush caches, send int */
3884 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3885 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3886 				 EOP_TC_ACTION_EN |
3887 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3888 				 EVENT_INDEX(5)));
3889 	radeon_ring_write(ring, addr & 0xfffffffc);
3890 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3891 	radeon_ring_write(ring, fence->seq);
3892 	radeon_ring_write(ring, 0);
3893 }
3894 
3895 /**
3896  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3897  *
3898  * @rdev: radeon_device pointer
3899  * @fence: radeon fence object
3900  *
3901  * Emits a fence sequnce number on the compute ring and flushes
3902  * GPU caches.
3903  */
3904 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3905 				 struct radeon_fence *fence)
3906 {
3907 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3908 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3909 
3910 	/* RELEASE_MEM - flush caches, send int */
3911 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3912 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3913 				 EOP_TC_ACTION_EN |
3914 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3915 				 EVENT_INDEX(5)));
3916 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3917 	radeon_ring_write(ring, addr & 0xfffffffc);
3918 	radeon_ring_write(ring, upper_32_bits(addr));
3919 	radeon_ring_write(ring, fence->seq);
3920 	radeon_ring_write(ring, 0);
3921 }
3922 
3923 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3924 			     struct radeon_ring *ring,
3925 			     struct radeon_semaphore *semaphore,
3926 			     bool emit_wait)
3927 {
3928 	uint64_t addr = semaphore->gpu_addr;
3929 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3930 
3931 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3932 	radeon_ring_write(ring, lower_32_bits(addr));
3933 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3934 
3935 	return true;
3936 }
3937 
3938 /**
3939  * cik_copy_cpdma - copy pages using the CP DMA engine
3940  *
3941  * @rdev: radeon_device pointer
3942  * @src_offset: src GPU address
3943  * @dst_offset: dst GPU address
3944  * @num_gpu_pages: number of GPU pages to xfer
3945  * @fence: radeon fence object
3946  *
3947  * Copy GPU paging using the CP DMA engine (CIK+).
3948  * Used by the radeon ttm implementation to move pages if
3949  * registered as the asic copy callback.
3950  */
3951 int cik_copy_cpdma(struct radeon_device *rdev,
3952 		   uint64_t src_offset, uint64_t dst_offset,
3953 		   unsigned num_gpu_pages,
3954 		   struct radeon_fence **fence)
3955 {
3956 	struct radeon_semaphore *sem = NULL;
3957 	int ring_index = rdev->asic->copy.blit_ring_index;
3958 	struct radeon_ring *ring = &rdev->ring[ring_index];
3959 	u32 size_in_bytes, cur_size_in_bytes, control;
3960 	int i, num_loops;
3961 	int r = 0;
3962 
3963 	r = radeon_semaphore_create(rdev, &sem);
3964 	if (r) {
3965 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3966 		return r;
3967 	}
3968 
3969 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3970 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3971 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3972 	if (r) {
3973 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3974 		radeon_semaphore_free(rdev, &sem, NULL);
3975 		return r;
3976 	}
3977 
3978 	radeon_semaphore_sync_to(sem, *fence);
3979 	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
3980 
3981 	for (i = 0; i < num_loops; i++) {
3982 		cur_size_in_bytes = size_in_bytes;
3983 		if (cur_size_in_bytes > 0x1fffff)
3984 			cur_size_in_bytes = 0x1fffff;
3985 		size_in_bytes -= cur_size_in_bytes;
3986 		control = 0;
3987 		if (size_in_bytes == 0)
3988 			control |= PACKET3_DMA_DATA_CP_SYNC;
3989 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3990 		radeon_ring_write(ring, control);
3991 		radeon_ring_write(ring, lower_32_bits(src_offset));
3992 		radeon_ring_write(ring, upper_32_bits(src_offset));
3993 		radeon_ring_write(ring, lower_32_bits(dst_offset));
3994 		radeon_ring_write(ring, upper_32_bits(dst_offset));
3995 		radeon_ring_write(ring, cur_size_in_bytes);
3996 		src_offset += cur_size_in_bytes;
3997 		dst_offset += cur_size_in_bytes;
3998 	}
3999 
4000 	r = radeon_fence_emit(rdev, fence, ring->idx);
4001 	if (r) {
4002 		radeon_ring_unlock_undo(rdev, ring);
4003 		radeon_semaphore_free(rdev, &sem, NULL);
4004 		return r;
4005 	}
4006 
4007 	radeon_ring_unlock_commit(rdev, ring);
4008 	radeon_semaphore_free(rdev, &sem, *fence);
4009 
4010 	return r;
4011 }
4012 
4013 /*
4014  * IB stuff
4015  */
4016 /**
4017  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4018  *
4019  * @rdev: radeon_device pointer
4020  * @ib: radeon indirect buffer object
4021  *
4022  * Emits an DE (drawing engine) or CE (constant engine) IB
4023  * on the gfx ring.  IBs are usually generated by userspace
4024  * acceleration drivers and submitted to the kernel for
4025  * sheduling on the ring.  This function schedules the IB
4026  * on the gfx ring for execution by the GPU.
4027  */
4028 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
4029 {
4030 	struct radeon_ring *ring = &rdev->ring[ib->ring];
4031 	u32 header, control = INDIRECT_BUFFER_VALID;
4032 
4033 	if (ib->is_const_ib) {
4034 		/* set switch buffer packet before const IB */
4035 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4036 		radeon_ring_write(ring, 0);
4037 
4038 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4039 	} else {
4040 		u32 next_rptr;
4041 		if (ring->rptr_save_reg) {
4042 			next_rptr = ring->wptr + 3 + 4;
4043 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4044 			radeon_ring_write(ring, ((ring->rptr_save_reg -
4045 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
4046 			radeon_ring_write(ring, next_rptr);
4047 		} else if (rdev->wb.enabled) {
4048 			next_rptr = ring->wptr + 5 + 4;
4049 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4050 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
4051 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4052 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
4053 			radeon_ring_write(ring, next_rptr);
4054 		}
4055 
4056 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4057 	}
4058 
4059 	control |= ib->length_dw |
4060 		(ib->vm ? (ib->vm->id << 24) : 0);
4061 
4062 	radeon_ring_write(ring, header);
4063 	radeon_ring_write(ring,
4064 #ifdef __BIG_ENDIAN
4065 			  (2 << 0) |
4066 #endif
4067 			  (ib->gpu_addr & 0xFFFFFFFC));
4068 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4069 	radeon_ring_write(ring, control);
4070 }
4071 
4072 /**
4073  * cik_ib_test - basic gfx ring IB test
4074  *
4075  * @rdev: radeon_device pointer
4076  * @ring: radeon_ring structure holding ring information
4077  *
4078  * Allocate an IB and execute it on the gfx ring (CIK).
4079  * Provides a basic gfx ring test to verify that IBs are working.
4080  * Returns 0 on success, error on failure.
4081  */
4082 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4083 {
4084 	struct radeon_ib ib;
4085 	uint32_t scratch;
4086 	uint32_t tmp = 0;
4087 	unsigned i;
4088 	int r;
4089 
4090 	r = radeon_scratch_get(rdev, &scratch);
4091 	if (r) {
4092 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
4093 		return r;
4094 	}
4095 	WREG32(scratch, 0xCAFEDEAD);
4096 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4097 	if (r) {
4098 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4099 		radeon_scratch_free(rdev, scratch);
4100 		return r;
4101 	}
4102 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
4103 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
4104 	ib.ptr[2] = 0xDEADBEEF;
4105 	ib.length_dw = 3;
4106 	r = radeon_ib_schedule(rdev, &ib, NULL);
4107 	if (r) {
4108 		radeon_scratch_free(rdev, scratch);
4109 		radeon_ib_free(rdev, &ib);
4110 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4111 		return r;
4112 	}
4113 	r = radeon_fence_wait(ib.fence, false);
4114 	if (r) {
4115 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4116 		radeon_scratch_free(rdev, scratch);
4117 		radeon_ib_free(rdev, &ib);
4118 		return r;
4119 	}
4120 	for (i = 0; i < rdev->usec_timeout; i++) {
4121 		tmp = RREG32(scratch);
4122 		if (tmp == 0xDEADBEEF)
4123 			break;
4124 		DRM_UDELAY(1);
4125 	}
4126 	if (i < rdev->usec_timeout) {
4127 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4128 	} else {
4129 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
4130 			  scratch, tmp);
4131 		r = -EINVAL;
4132 	}
4133 	radeon_scratch_free(rdev, scratch);
4134 	radeon_ib_free(rdev, &ib);
4135 	return r;
4136 }
4137 
4138 /*
4139  * CP.
 * On CIK, gfx and compute now have independent command processors.
4141  *
4142  * GFX
4143  * Gfx consists of a single ring and can process both gfx jobs and
4144  * compute jobs.  The gfx CP consists of three microengines (ME):
4145  * PFP - Pre-Fetch Parser
4146  * ME - Micro Engine
4147  * CE - Constant Engine
4148  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
4150  * used by the DE so that they can be loaded into cache in parallel
4151  * while the DE is processing state update packets.
4152  *
4153  * Compute
4154  * The compute CP consists of two microengines (ME):
4155  * MEC1 - Compute MicroEngine 1
4156  * MEC2 - Compute MicroEngine 2
4157  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4158  * The queues are exposed to userspace and are programmed directly
4159  * by the compute runtime.
4160  */
4161 /**
4162  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4163  *
4164  * @rdev: radeon_device pointer
4165  * @enable: enable or disable the MEs
4166  *
4167  * Halts or unhalts the gfx MEs.
4168  */
4169 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4170 {
4171 	if (enable)
4172 		WREG32(CP_ME_CNTL, 0);
4173 	else {
4174 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4175 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4176 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4177 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4178 	}
4179 	udelay(50);
4180 }
4181 
4182 /**
4183  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4184  *
4185  * @rdev: radeon_device pointer
4186  *
4187  * Loads the gfx PFP, ME, and CE ucode.
4188  * Returns 0 for success, -EINVAL if the ucode is not available.
4189  */
4190 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4191 {
4192 	int i;
4193 
4194 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4195 		return -EINVAL;
4196 
4197 	cik_cp_gfx_enable(rdev, false);
4198 
4199 	if (rdev->new_fw) {
4200 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
4201 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4202 		const struct gfx_firmware_header_v1_0 *ce_hdr =
4203 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4204 		const struct gfx_firmware_header_v1_0 *me_hdr =
4205 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4206 		const __le32 *fw_data;
4207 		u32 fw_size;
4208 
4209 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4210 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4211 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
4212 
4213 		/* PFP */
4214 		fw_data = (const __le32 *)
4215 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4216 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4217 		WREG32(CP_PFP_UCODE_ADDR, 0);
4218 		for (i = 0; i < fw_size; i++)
4219 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4220 		WREG32(CP_PFP_UCODE_ADDR, 0);
4221 
4222 		/* CE */
4223 		fw_data = (const __le32 *)
4224 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4225 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4226 		WREG32(CP_CE_UCODE_ADDR, 0);
4227 		for (i = 0; i < fw_size; i++)
4228 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4229 		WREG32(CP_CE_UCODE_ADDR, 0);
4230 
4231 		/* ME */
4232 		fw_data = (const __be32 *)
4233 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4234 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4235 		WREG32(CP_ME_RAM_WADDR, 0);
4236 		for (i = 0; i < fw_size; i++)
4237 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4238 		WREG32(CP_ME_RAM_WADDR, 0);
4239 	} else {
4240 		const __be32 *fw_data;
4241 
4242 		/* PFP */
4243 		fw_data = (const __be32 *)rdev->pfp_fw->data;
4244 		WREG32(CP_PFP_UCODE_ADDR, 0);
4245 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4246 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4247 		WREG32(CP_PFP_UCODE_ADDR, 0);
4248 
4249 		/* CE */
4250 		fw_data = (const __be32 *)rdev->ce_fw->data;
4251 		WREG32(CP_CE_UCODE_ADDR, 0);
4252 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4253 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4254 		WREG32(CP_CE_UCODE_ADDR, 0);
4255 
4256 		/* ME */
4257 		fw_data = (const __be32 *)rdev->me_fw->data;
4258 		WREG32(CP_ME_RAM_WADDR, 0);
4259 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4260 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4261 		WREG32(CP_ME_RAM_WADDR, 0);
4262 	}
4263 
4264 	WREG32(CP_PFP_UCODE_ADDR, 0);
4265 	WREG32(CP_CE_UCODE_ADDR, 0);
4266 	WREG32(CP_ME_RAM_WADDR, 0);
4267 	WREG32(CP_ME_RAM_RADDR, 0);
4268 	return 0;
4269 }
4270 
4271 /**
4272  * cik_cp_gfx_start - start the gfx ring
4273  *
4274  * @rdev: radeon_device pointer
4275  *
4276  * Enables the ring and loads the clear state context and other
4277  * packets required to init the ring.
4278  * Returns 0 for success, error for failure.
4279  */
4280 static int cik_cp_gfx_start(struct radeon_device *rdev)
4281 {
4282 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4283 	int r, i;
4284 
4285 	/* init the CP */
4286 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4287 	WREG32(CP_ENDIAN_SWAP, 0);
4288 	WREG32(CP_DEVICE_ID, 1);
4289 
4290 	cik_cp_gfx_enable(rdev, true);
4291 
4292 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4293 	if (r) {
4294 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4295 		return r;
4296 	}
4297 
4298 	/* init the CE partitions.  CE only used for gfx on CIK */
4299 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4300 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4301 	radeon_ring_write(ring, 0xc000);
4302 	radeon_ring_write(ring, 0xc000);
4303 
4304 	/* setup clear context state */
4305 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4306 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4307 
4308 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4309 	radeon_ring_write(ring, 0x80000000);
4310 	radeon_ring_write(ring, 0x80000000);
4311 
4312 	for (i = 0; i < cik_default_size; i++)
4313 		radeon_ring_write(ring, cik_default_state[i]);
4314 
4315 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4316 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4317 
4318 	/* set clear context state */
4319 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4320 	radeon_ring_write(ring, 0);
4321 
4322 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4323 	radeon_ring_write(ring, 0x00000316);
4324 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4325 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4326 
4327 	radeon_ring_unlock_commit(rdev, ring);
4328 
4329 	return 0;
4330 }
4331 
4332 /**
4333  * cik_cp_gfx_fini - stop the gfx ring
4334  *
4335  * @rdev: radeon_device pointer
4336  *
4337  * Stop the gfx ring and tear down the driver ring
4338  * info.
4339  */
4340 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4341 {
4342 	cik_cp_gfx_enable(rdev, false);
4343 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4344 }
4345 
4346 /**
4347  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4348  *
4349  * @rdev: radeon_device pointer
4350  *
4351  * Program the location and size of the gfx ring buffer
4352  * and test it to make sure it's working.
4353  * Returns 0 for success, error for failure.
4354  */
4355 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4356 {
4357 	struct radeon_ring *ring;
4358 	u32 tmp;
4359 	u32 rb_bufsz;
4360 	u64 rb_addr;
4361 	int r;
4362 
4363 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4364 	if (rdev->family != CHIP_HAWAII)
4365 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4366 
4367 	/* Set the write pointer delay */
4368 	WREG32(CP_RB_WPTR_DELAY, 0);
4369 
4370 	/* set the RB to use vmid 0 */
4371 	WREG32(CP_RB_VMID, 0);
4372 
4373 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4374 
4375 	/* ring 0 - compute and gfx */
4376 	/* Set ring buffer size */
4377 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4378 	rb_bufsz = order_base_2(ring->ring_size / 8);
4379 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4380 #ifdef __BIG_ENDIAN
4381 	tmp |= BUF_SWAP_32BIT;
4382 #endif
4383 	WREG32(CP_RB0_CNTL, tmp);
4384 
4385 	/* Initialize the ring buffer's read and write pointers */
4386 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4387 	ring->wptr = 0;
4388 	WREG32(CP_RB0_WPTR, ring->wptr);
4389 
4390 	/* set the wb address wether it's enabled or not */
4391 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4392 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4393 
4394 	/* scratch register shadowing is no longer supported */
4395 	WREG32(SCRATCH_UMSK, 0);
4396 
4397 	if (!rdev->wb.enabled)
4398 		tmp |= RB_NO_UPDATE;
4399 
4400 	mdelay(1);
4401 	WREG32(CP_RB0_CNTL, tmp);
4402 
4403 	rb_addr = ring->gpu_addr >> 8;
4404 	WREG32(CP_RB0_BASE, rb_addr);
4405 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4406 
4407 	/* start the ring */
4408 	cik_cp_gfx_start(rdev);
4409 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4410 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4411 	if (r) {
4412 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4413 		return r;
4414 	}
4415 
4416 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4417 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4418 
4419 	return 0;
4420 }
4421 
4422 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4423 		     struct radeon_ring *ring)
4424 {
4425 	u32 rptr;
4426 
4427 	if (rdev->wb.enabled)
4428 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4429 	else
4430 		rptr = RREG32(CP_RB0_RPTR);
4431 
4432 	return rptr;
4433 }
4434 
4435 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4436 		     struct radeon_ring *ring)
4437 {
4438 	u32 wptr;
4439 
4440 	wptr = RREG32(CP_RB0_WPTR);
4441 
4442 	return wptr;
4443 }
4444 
4445 void cik_gfx_set_wptr(struct radeon_device *rdev,
4446 		      struct radeon_ring *ring)
4447 {
4448 	WREG32(CP_RB0_WPTR, ring->wptr);
4449 	(void)RREG32(CP_RB0_WPTR);
4450 }
4451 
4452 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4453 			 struct radeon_ring *ring)
4454 {
4455 	u32 rptr;
4456 
4457 	if (rdev->wb.enabled) {
4458 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4459 	} else {
4460 		mutex_lock(&rdev->srbm_mutex);
4461 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4462 		rptr = RREG32(CP_HQD_PQ_RPTR);
4463 		cik_srbm_select(rdev, 0, 0, 0, 0);
4464 		mutex_unlock(&rdev->srbm_mutex);
4465 	}
4466 
4467 	return rptr;
4468 }
4469 
4470 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4471 			 struct radeon_ring *ring)
4472 {
4473 	u32 wptr;
4474 
4475 	if (rdev->wb.enabled) {
4476 		/* XXX check if swapping is necessary on BE */
4477 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4478 	} else {
4479 		mutex_lock(&rdev->srbm_mutex);
4480 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4481 		wptr = RREG32(CP_HQD_PQ_WPTR);
4482 		cik_srbm_select(rdev, 0, 0, 0, 0);
4483 		mutex_unlock(&rdev->srbm_mutex);
4484 	}
4485 
4486 	return wptr;
4487 }
4488 
4489 void cik_compute_set_wptr(struct radeon_device *rdev,
4490 			  struct radeon_ring *ring)
4491 {
4492 	/* XXX check if swapping is necessary on BE */
4493 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4494 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4495 }
4496 
4497 /**
4498  * cik_cp_compute_enable - enable/disable the compute CP MEs
4499  *
4500  * @rdev: radeon_device pointer
4501  * @enable: enable or disable the MEs
4502  *
4503  * Halts or unhalts the compute MEs.
4504  */
4505 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4506 {
4507 	if (enable)
4508 		WREG32(CP_MEC_CNTL, 0);
4509 	else {
4510 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4511 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4512 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4513 	}
4514 	udelay(50);
4515 }
4516 
4517 /**
4518  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4519  *
4520  * @rdev: radeon_device pointer
4521  *
4522  * Loads the compute MEC1&2 ucode.
4523  * Returns 0 for success, -EINVAL if the ucode is not available.
4524  */
4525 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4526 {
4527 	int i;
4528 
4529 	if (!rdev->mec_fw)
4530 		return -EINVAL;
4531 
4532 	cik_cp_compute_enable(rdev, false);
4533 
4534 	if (rdev->new_fw) {
4535 		const struct gfx_firmware_header_v1_0 *mec_hdr =
4536 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4537 		const __le32 *fw_data;
4538 		u32 fw_size;
4539 
4540 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4541 
4542 		/* MEC1 */
4543 		fw_data = (const __le32 *)
4544 			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4545 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4546 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4547 		for (i = 0; i < fw_size; i++)
4548 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4549 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4550 
4551 		/* MEC2 */
4552 		if (rdev->family == CHIP_KAVERI) {
4553 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4554 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4555 
4556 			fw_data = (const __le32 *)
4557 				(rdev->mec2_fw->data +
4558 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4559 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4560 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4561 			for (i = 0; i < fw_size; i++)
4562 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4563 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4564 		}
4565 	} else {
4566 		const __be32 *fw_data;
4567 
4568 		/* MEC1 */
4569 		fw_data = (const __be32 *)rdev->mec_fw->data;
4570 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4571 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4572 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4573 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4574 
4575 		if (rdev->family == CHIP_KAVERI) {
4576 			/* MEC2 */
4577 			fw_data = (const __be32 *)rdev->mec_fw->data;
4578 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4579 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4580 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4581 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4582 		}
4583 	}
4584 
4585 	return 0;
4586 }
4587 
4588 /**
4589  * cik_cp_compute_start - start the compute queues
4590  *
4591  * @rdev: radeon_device pointer
4592  *
4593  * Enable the compute queues.
4594  * Returns 0 for success, error for failure.
4595  */
4596 static int cik_cp_compute_start(struct radeon_device *rdev)
4597 {
4598 	cik_cp_compute_enable(rdev, true);
4599 
4600 	return 0;
4601 }
4602 
4603 /**
4604  * cik_cp_compute_fini - stop the compute queues
4605  *
4606  * @rdev: radeon_device pointer
4607  *
4608  * Stop the compute queues and tear down the driver queue
4609  * info.
4610  */
4611 static void cik_cp_compute_fini(struct radeon_device *rdev)
4612 {
4613 	int i, idx, r;
4614 
4615 	cik_cp_compute_enable(rdev, false);
4616 
4617 	for (i = 0; i < 2; i++) {
4618 		if (i == 0)
4619 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4620 		else
4621 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4622 
4623 		if (rdev->ring[idx].mqd_obj) {
4624 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4625 			if (unlikely(r != 0))
4626 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4627 
4628 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4629 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4630 
4631 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4632 			rdev->ring[idx].mqd_obj = NULL;
4633 		}
4634 	}
4635 }
4636 
4637 static void cik_mec_fini(struct radeon_device *rdev)
4638 {
4639 	int r;
4640 
4641 	if (rdev->mec.hpd_eop_obj) {
4642 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4643 		if (unlikely(r != 0))
4644 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4645 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4646 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4647 
4648 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4649 		rdev->mec.hpd_eop_obj = NULL;
4650 	}
4651 }
4652 
/*
 * Sizing unit for the MEC HPD EOP buffer: cik_mec_init() allocates
 * MEC_HPD_SIZE * 2 per compute pipe, and cik_cp_compute_resume() uses the
 * same stride for each pipe's EOP base address and derives the EOP_SIZE
 * field from MEC_HPD_SIZE / 8.
 */
#define MEC_HPD_SIZE 2048
4654 
4655 static int cik_mec_init(struct radeon_device *rdev)
4656 {
4657 	int r;
4658 	u32 *hpd;
4659 
4660 	/*
4661 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4662 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4663 	 */
4664 	if (rdev->family == CHIP_KAVERI)
4665 		rdev->mec.num_mec = 2;
4666 	else
4667 		rdev->mec.num_mec = 1;
4668 	rdev->mec.num_pipe = 4;
4669 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4670 
4671 	if (rdev->mec.hpd_eop_obj == NULL) {
4672 		r = radeon_bo_create(rdev,
4673 				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4674 				     PAGE_SIZE, true,
4675 				     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4676 				     &rdev->mec.hpd_eop_obj);
4677 		if (r) {
4678 			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4679 			return r;
4680 		}
4681 	}
4682 
4683 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4684 	if (unlikely(r != 0)) {
4685 		cik_mec_fini(rdev);
4686 		return r;
4687 	}
4688 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4689 			  &rdev->mec.hpd_eop_gpu_addr);
4690 	if (r) {
4691 		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4692 		cik_mec_fini(rdev);
4693 		return r;
4694 	}
4695 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4696 	if (r) {
4697 		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4698 		cik_mec_fini(rdev);
4699 		return r;
4700 	}
4701 
4702 	/* clear memory.  Not sure if this is required or not */
4703 	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4704 
4705 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4706 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4707 
4708 	return 0;
4709 }
4710 
/*
 * Image of the per-queue CP_MQD_* / CP_HQD_* register state, embedded in
 * struct bonaire_mqd as queue_state.  cik_cp_compute_resume() stores each
 * value it programs in the matching field here and writes the same value
 * to the like-named register; fields it does not touch stay zero from the
 * memset of the MQD.
 * NOTE(review): field order presumably mirrors a layout the CP expects -
 * confirm before reordering or inserting fields.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4749 
/*
 * MQD structure for a compute queue.  cik_cp_compute_resume() creates one
 * buffer object of sizeof(struct bonaire_mqd) per compute ring, zeroes it,
 * sets header and the static_thread_mgmt01/23 words, and records the queue
 * register values in queue_state; the GPU address of the buffer is written
 * to CP_MQD_BASE_ADDR/_HI.
 * NOTE(review): layout is presumably dictated by hardware/firmware -
 * confirm before changing field order or sizes.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	/* per-queue register image, filled by cik_cp_compute_resume() */
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4777 
4778 /**
4779  * cik_cp_compute_resume - setup the compute queue registers
4780  *
4781  * @rdev: radeon_device pointer
4782  *
4783  * Program the compute queues and test them to make sure they
4784  * are working.
4785  * Returns 0 for success, error for failure.
4786  */
4787 static int cik_cp_compute_resume(struct radeon_device *rdev)
4788 {
4789 	int r, i, idx;
4790 	u32 tmp;
4791 	bool use_doorbell = true;
4792 	u64 hqd_gpu_addr;
4793 	u64 mqd_gpu_addr;
4794 	u64 eop_gpu_addr;
4795 	u64 wb_gpu_addr;
4796 	u32 *buf;
4797 	struct bonaire_mqd *mqd;
4798 
4799 	r = cik_cp_compute_start(rdev);
4800 	if (r)
4801 		return r;
4802 
4803 	/* fix up chicken bits */
4804 	tmp = RREG32(CP_CPF_DEBUG);
4805 	tmp |= (1 << 23);
4806 	WREG32(CP_CPF_DEBUG, tmp);
4807 
4808 	/* init the pipes */
4809 	mutex_lock(&rdev->srbm_mutex);
4810 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4811 		int me = (i < 4) ? 1 : 2;
4812 		int pipe = (i < 4) ? i : (i - 4);
4813 
4814 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4815 
4816 		cik_srbm_select(rdev, me, pipe, 0, 0);
4817 
4818 		/* write the EOP addr */
4819 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4820 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4821 
4822 		/* set the VMID assigned */
4823 		WREG32(CP_HPD_EOP_VMID, 0);
4824 
4825 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4826 		tmp = RREG32(CP_HPD_EOP_CONTROL);
4827 		tmp &= ~EOP_SIZE_MASK;
4828 		tmp |= order_base_2(MEC_HPD_SIZE / 8);
4829 		WREG32(CP_HPD_EOP_CONTROL, tmp);
4830 	}
4831 	cik_srbm_select(rdev, 0, 0, 0, 0);
4832 	mutex_unlock(&rdev->srbm_mutex);
4833 
4834 	/* init the queues.  Just two for now. */
4835 	for (i = 0; i < 2; i++) {
4836 		if (i == 0)
4837 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4838 		else
4839 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4840 
4841 		if (rdev->ring[idx].mqd_obj == NULL) {
4842 			r = radeon_bo_create(rdev,
4843 					     sizeof(struct bonaire_mqd),
4844 					     PAGE_SIZE, true,
4845 					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4846 					     &rdev->ring[idx].mqd_obj);
4847 			if (r) {
4848 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4849 				return r;
4850 			}
4851 		}
4852 
4853 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4854 		if (unlikely(r != 0)) {
4855 			cik_cp_compute_fini(rdev);
4856 			return r;
4857 		}
4858 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4859 				  &mqd_gpu_addr);
4860 		if (r) {
4861 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4862 			cik_cp_compute_fini(rdev);
4863 			return r;
4864 		}
4865 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4866 		if (r) {
4867 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4868 			cik_cp_compute_fini(rdev);
4869 			return r;
4870 		}
4871 
4872 		/* init the mqd struct */
4873 		memset(buf, 0, sizeof(struct bonaire_mqd));
4874 
4875 		mqd = (struct bonaire_mqd *)buf;
4876 		mqd->header = 0xC0310800;
4877 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4878 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4879 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4880 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4881 
4882 		mutex_lock(&rdev->srbm_mutex);
4883 		cik_srbm_select(rdev, rdev->ring[idx].me,
4884 				rdev->ring[idx].pipe,
4885 				rdev->ring[idx].queue, 0);
4886 
4887 		/* disable wptr polling */
4888 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4889 		tmp &= ~WPTR_POLL_EN;
4890 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4891 
4892 		/* enable doorbell? */
4893 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4894 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4895 		if (use_doorbell)
4896 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4897 		else
4898 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4899 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4900 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4901 
4902 		/* disable the queue if it's active */
4903 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4904 		mqd->queue_state.cp_hqd_pq_rptr = 0;
4905 		mqd->queue_state.cp_hqd_pq_wptr= 0;
4906 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4907 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4908 			for (i = 0; i < rdev->usec_timeout; i++) {
4909 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4910 					break;
4911 				udelay(1);
4912 			}
4913 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4914 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4915 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4916 		}
4917 
4918 		/* set the pointer to the MQD */
4919 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4920 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4921 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4922 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4923 		/* set MQD vmid to 0 */
4924 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4925 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4926 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4927 
4928 		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4929 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4930 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4931 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4932 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4933 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4934 
4935 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4936 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4937 		mqd->queue_state.cp_hqd_pq_control &=
4938 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4939 
4940 		mqd->queue_state.cp_hqd_pq_control |=
4941 			order_base_2(rdev->ring[idx].ring_size / 8);
4942 		mqd->queue_state.cp_hqd_pq_control |=
4943 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4944 #ifdef __BIG_ENDIAN
4945 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4946 #endif
4947 		mqd->queue_state.cp_hqd_pq_control &=
4948 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4949 		mqd->queue_state.cp_hqd_pq_control |=
4950 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4951 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4952 
4953 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4954 		if (i == 0)
4955 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4956 		else
4957 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4958 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4959 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4960 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4961 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4962 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4963 
4964 		/* set the wb address wether it's enabled or not */
4965 		if (i == 0)
4966 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4967 		else
4968 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4969 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4970 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4971 			upper_32_bits(wb_gpu_addr) & 0xffff;
4972 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4973 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4974 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4975 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4976 
4977 		/* enable the doorbell if requested */
4978 		if (use_doorbell) {
4979 			mqd->queue_state.cp_hqd_pq_doorbell_control =
4980 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4981 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4982 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4983 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4984 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4985 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4986 				~(DOORBELL_SOURCE | DOORBELL_HIT);
4987 
4988 		} else {
4989 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4990 		}
4991 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4992 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4993 
4994 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4995 		rdev->ring[idx].wptr = 0;
4996 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4997 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4998 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4999 
5000 		/* set the vmid for the queue */
5001 		mqd->queue_state.cp_hqd_vmid = 0;
5002 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
5003 
5004 		/* activate the queue */
5005 		mqd->queue_state.cp_hqd_active = 1;
5006 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
5007 
5008 		cik_srbm_select(rdev, 0, 0, 0, 0);
5009 		mutex_unlock(&rdev->srbm_mutex);
5010 
5011 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
5012 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
5013 
5014 		rdev->ring[idx].ready = true;
5015 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
5016 		if (r)
5017 			rdev->ring[idx].ready = false;
5018 	}
5019 
5020 	return 0;
5021 }
5022 
5023 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
5024 {
5025 	cik_cp_gfx_enable(rdev, enable);
5026 	cik_cp_compute_enable(rdev, enable);
5027 }
5028 
/*
 * cik_cp_load_microcode - load the gfx and compute CP microcode
 *
 * Loads the gfx microcode first; the compute microcode is only loaded
 * when that succeeded.  Returns 0 on success or the first error seen.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int err = cik_cp_gfx_load_microcode(rdev);

	if (!err)
		err = cik_cp_compute_load_microcode(rdev);

	return err;
}
5042 
/*
 * cik_cp_fini - tear down both CP front ends
 *
 * Calls the gfx teardown followed by the compute teardown.
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	/* gfx first, compute second */
	cik_cp_gfx_fini(rdev);

	cik_cp_compute_fini(rdev);
}
5048 
5049 static int cik_cp_resume(struct radeon_device *rdev)
5050 {
5051 	int r;
5052 
5053 	cik_enable_gui_idle_interrupt(rdev, false);
5054 
5055 	r = cik_cp_load_microcode(rdev);
5056 	if (r)
5057 		return r;
5058 
5059 	r = cik_cp_gfx_resume(rdev);
5060 	if (r)
5061 		return r;
5062 	r = cik_cp_compute_resume(rdev);
5063 	if (r)
5064 		return r;
5065 
5066 	cik_enable_gui_idle_interrupt(rdev, true);
5067 
5068 	return 0;
5069 }
5070 
5071 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
5072 {
5073 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
5074 		RREG32(GRBM_STATUS));
5075 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
5076 		RREG32(GRBM_STATUS2));
5077 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
5078 		RREG32(GRBM_STATUS_SE0));
5079 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
5080 		RREG32(GRBM_STATUS_SE1));
5081 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
5082 		RREG32(GRBM_STATUS_SE2));
5083 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
5084 		RREG32(GRBM_STATUS_SE3));
5085 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
5086 		RREG32(SRBM_STATUS));
5087 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
5088 		RREG32(SRBM_STATUS2));
5089 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
5090 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
5091 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
5092 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
5093 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
5094 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
5095 		 RREG32(CP_STALLED_STAT1));
5096 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
5097 		 RREG32(CP_STALLED_STAT2));
5098 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
5099 		 RREG32(CP_STALLED_STAT3));
5100 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
5101 		 RREG32(CP_CPF_BUSY_STAT));
5102 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
5103 		 RREG32(CP_CPF_STALLED_STAT1));
5104 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
5105 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
5106 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
5107 		 RREG32(CP_CPC_STALLED_STAT1));
5108 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
5109 }
5110 
5111 /**
5112  * cik_gpu_check_soft_reset - check which blocks are busy
5113  *
5114  * @rdev: radeon_device pointer
5115  *
5116  * Check which blocks are busy and return the relevant reset
5117  * mask to be used by cik_gpu_soft_reset().
5118  * Returns a mask of the blocks to be reset.
5119  */
5120 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
5121 {
5122 	u32 reset_mask = 0;
5123 	u32 tmp;
5124 
5125 	/* GRBM_STATUS */
5126 	tmp = RREG32(GRBM_STATUS);
5127 	if (tmp & (PA_BUSY | SC_BUSY |
5128 		   BCI_BUSY | SX_BUSY |
5129 		   TA_BUSY | VGT_BUSY |
5130 		   DB_BUSY | CB_BUSY |
5131 		   GDS_BUSY | SPI_BUSY |
5132 		   IA_BUSY | IA_BUSY_NO_DMA))
5133 		reset_mask |= RADEON_RESET_GFX;
5134 
5135 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5136 		reset_mask |= RADEON_RESET_CP;
5137 
5138 	/* GRBM_STATUS2 */
5139 	tmp = RREG32(GRBM_STATUS2);
5140 	if (tmp & RLC_BUSY)
5141 		reset_mask |= RADEON_RESET_RLC;
5142 
5143 	/* SDMA0_STATUS_REG */
5144 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5145 	if (!(tmp & SDMA_IDLE))
5146 		reset_mask |= RADEON_RESET_DMA;
5147 
5148 	/* SDMA1_STATUS_REG */
5149 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5150 	if (!(tmp & SDMA_IDLE))
5151 		reset_mask |= RADEON_RESET_DMA1;
5152 
5153 	/* SRBM_STATUS2 */
5154 	tmp = RREG32(SRBM_STATUS2);
5155 	if (tmp & SDMA_BUSY)
5156 		reset_mask |= RADEON_RESET_DMA;
5157 
5158 	if (tmp & SDMA1_BUSY)
5159 		reset_mask |= RADEON_RESET_DMA1;
5160 
5161 	/* SRBM_STATUS */
5162 	tmp = RREG32(SRBM_STATUS);
5163 
5164 	if (tmp & IH_BUSY)
5165 		reset_mask |= RADEON_RESET_IH;
5166 
5167 	if (tmp & SEM_BUSY)
5168 		reset_mask |= RADEON_RESET_SEM;
5169 
5170 	if (tmp & GRBM_RQ_PENDING)
5171 		reset_mask |= RADEON_RESET_GRBM;
5172 
5173 	if (tmp & VMC_BUSY)
5174 		reset_mask |= RADEON_RESET_VMC;
5175 
5176 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5177 		   MCC_BUSY | MCD_BUSY))
5178 		reset_mask |= RADEON_RESET_MC;
5179 
5180 	if (evergreen_is_display_hung(rdev))
5181 		reset_mask |= RADEON_RESET_DISPLAY;
5182 
5183 	/* Skip MC reset as it's mostly likely not hung, just busy */
5184 	if (reset_mask & RADEON_RESET_MC) {
5185 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5186 		reset_mask &= ~RADEON_RESET_MC;
5187 	}
5188 
5189 	return reset_mask;
5190 }
5191 
/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask.
 * Does nothing when @reset_mask is 0.  Otherwise the status registers
 * are logged, CG/PG and the RLC are shut down, the CP/MEC (and the
 * requested SDMA engines) are halted, the MC is stopped, the selected
 * GRBM/SRBM soft reset bits are pulsed, and the MC is resumed.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* log the state before the reset, including the VM fault registers */
	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* only the SDMA engines named in reset_mask are halted */
	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* stop the MC; a wait timeout is only reported, the reset continues */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate reset_mask into GRBM / SRBM soft reset bits */

	/* a GFX, COMPUTE or CP request resets both CP and GFX */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	/* a CP request additionally resets the GRBM through SRBM */
	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* the MC reset bit is only ever requested on non-IGP parts */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	/*
	 * Pulse the GRBM reset bits: set, read back, wait 50us, clear,
	 * read back.  NOTE(review): the read-backs presumably make sure
	 * the writes have landed before continuing - confirm.
	 */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* same set / wait / clear pulse for the SRBM reset bits */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	/* log the state again after the reset */
	cik_print_gpu_status_regs(rdev);
}
5322 
/*
 * GMCON register values captured by kv_save_regs_for_reset() and written
 * back by kv_restore_regs_for_reset(); cik_gpu_pci_config_reset() uses the
 * pair around the PCI config reset on IGP parts.
 */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;	/* GMCON_RENG_EXECUTE */
	u32 gmcon_misc;		/* GMCON_MISC */
	u32 gmcon_misc3;	/* GMCON_MISC3 */
};
5328 
5329 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5330 				   struct kv_reset_save_regs *save)
5331 {
5332 	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5333 	save->gmcon_misc = RREG32(GMCON_MISC);
5334 	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5335 
5336 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5337 	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5338 						STCTRL_STUTTER_EN));
5339 }
5340 
5341 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5342 				      struct kv_reset_save_regs *save)
5343 {
5344 	int i;
5345 
5346 	WREG32(GMCON_PGFSM_WRITE, 0);
5347 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5348 
5349 	for (i = 0; i < 5; i++)
5350 		WREG32(GMCON_PGFSM_WRITE, 0);
5351 
5352 	WREG32(GMCON_PGFSM_WRITE, 0);
5353 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5354 
5355 	for (i = 0; i < 5; i++)
5356 		WREG32(GMCON_PGFSM_WRITE, 0);
5357 
5358 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5359 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5360 
5361 	for (i = 0; i < 5; i++)
5362 		WREG32(GMCON_PGFSM_WRITE, 0);
5363 
5364 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5365 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5366 
5367 	for (i = 0; i < 5; i++)
5368 		WREG32(GMCON_PGFSM_WRITE, 0);
5369 
5370 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5371 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5372 
5373 	for (i = 0; i < 5; i++)
5374 		WREG32(GMCON_PGFSM_WRITE, 0);
5375 
5376 	WREG32(GMCON_PGFSM_WRITE, 0);
5377 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5378 
5379 	for (i = 0; i < 5; i++)
5380 		WREG32(GMCON_PGFSM_WRITE, 0);
5381 
5382 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5383 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5384 
5385 	for (i = 0; i < 5; i++)
5386 		WREG32(GMCON_PGFSM_WRITE, 0);
5387 
5388 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5389 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5390 
5391 	for (i = 0; i < 5; i++)
5392 		WREG32(GMCON_PGFSM_WRITE, 0);
5393 
5394 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5395 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5396 
5397 	for (i = 0; i < 5; i++)
5398 		WREG32(GMCON_PGFSM_WRITE, 0);
5399 
5400 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5401 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5402 
5403 	for (i = 0; i < 5; i++)
5404 		WREG32(GMCON_PGFSM_WRITE, 0);
5405 
5406 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5407 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5408 
5409 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5410 	WREG32(GMCON_MISC, save->gmcon_misc);
5411 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5412 }
5413 
/*
 * cik_gpu_pci_config_reset - reset the asic through PCI config space
 *
 * Shuts down CG/PG, halts the CP, MEC and both SDMA engines, stops the
 * RLC and the MC, disables bus mastering and then triggers
 * radeon_pci_config_reset().  Afterwards it polls CONFIG_MEMSIZE for up
 * to rdev->usec_timeout microseconds until it no longer reads
 * 0xffffffff.  On IGP parts the GMCON registers are saved before and
 * restored after the reset.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* IGP only: snapshot the GMCON registers so they can be restored */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		/* all-ones is treated as "still in reset" */
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5476 
5477 /**
5478  * cik_asic_reset - soft reset GPU
5479  *
5480  * @rdev: radeon_device pointer
5481  *
5482  * Look up which blocks are hung and attempt
5483  * to reset them.
5484  * Returns 0 for success.
5485  */
5486 int cik_asic_reset(struct radeon_device *rdev)
5487 {
5488 	u32 reset_mask;
5489 
5490 	reset_mask = cik_gpu_check_soft_reset(rdev);
5491 
5492 	if (reset_mask)
5493 		r600_set_bios_scratch_engine_hung(rdev, true);
5494 
5495 	/* try soft reset */
5496 	cik_gpu_soft_reset(rdev, reset_mask);
5497 
5498 	reset_mask = cik_gpu_check_soft_reset(rdev);
5499 
5500 	/* try pci config reset */
5501 	if (reset_mask && radeon_hard_reset)
5502 		cik_gpu_pci_config_reset(rdev);
5503 
5504 	reset_mask = cik_gpu_check_soft_reset(rdev);
5505 
5506 	if (!reset_mask)
5507 		r600_set_bios_scratch_engine_hung(rdev, false);
5508 
5509 	return 0;
5510 }
5511 
5512 /**
5513  * cik_gfx_is_lockup - check if the 3D engine is locked up
5514  *
5515  * @rdev: radeon_device pointer
5516  * @ring: radeon_ring structure holding ring information
5517  *
5518  * Check if the 3D engine is locked up (CIK).
5519  * Returns true if the engine is locked, false if not.
5520  */
5521 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5522 {
5523 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5524 
5525 	if (!(reset_mask & (RADEON_RESET_GFX |
5526 			    RADEON_RESET_COMPUTE |
5527 			    RADEON_RESET_CP))) {
5528 		radeon_ring_lockup_update(rdev, ring);
5529 		return false;
5530 	}
5531 	return radeon_ring_test_lockup(rdev, ring);
5532 }
5533 
5534 /* MC */
5535 /**
5536  * cik_mc_program - program the GPU memory controller
5537  *
5538  * @rdev: radeon_device pointer
5539  *
5540  * Set the location of vram, gart, and AGP in the GPU's
5541  * physical address space (CIK).
5542  */
5543 static void cik_mc_program(struct radeon_device *rdev)
5544 {
5545 	struct evergreen_mc_save save;
5546 	u32 tmp;
5547 	int i, j;
5548 
5549 	/* Initialize HDP */
5550 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5551 		WREG32((0x2c14 + j), 0x00000000);
5552 		WREG32((0x2c18 + j), 0x00000000);
5553 		WREG32((0x2c1c + j), 0x00000000);
5554 		WREG32((0x2c20 + j), 0x00000000);
5555 		WREG32((0x2c24 + j), 0x00000000);
5556 	}
5557 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5558 
5559 	evergreen_mc_stop(rdev, &save);
5560 	if (radeon_mc_wait_for_idle(rdev)) {
5561 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5562 	}
5563 	/* Lockout access through VGA aperture*/
5564 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5565 	/* Update configuration */
5566 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5567 	       rdev->mc.vram_start >> 12);
5568 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5569 	       rdev->mc.vram_end >> 12);
5570 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5571 	       rdev->vram_scratch.gpu_addr >> 12);
5572 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5573 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5574 	WREG32(MC_VM_FB_LOCATION, tmp);
5575 	/* XXX double check these! */
5576 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5577 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5578 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5579 	WREG32(MC_VM_AGP_BASE, 0);
5580 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5581 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5582 	if (radeon_mc_wait_for_idle(rdev)) {
5583 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5584 	}
5585 	evergreen_mc_resume(rdev, &save);
5586 	/* we need to own VRAM, so turn off the VGA renderer here
5587 	 * to stop it overwriting our objects */
5588 	rv515_vga_render_disable(rdev);
5589 }
5590 
5591 /**
5592  * cik_mc_init - initialize the memory controller driver params
5593  *
5594  * @rdev: radeon_device pointer
5595  *
5596  * Look up the amount of vram, vram width, and decide how to place
5597  * vram and gart within the GPU's physical address space (CIK).
5598  * Returns 0 for success.
5599  */
5600 static int cik_mc_init(struct radeon_device *rdev)
5601 {
5602 	u32 tmp;
5603 	int chansize, numchan;
5604 
5605 	/* Get VRAM informations */
5606 	rdev->mc.vram_is_ddr = true;
5607 	tmp = RREG32(MC_ARB_RAMCFG);
5608 	if (tmp & CHANSIZE_MASK) {
5609 		chansize = 64;
5610 	} else {
5611 		chansize = 32;
5612 	}
5613 	tmp = RREG32(MC_SHARED_CHMAP);
5614 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5615 	case 0:
5616 	default:
5617 		numchan = 1;
5618 		break;
5619 	case 1:
5620 		numchan = 2;
5621 		break;
5622 	case 2:
5623 		numchan = 4;
5624 		break;
5625 	case 3:
5626 		numchan = 8;
5627 		break;
5628 	case 4:
5629 		numchan = 3;
5630 		break;
5631 	case 5:
5632 		numchan = 6;
5633 		break;
5634 	case 6:
5635 		numchan = 10;
5636 		break;
5637 	case 7:
5638 		numchan = 12;
5639 		break;
5640 	case 8:
5641 		numchan = 16;
5642 		break;
5643 	}
5644 	rdev->mc.vram_width = numchan * chansize;
5645 	/* Could aper size report 0 ? */
5646 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5647 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5648 	/* size in MB on si */
5649 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5650 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5651 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5652 	si_vram_gtt_location(rdev, &rdev->mc);
5653 	radeon_update_bandwidth_info(rdev);
5654 
5655 	return 0;
5656 }
5657 
5658 /*
5659  * GART
5660  * VMID 0 is the physical GPU addresses as used by the kernel.
5661  * VMIDs 1-15 are used for userspace clients and are handled
5662  * by the radeon vm/hsa code.
5663  */
5664 /**
5665  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5666  *
5667  * @rdev: radeon_device pointer
5668  *
5669  * Flush the TLB for the VMID 0 page table (CIK).
5670  */
5671 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5672 {
5673 	/* flush hdp cache */
5674 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5675 
5676 	/* bits 0-15 are the VM contexts0-15 */
5677 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5678 }
5679 
5680 /**
5681  * cik_pcie_gart_enable - gart enable
5682  *
5683  * @rdev: radeon_device pointer
5684  *
5685  * This sets up the TLBs, programs the page tables for VMID0,
5686  * sets up the hw for VMIDs 1-15 which are allocated on
5687  * demand, and sets up the global locations for the LDS, GDS,
5688  * and GPUVM for FSA64 clients (CIK).
5689  * Returns 0 for success, errors for failure.
5690  */
5691 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5692 {
5693 	int r, i;
5694 
5695 	if (rdev->gart.robj == NULL) {
5696 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5697 		return -EINVAL;
5698 	}
5699 	r = radeon_gart_table_vram_pin(rdev);
5700 	if (r)
5701 		return r;
5702 	/* Setup TLB control */
5703 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5704 	       (0xA << 7) |
5705 	       ENABLE_L1_TLB |
5706 	       ENABLE_L1_FRAGMENT_PROCESSING |
5707 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5708 	       ENABLE_ADVANCED_DRIVER_MODEL |
5709 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5710 	/* Setup L2 cache */
5711 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5712 	       ENABLE_L2_FRAGMENT_PROCESSING |
5713 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5714 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5715 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5716 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5717 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5718 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5719 	       BANK_SELECT(4) |
5720 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5721 	/* setup context0 */
5722 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5723 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5724 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5725 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5726 			(u32)(rdev->dummy_page.addr >> 12));
5727 	WREG32(VM_CONTEXT0_CNTL2, 0);
5728 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5729 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5730 
5731 	WREG32(0x15D4, 0);
5732 	WREG32(0x15D8, 0);
5733 	WREG32(0x15DC, 0);
5734 
5735 	/* empty context1-15 */
5736 	/* FIXME start with 4G, once using 2 level pt switch to full
5737 	 * vm size space
5738 	 */
5739 	/* set vm size, must be a multiple of 4 */
5740 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5741 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5742 	for (i = 1; i < 16; i++) {
5743 		if (i < 8)
5744 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5745 			       rdev->gart.table_addr >> 12);
5746 		else
5747 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5748 			       rdev->gart.table_addr >> 12);
5749 	}
5750 
5751 	/* enable context1-15 */
5752 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5753 	       (u32)(rdev->dummy_page.addr >> 12));
5754 	WREG32(VM_CONTEXT1_CNTL2, 4);
5755 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5756 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5757 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5758 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5759 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5760 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5761 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5762 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5763 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5764 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5765 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5766 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5767 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5768 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5769 
5770 	if (rdev->family == CHIP_KAVERI) {
5771 		u32 tmp = RREG32(CHUB_CONTROL);
5772 		tmp &= ~BYPASS_VM;
5773 		WREG32(CHUB_CONTROL, tmp);
5774 	}
5775 
5776 	/* XXX SH_MEM regs */
5777 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5778 	mutex_lock(&rdev->srbm_mutex);
5779 	for (i = 0; i < 16; i++) {
5780 		cik_srbm_select(rdev, 0, 0, 0, i);
5781 		/* CP and shaders */
5782 		WREG32(SH_MEM_CONFIG, 0);
5783 		WREG32(SH_MEM_APE1_BASE, 1);
5784 		WREG32(SH_MEM_APE1_LIMIT, 0);
5785 		WREG32(SH_MEM_BASES, 0);
5786 		/* SDMA GFX */
5787 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5788 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5789 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5790 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5791 		/* XXX SDMA RLC - todo */
5792 	}
5793 	cik_srbm_select(rdev, 0, 0, 0, 0);
5794 	mutex_unlock(&rdev->srbm_mutex);
5795 
5796 	cik_pcie_gart_tlb_flush(rdev);
5797 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5798 		 (unsigned)(rdev->mc.gtt_size >> 20),
5799 		 (unsigned long long)rdev->gart.table_addr);
5800 	rdev->gart.ready = true;
5801 	return 0;
5802 }
5803 
5804 /**
5805  * cik_pcie_gart_disable - gart disable
5806  *
5807  * @rdev: radeon_device pointer
5808  *
5809  * This disables all VM page table (CIK).
5810  */
5811 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5812 {
5813 	/* Disable all tables */
5814 	WREG32(VM_CONTEXT0_CNTL, 0);
5815 	WREG32(VM_CONTEXT1_CNTL, 0);
5816 	/* Setup TLB control */
5817 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5818 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5819 	/* Setup L2 cache */
5820 	WREG32(VM_L2_CNTL,
5821 	       ENABLE_L2_FRAGMENT_PROCESSING |
5822 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5823 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5824 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5825 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5826 	WREG32(VM_L2_CNTL2, 0);
5827 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5828 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5829 	radeon_gart_table_vram_unpin(rdev);
5830 }
5831 
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Disable the GART in hw, then release the GART table and the
 * common GART state (CIK).
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* stop the hw from using the table before it is freed */
	cik_pcie_gart_disable(rdev);

	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5845 
5846 /* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking, so there is nothing to validate in
 * software; neither argument is inspected (CIK).
 * Always returns 0.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	const int ok = 0;

	return ok;
}
5859 
5860 /*
5861  * vm
5862  * VMID 0 is the physical GPU addresses as used by the kernel.
5863  * VMIDs 1-15 are used for userspace clients and are handled
5864  * by the radeon vm/hsa code.
5865  */
5866 /**
5867  * cik_vm_init - cik vm init callback
5868  *
5869  * @rdev: radeon_device pointer
5870  *
5871  * Inits cik specific vm parameters (number of VMs, base of vram for
5872  * VMIDs 1-15) (CIK).
5873  * Returns 0 for success.
5874  */
5875 int cik_vm_init(struct radeon_device *rdev)
5876 {
5877 	/* number of VMs */
5878 	rdev->vm_manager.nvm = 16;
5879 	/* base offset of vram pages */
5880 	if (rdev->flags & RADEON_IS_IGP) {
5881 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5882 		tmp <<= 22;
5883 		rdev->vm_manager.vram_base_offset = tmp;
5884 	} else
5885 		rdev->vm_manager.vram_base_offset = 0;
5886 
5887 	return 0;
5888 }
5889 
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 * Currently there is nothing to undo, so this is a no-op.
 */
void cik_vm_fini(struct radeon_device *rdev)
{
	return;
}
5900 
5901 /**
5902  * cik_vm_decode_fault - print human readable fault info
5903  *
5904  * @rdev: radeon_device pointer
5905  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5906  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5907  *
5908  * Print human readable fault information (CIK).
5909  */
5910 static void cik_vm_decode_fault(struct radeon_device *rdev,
5911 				u32 status, u32 addr, u32 mc_client)
5912 {
5913 	u32 mc_id;
5914 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5915 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5916 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5917 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5918 
5919 	if (rdev->family == CHIP_HAWAII)
5920 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5921 	else
5922 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5923 
5924 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5925 	       protections, vmid, addr,
5926 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5927 	       block, mc_client, mc_id);
5928 }
5929 
5930 /**
5931  * cik_vm_flush - cik vm flush using the CP
5932  *
5933  * @rdev: radeon_device pointer
5934  *
5935  * Update the page table base and flush the VM TLB
5936  * using the CP (CIK).
5937  */
5938 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5939 {
5940 	struct radeon_ring *ring = &rdev->ring[ridx];
5941 	int usepfp = (ridx == RADEON_RING_TYPE_GFX_INDEX);
5942 
5943 	if (vm == NULL)
5944 		return;
5945 
5946 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5947 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5948 				 WRITE_DATA_DST_SEL(0)));
5949 	if (vm->id < 8) {
5950 		radeon_ring_write(ring,
5951 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5952 	} else {
5953 		radeon_ring_write(ring,
5954 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5955 	}
5956 	radeon_ring_write(ring, 0);
5957 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5958 
5959 	/* update SH_MEM_* regs */
5960 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5961 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5962 				 WRITE_DATA_DST_SEL(0)));
5963 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5964 	radeon_ring_write(ring, 0);
5965 	radeon_ring_write(ring, VMID(vm->id));
5966 
5967 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5968 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5969 				 WRITE_DATA_DST_SEL(0)));
5970 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5971 	radeon_ring_write(ring, 0);
5972 
5973 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5974 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5975 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5976 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5977 
5978 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5979 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5980 				 WRITE_DATA_DST_SEL(0)));
5981 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5982 	radeon_ring_write(ring, 0);
5983 	radeon_ring_write(ring, VMID(0));
5984 
5985 	/* HDP flush */
5986 	cik_hdp_flush_cp_ring_emit(rdev, ridx);
5987 
5988 	/* bits 0-15 are the VM contexts0-15 */
5989 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5990 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5991 				 WRITE_DATA_DST_SEL(0)));
5992 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5993 	radeon_ring_write(ring, 0);
5994 	radeon_ring_write(ring, 1 << vm->id);
5995 
5996 	/* compute doesn't have PFP */
5997 	if (usepfp) {
5998 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5999 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6000 		radeon_ring_write(ring, 0x0);
6001 	}
6002 }
6003 
6004 /*
6005  * RLC
6006  * The RLC is a multi-purpose microengine that handles a
6007  * variety of functions, the most important of which is
6008  * the interrupt controller.
6009  */
6010 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6011 					  bool enable)
6012 {
6013 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
6014 
6015 	if (enable)
6016 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6017 	else
6018 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6019 	WREG32(CP_INT_CNTL_RING0, tmp);
6020 }
6021 
6022 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6023 {
6024 	u32 tmp;
6025 
6026 	tmp = RREG32(RLC_LB_CNTL);
6027 	if (enable)
6028 		tmp |= LOAD_BALANCE_ENABLE;
6029 	else
6030 		tmp &= ~LOAD_BALANCE_ENABLE;
6031 	WREG32(RLC_LB_CNTL, tmp);
6032 }
6033 
6034 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
6035 {
6036 	u32 i, j, k;
6037 	u32 mask;
6038 
6039 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6040 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6041 			cik_select_se_sh(rdev, i, j);
6042 			for (k = 0; k < rdev->usec_timeout; k++) {
6043 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
6044 					break;
6045 				udelay(1);
6046 			}
6047 		}
6048 	}
6049 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6050 
6051 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
6052 	for (k = 0; k < rdev->usec_timeout; k++) {
6053 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
6054 			break;
6055 		udelay(1);
6056 	}
6057 }
6058 
6059 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6060 {
6061 	u32 tmp;
6062 
6063 	tmp = RREG32(RLC_CNTL);
6064 	if (tmp != rlc)
6065 		WREG32(RLC_CNTL, rlc);
6066 }
6067 
6068 static u32 cik_halt_rlc(struct radeon_device *rdev)
6069 {
6070 	u32 data, orig;
6071 
6072 	orig = data = RREG32(RLC_CNTL);
6073 
6074 	if (data & RLC_ENABLE) {
6075 		u32 i;
6076 
6077 		data &= ~RLC_ENABLE;
6078 		WREG32(RLC_CNTL, data);
6079 
6080 		for (i = 0; i < rdev->usec_timeout; i++) {
6081 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
6082 				break;
6083 			udelay(1);
6084 		}
6085 
6086 		cik_wait_for_rlc_serdes(rdev);
6087 	}
6088 
6089 	return orig;
6090 }
6091 
6092 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
6093 {
6094 	u32 tmp, i, mask;
6095 
6096 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
6097 	WREG32(RLC_GPR_REG2, tmp);
6098 
6099 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
6100 	for (i = 0; i < rdev->usec_timeout; i++) {
6101 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
6102 			break;
6103 		udelay(1);
6104 	}
6105 
6106 	for (i = 0; i < rdev->usec_timeout; i++) {
6107 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
6108 			break;
6109 		udelay(1);
6110 	}
6111 }
6112 
6113 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6114 {
6115 	u32 tmp;
6116 
6117 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6118 	WREG32(RLC_GPR_REG2, tmp);
6119 }
6120 
6121 /**
6122  * cik_rlc_stop - stop the RLC ME
6123  *
6124  * @rdev: radeon_device pointer
6125  *
6126  * Halt the RLC ME (MicroEngine) (CIK).
6127  */
6128 static void cik_rlc_stop(struct radeon_device *rdev)
6129 {
6130 	WREG32(RLC_CNTL, 0);
6131 
6132 	cik_enable_gui_idle_interrupt(rdev, false);
6133 
6134 	cik_wait_for_rlc_serdes(rdev);
6135 }
6136 
6137 /**
6138  * cik_rlc_start - start the RLC ME
6139  *
6140  * @rdev: radeon_device pointer
6141  *
6142  * Unhalt the RLC ME (MicroEngine) (CIK).
6143  */
6144 static void cik_rlc_start(struct radeon_device *rdev)
6145 {
6146 	WREG32(RLC_CNTL, RLC_ENABLE);
6147 
6148 	cik_enable_gui_idle_interrupt(rdev, true);
6149 
6150 	udelay(50);
6151 }
6152 
6153 /**
6154  * cik_rlc_resume - setup the RLC hw
6155  *
6156  * @rdev: radeon_device pointer
6157  *
6158  * Initialize the RLC registers, load the ucode,
6159  * and start the RLC (CIK).
6160  * Returns 0 for success, -EINVAL if the ucode is not available.
6161  */
6162 static int cik_rlc_resume(struct radeon_device *rdev)
6163 {
6164 	u32 i, size, tmp;
6165 
6166 	if (!rdev->rlc_fw)
6167 		return -EINVAL;
6168 
6169 	cik_rlc_stop(rdev);
6170 
6171 	/* disable CG */
6172 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6173 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6174 
6175 	si_rlc_reset(rdev);
6176 
6177 	cik_init_pg(rdev);
6178 
6179 	cik_init_cg(rdev);
6180 
6181 	WREG32(RLC_LB_CNTR_INIT, 0);
6182 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6183 
6184 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6185 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6186 	WREG32(RLC_LB_PARAMS, 0x00600408);
6187 	WREG32(RLC_LB_CNTL, 0x80000004);
6188 
6189 	WREG32(RLC_MC_CNTL, 0);
6190 	WREG32(RLC_UCODE_CNTL, 0);
6191 
6192 	if (rdev->new_fw) {
6193 		const struct rlc_firmware_header_v1_0 *hdr =
6194 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6195 		const __le32 *fw_data = (const __le32 *)
6196 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6197 
6198 		radeon_ucode_print_rlc_hdr(&hdr->header);
6199 
6200 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6201 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6202 		for (i = 0; i < size; i++)
6203 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6204 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6205 	} else {
6206 		const __be32 *fw_data;
6207 
6208 		switch (rdev->family) {
6209 		case CHIP_BONAIRE:
6210 		case CHIP_HAWAII:
6211 		default:
6212 			size = BONAIRE_RLC_UCODE_SIZE;
6213 			break;
6214 		case CHIP_KAVERI:
6215 			size = KV_RLC_UCODE_SIZE;
6216 			break;
6217 		case CHIP_KABINI:
6218 			size = KB_RLC_UCODE_SIZE;
6219 			break;
6220 		case CHIP_MULLINS:
6221 			size = ML_RLC_UCODE_SIZE;
6222 			break;
6223 		}
6224 
6225 		fw_data = (const __be32 *)rdev->rlc_fw->data;
6226 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6227 		for (i = 0; i < size; i++)
6228 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6229 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6230 	}
6231 
6232 	/* XXX - find out what chips support lbpw */
6233 	cik_enable_lbpw(rdev, false);
6234 
6235 	if (rdev->family == CHIP_BONAIRE)
6236 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6237 
6238 	cik_rlc_start(rdev);
6239 
6240 	return 0;
6241 }
6242 
6243 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6244 {
6245 	u32 data, orig, tmp, tmp2;
6246 
6247 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6248 
6249 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6250 		cik_enable_gui_idle_interrupt(rdev, true);
6251 
6252 		tmp = cik_halt_rlc(rdev);
6253 
6254 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6255 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6256 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6257 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6258 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6259 
6260 		cik_update_rlc(rdev, tmp);
6261 
6262 		data |= CGCG_EN | CGLS_EN;
6263 	} else {
6264 		cik_enable_gui_idle_interrupt(rdev, false);
6265 
6266 		RREG32(CB_CGTT_SCLK_CTRL);
6267 		RREG32(CB_CGTT_SCLK_CTRL);
6268 		RREG32(CB_CGTT_SCLK_CTRL);
6269 		RREG32(CB_CGTT_SCLK_CTRL);
6270 
6271 		data &= ~(CGCG_EN | CGLS_EN);
6272 	}
6273 
6274 	if (orig != data)
6275 		WREG32(RLC_CGCG_CGLS_CTRL, data);
6276 
6277 }
6278 
6279 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6280 {
6281 	u32 data, orig, tmp = 0;
6282 
6283 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6284 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6285 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6286 				orig = data = RREG32(CP_MEM_SLP_CNTL);
6287 				data |= CP_MEM_LS_EN;
6288 				if (orig != data)
6289 					WREG32(CP_MEM_SLP_CNTL, data);
6290 			}
6291 		}
6292 
6293 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6294 		data &= 0xfffffffd;
6295 		if (orig != data)
6296 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6297 
6298 		tmp = cik_halt_rlc(rdev);
6299 
6300 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6301 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6302 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6303 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6304 		WREG32(RLC_SERDES_WR_CTRL, data);
6305 
6306 		cik_update_rlc(rdev, tmp);
6307 
6308 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6309 			orig = data = RREG32(CGTS_SM_CTRL_REG);
6310 			data &= ~SM_MODE_MASK;
6311 			data |= SM_MODE(0x2);
6312 			data |= SM_MODE_ENABLE;
6313 			data &= ~CGTS_OVERRIDE;
6314 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6315 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6316 				data &= ~CGTS_LS_OVERRIDE;
6317 			data &= ~ON_MONITOR_ADD_MASK;
6318 			data |= ON_MONITOR_ADD_EN;
6319 			data |= ON_MONITOR_ADD(0x96);
6320 			if (orig != data)
6321 				WREG32(CGTS_SM_CTRL_REG, data);
6322 		}
6323 	} else {
6324 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6325 		data |= 0x00000002;
6326 		if (orig != data)
6327 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6328 
6329 		data = RREG32(RLC_MEM_SLP_CNTL);
6330 		if (data & RLC_MEM_LS_EN) {
6331 			data &= ~RLC_MEM_LS_EN;
6332 			WREG32(RLC_MEM_SLP_CNTL, data);
6333 		}
6334 
6335 		data = RREG32(CP_MEM_SLP_CNTL);
6336 		if (data & CP_MEM_LS_EN) {
6337 			data &= ~CP_MEM_LS_EN;
6338 			WREG32(CP_MEM_SLP_CNTL, data);
6339 		}
6340 
6341 		orig = data = RREG32(CGTS_SM_CTRL_REG);
6342 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6343 		if (orig != data)
6344 			WREG32(CGTS_SM_CTRL_REG, data);
6345 
6346 		tmp = cik_halt_rlc(rdev);
6347 
6348 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6349 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6350 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6351 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6352 		WREG32(RLC_SERDES_WR_CTRL, data);
6353 
6354 		cik_update_rlc(rdev, tmp);
6355 	}
6356 }
6357 
6358 static const u32 mc_cg_registers[] =
6359 {
6360 	MC_HUB_MISC_HUB_CG,
6361 	MC_HUB_MISC_SIP_CG,
6362 	MC_HUB_MISC_VM_CG,
6363 	MC_XPB_CLK_GAT,
6364 	ATC_MISC_CG,
6365 	MC_CITF_MISC_WR_CG,
6366 	MC_CITF_MISC_RD_CG,
6367 	MC_CITF_MISC_VM_CG,
6368 	VM_L2_CG,
6369 };
6370 
6371 static void cik_enable_mc_ls(struct radeon_device *rdev,
6372 			     bool enable)
6373 {
6374 	int i;
6375 	u32 orig, data;
6376 
6377 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6378 		orig = data = RREG32(mc_cg_registers[i]);
6379 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6380 			data |= MC_LS_ENABLE;
6381 		else
6382 			data &= ~MC_LS_ENABLE;
6383 		if (data != orig)
6384 			WREG32(mc_cg_registers[i], data);
6385 	}
6386 }
6387 
6388 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6389 			       bool enable)
6390 {
6391 	int i;
6392 	u32 orig, data;
6393 
6394 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6395 		orig = data = RREG32(mc_cg_registers[i]);
6396 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6397 			data |= MC_CG_ENABLE;
6398 		else
6399 			data &= ~MC_CG_ENABLE;
6400 		if (data != orig)
6401 			WREG32(mc_cg_registers[i], data);
6402 	}
6403 }
6404 
6405 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6406 				 bool enable)
6407 {
6408 	u32 orig, data;
6409 
6410 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6411 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6412 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6413 	} else {
6414 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6415 		data |= 0xff000000;
6416 		if (data != orig)
6417 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6418 
6419 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6420 		data |= 0xff000000;
6421 		if (data != orig)
6422 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6423 	}
6424 }
6425 
6426 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6427 				 bool enable)
6428 {
6429 	u32 orig, data;
6430 
6431 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6432 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6433 		data |= 0x100;
6434 		if (orig != data)
6435 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6436 
6437 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6438 		data |= 0x100;
6439 		if (orig != data)
6440 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6441 	} else {
6442 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6443 		data &= ~0x100;
6444 		if (orig != data)
6445 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6446 
6447 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6448 		data &= ~0x100;
6449 		if (orig != data)
6450 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6451 	}
6452 }
6453 
6454 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6455 				bool enable)
6456 {
6457 	u32 orig, data;
6458 
6459 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6460 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6461 		data = 0xfff;
6462 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6463 
6464 		orig = data = RREG32(UVD_CGC_CTRL);
6465 		data |= DCM;
6466 		if (orig != data)
6467 			WREG32(UVD_CGC_CTRL, data);
6468 	} else {
6469 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6470 		data &= ~0xfff;
6471 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6472 
6473 		orig = data = RREG32(UVD_CGC_CTRL);
6474 		data &= ~DCM;
6475 		if (orig != data)
6476 			WREG32(UVD_CGC_CTRL, data);
6477 	}
6478 }
6479 
6480 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6481 			       bool enable)
6482 {
6483 	u32 orig, data;
6484 
6485 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6486 
6487 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6488 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6489 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6490 	else
6491 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6492 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6493 
6494 	if (orig != data)
6495 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6496 }
6497 
6498 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6499 				bool enable)
6500 {
6501 	u32 orig, data;
6502 
6503 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6504 
6505 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6506 		data &= ~CLOCK_GATING_DIS;
6507 	else
6508 		data |= CLOCK_GATING_DIS;
6509 
6510 	if (orig != data)
6511 		WREG32(HDP_HOST_PATH_CNTL, data);
6512 }
6513 
6514 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6515 			      bool enable)
6516 {
6517 	u32 orig, data;
6518 
6519 	orig = data = RREG32(HDP_MEM_POWER_LS);
6520 
6521 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6522 		data |= HDP_LS_ENABLE;
6523 	else
6524 		data &= ~HDP_LS_ENABLE;
6525 
6526 	if (orig != data)
6527 		WREG32(HDP_MEM_POWER_LS, data);
6528 }
6529 
6530 void cik_update_cg(struct radeon_device *rdev,
6531 		   u32 block, bool enable)
6532 {
6533 
6534 	if (block & RADEON_CG_BLOCK_GFX) {
6535 		cik_enable_gui_idle_interrupt(rdev, false);
6536 		/* order matters! */
6537 		if (enable) {
6538 			cik_enable_mgcg(rdev, true);
6539 			cik_enable_cgcg(rdev, true);
6540 		} else {
6541 			cik_enable_cgcg(rdev, false);
6542 			cik_enable_mgcg(rdev, false);
6543 		}
6544 		cik_enable_gui_idle_interrupt(rdev, true);
6545 	}
6546 
6547 	if (block & RADEON_CG_BLOCK_MC) {
6548 		if (!(rdev->flags & RADEON_IS_IGP)) {
6549 			cik_enable_mc_mgcg(rdev, enable);
6550 			cik_enable_mc_ls(rdev, enable);
6551 		}
6552 	}
6553 
6554 	if (block & RADEON_CG_BLOCK_SDMA) {
6555 		cik_enable_sdma_mgcg(rdev, enable);
6556 		cik_enable_sdma_mgls(rdev, enable);
6557 	}
6558 
6559 	if (block & RADEON_CG_BLOCK_BIF) {
6560 		cik_enable_bif_mgls(rdev, enable);
6561 	}
6562 
6563 	if (block & RADEON_CG_BLOCK_UVD) {
6564 		if (rdev->has_uvd)
6565 			cik_enable_uvd_mgcg(rdev, enable);
6566 	}
6567 
6568 	if (block & RADEON_CG_BLOCK_HDP) {
6569 		cik_enable_hdp_mgcg(rdev, enable);
6570 		cik_enable_hdp_ls(rdev, enable);
6571 	}
6572 
6573 	if (block & RADEON_CG_BLOCK_VCE) {
6574 		vce_v2_0_enable_mgcg(rdev, enable);
6575 	}
6576 }
6577 
6578 static void cik_init_cg(struct radeon_device *rdev)
6579 {
6580 
6581 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6582 
6583 	if (rdev->has_uvd)
6584 		si_init_uvd_internal_cg(rdev);
6585 
6586 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6587 			     RADEON_CG_BLOCK_SDMA |
6588 			     RADEON_CG_BLOCK_BIF |
6589 			     RADEON_CG_BLOCK_UVD |
6590 			     RADEON_CG_BLOCK_HDP), true);
6591 }
6592 
6593 static void cik_fini_cg(struct radeon_device *rdev)
6594 {
6595 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6596 			     RADEON_CG_BLOCK_SDMA |
6597 			     RADEON_CG_BLOCK_BIF |
6598 			     RADEON_CG_BLOCK_UVD |
6599 			     RADEON_CG_BLOCK_HDP), false);
6600 
6601 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6602 }
6603 
6604 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6605 					  bool enable)
6606 {
6607 	u32 data, orig;
6608 
6609 	orig = data = RREG32(RLC_PG_CNTL);
6610 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6611 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6612 	else
6613 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6614 	if (orig != data)
6615 		WREG32(RLC_PG_CNTL, data);
6616 }
6617 
6618 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6619 					  bool enable)
6620 {
6621 	u32 data, orig;
6622 
6623 	orig = data = RREG32(RLC_PG_CNTL);
6624 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6625 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6626 	else
6627 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6628 	if (orig != data)
6629 		WREG32(RLC_PG_CNTL, data);
6630 }
6631 
6632 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6633 {
6634 	u32 data, orig;
6635 
6636 	orig = data = RREG32(RLC_PG_CNTL);
6637 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6638 		data &= ~DISABLE_CP_PG;
6639 	else
6640 		data |= DISABLE_CP_PG;
6641 	if (orig != data)
6642 		WREG32(RLC_PG_CNTL, data);
6643 }
6644 
6645 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6646 {
6647 	u32 data, orig;
6648 
6649 	orig = data = RREG32(RLC_PG_CNTL);
6650 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6651 		data &= ~DISABLE_GDS_PG;
6652 	else
6653 		data |= DISABLE_GDS_PG;
6654 	if (orig != data)
6655 		WREG32(RLC_PG_CNTL, data);
6656 }
6657 
/*
 * NOTE(review): presumably the jump-table size and the table offsets
 * within legacy (non-header) CP/MEC firmware images, in dwords, as used
 * by cik_init_cp_pg_table() - confirm against that function.
 */
#define CP_ME_TABLE_SIZE	96
#define CP_ME_TABLE_OFFSET	2048
#define CP_MEC_TABLE_OFFSET	4096
6661 
6662 void cik_init_cp_pg_table(struct radeon_device *rdev)
6663 {
6664 	volatile u32 *dst_ptr;
6665 	int me, i, max_me = 4;
6666 	u32 bo_offset = 0;
6667 	u32 table_offset, table_size;
6668 
6669 	if (rdev->family == CHIP_KAVERI)
6670 		max_me = 5;
6671 
6672 	if (rdev->rlc.cp_table_ptr == NULL)
6673 		return;
6674 
6675 	/* write the cp table buffer */
6676 	dst_ptr = rdev->rlc.cp_table_ptr;
6677 	for (me = 0; me < max_me; me++) {
6678 		if (rdev->new_fw) {
6679 			const __le32 *fw_data;
6680 			const struct gfx_firmware_header_v1_0 *hdr;
6681 
6682 			if (me == 0) {
6683 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6684 				fw_data = (const __le32 *)
6685 					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6686 				table_offset = le32_to_cpu(hdr->jt_offset);
6687 				table_size = le32_to_cpu(hdr->jt_size);
6688 			} else if (me == 1) {
6689 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6690 				fw_data = (const __le32 *)
6691 					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6692 				table_offset = le32_to_cpu(hdr->jt_offset);
6693 				table_size = le32_to_cpu(hdr->jt_size);
6694 			} else if (me == 2) {
6695 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6696 				fw_data = (const __le32 *)
6697 					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6698 				table_offset = le32_to_cpu(hdr->jt_offset);
6699 				table_size = le32_to_cpu(hdr->jt_size);
6700 			} else if (me == 3) {
6701 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6702 				fw_data = (const __le32 *)
6703 					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6704 				table_offset = le32_to_cpu(hdr->jt_offset);
6705 				table_size = le32_to_cpu(hdr->jt_size);
6706 			} else {
6707 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6708 				fw_data = (const __le32 *)
6709 					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6710 				table_offset = le32_to_cpu(hdr->jt_offset);
6711 				table_size = le32_to_cpu(hdr->jt_size);
6712 			}
6713 
6714 			for (i = 0; i < table_size; i ++) {
6715 				dst_ptr[bo_offset + i] =
6716 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6717 			}
6718 			bo_offset += table_size;
6719 		} else {
6720 			const __be32 *fw_data;
6721 			table_size = CP_ME_TABLE_SIZE;
6722 
6723 			if (me == 0) {
6724 				fw_data = (const __be32 *)rdev->ce_fw->data;
6725 				table_offset = CP_ME_TABLE_OFFSET;
6726 			} else if (me == 1) {
6727 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6728 				table_offset = CP_ME_TABLE_OFFSET;
6729 			} else if (me == 2) {
6730 				fw_data = (const __be32 *)rdev->me_fw->data;
6731 				table_offset = CP_ME_TABLE_OFFSET;
6732 			} else {
6733 				fw_data = (const __be32 *)rdev->mec_fw->data;
6734 				table_offset = CP_MEC_TABLE_OFFSET;
6735 			}
6736 
6737 			for (i = 0; i < table_size; i ++) {
6738 				dst_ptr[bo_offset + i] =
6739 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6740 			}
6741 			bo_offset += table_size;
6742 		}
6743 	}
6744 }
6745 
6746 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6747 				bool enable)
6748 {
6749 	u32 data, orig;
6750 
6751 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6752 		orig = data = RREG32(RLC_PG_CNTL);
6753 		data |= GFX_PG_ENABLE;
6754 		if (orig != data)
6755 			WREG32(RLC_PG_CNTL, data);
6756 
6757 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6758 		data |= AUTO_PG_EN;
6759 		if (orig != data)
6760 			WREG32(RLC_AUTO_PG_CTRL, data);
6761 	} else {
6762 		orig = data = RREG32(RLC_PG_CNTL);
6763 		data &= ~GFX_PG_ENABLE;
6764 		if (orig != data)
6765 			WREG32(RLC_PG_CNTL, data);
6766 
6767 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6768 		data &= ~AUTO_PG_EN;
6769 		if (orig != data)
6770 			WREG32(RLC_AUTO_PG_CTRL, data);
6771 
6772 		data = RREG32(DB_RENDER_CONTROL);
6773 	}
6774 }
6775 
6776 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6777 {
6778 	u32 mask = 0, tmp, tmp1;
6779 	int i;
6780 
6781 	cik_select_se_sh(rdev, se, sh);
6782 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6783 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6784 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6785 
6786 	tmp &= 0xffff0000;
6787 
6788 	tmp |= tmp1;
6789 	tmp >>= 16;
6790 
6791 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6792 		mask <<= 1;
6793 		mask |= 1;
6794 	}
6795 
6796 	return (~tmp) & mask;
6797 }
6798 
6799 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6800 {
6801 	u32 i, j, k, active_cu_number = 0;
6802 	u32 mask, counter, cu_bitmap;
6803 	u32 tmp = 0;
6804 
6805 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6806 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6807 			mask = 1;
6808 			cu_bitmap = 0;
6809 			counter = 0;
6810 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6811 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6812 					if (counter < 2)
6813 						cu_bitmap |= mask;
6814 					counter ++;
6815 				}
6816 				mask <<= 1;
6817 			}
6818 
6819 			active_cu_number += counter;
6820 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6821 		}
6822 	}
6823 
6824 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6825 
6826 	tmp = RREG32(RLC_MAX_PG_CU);
6827 	tmp &= ~MAX_PU_CU_MASK;
6828 	tmp |= MAX_PU_CU(active_cu_number);
6829 	WREG32(RLC_MAX_PG_CU, tmp);
6830 }
6831 
6832 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6833 				       bool enable)
6834 {
6835 	u32 data, orig;
6836 
6837 	orig = data = RREG32(RLC_PG_CNTL);
6838 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6839 		data |= STATIC_PER_CU_PG_ENABLE;
6840 	else
6841 		data &= ~STATIC_PER_CU_PG_ENABLE;
6842 	if (orig != data)
6843 		WREG32(RLC_PG_CNTL, data);
6844 }
6845 
6846 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6847 					bool enable)
6848 {
6849 	u32 data, orig;
6850 
6851 	orig = data = RREG32(RLC_PG_CNTL);
6852 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6853 		data |= DYN_PER_CU_PG_ENABLE;
6854 	else
6855 		data &= ~DYN_PER_CU_PG_ENABLE;
6856 	if (orig != data)
6857 		WREG32(RLC_PG_CNTL, data);
6858 }
6859 
6860 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6861 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6862 
6863 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6864 {
6865 	u32 data, orig;
6866 	u32 i;
6867 
6868 	if (rdev->rlc.cs_data) {
6869 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6870 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6871 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6872 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6873 	} else {
6874 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6875 		for (i = 0; i < 3; i++)
6876 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6877 	}
6878 	if (rdev->rlc.reg_list) {
6879 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6880 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6881 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6882 	}
6883 
6884 	orig = data = RREG32(RLC_PG_CNTL);
6885 	data |= GFX_PG_SRC;
6886 	if (orig != data)
6887 		WREG32(RLC_PG_CNTL, data);
6888 
6889 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6890 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6891 
6892 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6893 	data &= ~IDLE_POLL_COUNT_MASK;
6894 	data |= IDLE_POLL_COUNT(0x60);
6895 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6896 
6897 	data = 0x10101010;
6898 	WREG32(RLC_PG_DELAY, data);
6899 
6900 	data = RREG32(RLC_PG_DELAY_2);
6901 	data &= ~0xff;
6902 	data |= 0x3;
6903 	WREG32(RLC_PG_DELAY_2, data);
6904 
6905 	data = RREG32(RLC_AUTO_PG_CTRL);
6906 	data &= ~GRBM_REG_SGIT_MASK;
6907 	data |= GRBM_REG_SGIT(0x700);
6908 	WREG32(RLC_AUTO_PG_CTRL, data);
6909 
6910 }
6911 
6912 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6913 {
6914 	cik_enable_gfx_cgpg(rdev, enable);
6915 	cik_enable_gfx_static_mgpg(rdev, enable);
6916 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
6917 }
6918 
6919 u32 cik_get_csb_size(struct radeon_device *rdev)
6920 {
6921 	u32 count = 0;
6922 	const struct cs_section_def *sect = NULL;
6923 	const struct cs_extent_def *ext = NULL;
6924 
6925 	if (rdev->rlc.cs_data == NULL)
6926 		return 0;
6927 
6928 	/* begin clear state */
6929 	count += 2;
6930 	/* context control state */
6931 	count += 3;
6932 
6933 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6934 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6935 			if (sect->id == SECT_CONTEXT)
6936 				count += 2 + ext->reg_count;
6937 			else
6938 				return 0;
6939 		}
6940 	}
6941 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6942 	count += 4;
6943 	/* end clear state */
6944 	count += 2;
6945 	/* clear state */
6946 	count += 2;
6947 
6948 	return count;
6949 }
6950 
6951 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6952 {
6953 	u32 count = 0, i;
6954 	const struct cs_section_def *sect = NULL;
6955 	const struct cs_extent_def *ext = NULL;
6956 
6957 	if (rdev->rlc.cs_data == NULL)
6958 		return;
6959 	if (buffer == NULL)
6960 		return;
6961 
6962 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6963 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6964 
6965 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6966 	buffer[count++] = cpu_to_le32(0x80000000);
6967 	buffer[count++] = cpu_to_le32(0x80000000);
6968 
6969 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6970 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6971 			if (sect->id == SECT_CONTEXT) {
6972 				buffer[count++] =
6973 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6974 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6975 				for (i = 0; i < ext->reg_count; i++)
6976 					buffer[count++] = cpu_to_le32(ext->extent[i]);
6977 			} else {
6978 				return;
6979 			}
6980 		}
6981 	}
6982 
6983 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6984 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6985 	switch (rdev->family) {
6986 	case CHIP_BONAIRE:
6987 		buffer[count++] = cpu_to_le32(0x16000012);
6988 		buffer[count++] = cpu_to_le32(0x00000000);
6989 		break;
6990 	case CHIP_KAVERI:
6991 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6992 		buffer[count++] = cpu_to_le32(0x00000000);
6993 		break;
6994 	case CHIP_KABINI:
6995 	case CHIP_MULLINS:
6996 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6997 		buffer[count++] = cpu_to_le32(0x00000000);
6998 		break;
6999 	case CHIP_HAWAII:
7000 		buffer[count++] = cpu_to_le32(0x3a00161a);
7001 		buffer[count++] = cpu_to_le32(0x0000002e);
7002 		break;
7003 	default:
7004 		buffer[count++] = cpu_to_le32(0x00000000);
7005 		buffer[count++] = cpu_to_le32(0x00000000);
7006 		break;
7007 	}
7008 
7009 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7010 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
7011 
7012 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
7013 	buffer[count++] = cpu_to_le32(0);
7014 }
7015 
7016 static void cik_init_pg(struct radeon_device *rdev)
7017 {
7018 	if (rdev->pg_flags) {
7019 		cik_enable_sck_slowdown_on_pu(rdev, true);
7020 		cik_enable_sck_slowdown_on_pd(rdev, true);
7021 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7022 			cik_init_gfx_cgpg(rdev);
7023 			cik_enable_cp_pg(rdev, true);
7024 			cik_enable_gds_pg(rdev, true);
7025 		}
7026 		cik_init_ao_cu_mask(rdev);
7027 		cik_update_gfx_pg(rdev, true);
7028 	}
7029 }
7030 
7031 static void cik_fini_pg(struct radeon_device *rdev)
7032 {
7033 	if (rdev->pg_flags) {
7034 		cik_update_gfx_pg(rdev, false);
7035 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7036 			cik_enable_cp_pg(rdev, false);
7037 			cik_enable_gds_pg(rdev, false);
7038 		}
7039 	}
7040 }
7041 
7042 /*
7043  * Interrupts
7044  * Starting with r6xx, interrupts are handled via a ring buffer.
7045  * Ring buffers are areas of GPU accessible memory that the GPU
7046  * writes interrupt vectors into and the host reads vectors out of.
7047  * There is a rptr (read pointer) that determines where the
7048  * host is currently reading, and a wptr (write pointer)
7049  * which determines where the GPU has written.  When the
7050  * pointers are equal, the ring is idle.  When the GPU
7051  * writes vectors to the ring buffer, it increments the
7052  * wptr.  When there is an interrupt, the host then starts
7053  * fetching commands and processing them until the pointers are
7054  * equal again at which point it updates the rptr.
7055  */
7056 
7057 /**
7058  * cik_enable_interrupts - Enable the interrupt ring buffer
7059  *
7060  * @rdev: radeon_device pointer
7061  *
7062  * Enable the interrupt ring buffer (CIK).
7063  */
7064 static void cik_enable_interrupts(struct radeon_device *rdev)
7065 {
7066 	u32 ih_cntl = RREG32(IH_CNTL);
7067 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7068 
7069 	ih_cntl |= ENABLE_INTR;
7070 	ih_rb_cntl |= IH_RB_ENABLE;
7071 	WREG32(IH_CNTL, ih_cntl);
7072 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7073 	rdev->ih.enabled = true;
7074 }
7075 
7076 /**
7077  * cik_disable_interrupts - Disable the interrupt ring buffer
7078  *
7079  * @rdev: radeon_device pointer
7080  *
7081  * Disable the interrupt ring buffer (CIK).
7082  */
7083 static void cik_disable_interrupts(struct radeon_device *rdev)
7084 {
7085 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7086 	u32 ih_cntl = RREG32(IH_CNTL);
7087 
7088 	ih_rb_cntl &= ~IH_RB_ENABLE;
7089 	ih_cntl &= ~ENABLE_INTR;
7090 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7091 	WREG32(IH_CNTL, ih_cntl);
7092 	/* set rptr, wptr to 0 */
7093 	WREG32(IH_RB_RPTR, 0);
7094 	WREG32(IH_RB_WPTR, 0);
7095 	rdev->ih.enabled = false;
7096 	rdev->ih.rptr = 0;
7097 }
7098 
7099 /**
7100  * cik_disable_interrupt_state - Disable all interrupt sources
7101  *
7102  * @rdev: radeon_device pointer
7103  *
7104  * Clear all interrupt enable bits used by the driver (CIK).
7105  */
7106 static void cik_disable_interrupt_state(struct radeon_device *rdev)
7107 {
7108 	u32 tmp;
7109 
7110 	/* gfx ring */
7111 	tmp = RREG32(CP_INT_CNTL_RING0) &
7112 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7113 	WREG32(CP_INT_CNTL_RING0, tmp);
7114 	/* sdma */
7115 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7116 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
7117 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7118 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
7119 	/* compute queues */
7120 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
7121 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
7122 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
7123 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
7124 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
7125 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
7126 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
7127 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
7128 	/* grbm */
7129 	WREG32(GRBM_INT_CNTL, 0);
7130 	/* vline/vblank, etc. */
7131 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7132 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7133 	if (rdev->num_crtc >= 4) {
7134 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7135 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7136 	}
7137 	if (rdev->num_crtc >= 6) {
7138 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7139 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7140 	}
7141 	/* pflip */
7142 	if (rdev->num_crtc >= 2) {
7143 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7144 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7145 	}
7146 	if (rdev->num_crtc >= 4) {
7147 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7148 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7149 	}
7150 	if (rdev->num_crtc >= 6) {
7151 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7152 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7153 	}
7154 
7155 	/* dac hotplug */
7156 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7157 
7158 	/* digital hotplug */
7159 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7160 	WREG32(DC_HPD1_INT_CONTROL, tmp);
7161 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7162 	WREG32(DC_HPD2_INT_CONTROL, tmp);
7163 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7164 	WREG32(DC_HPD3_INT_CONTROL, tmp);
7165 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7166 	WREG32(DC_HPD4_INT_CONTROL, tmp);
7167 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7168 	WREG32(DC_HPD5_INT_CONTROL, tmp);
7169 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7170 	WREG32(DC_HPD6_INT_CONTROL, tmp);
7171 
7172 }
7173 
7174 /**
7175  * cik_irq_init - init and enable the interrupt ring
7176  *
7177  * @rdev: radeon_device pointer
7178  *
7179  * Allocate a ring buffer for the interrupt controller,
7180  * enable the RLC, disable interrupts, enable the IH
7181  * ring buffer and enable it (CIK).
7182  * Called at device load and reume.
7183  * Returns 0 for success, errors for failure.
7184  */
7185 static int cik_irq_init(struct radeon_device *rdev)
7186 {
7187 	int ret = 0;
7188 	int rb_bufsz;
7189 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7190 
7191 	/* allocate ring */
7192 	ret = r600_ih_ring_alloc(rdev);
7193 	if (ret)
7194 		return ret;
7195 
7196 	/* disable irqs */
7197 	cik_disable_interrupts(rdev);
7198 
7199 	/* init rlc */
7200 	ret = cik_rlc_resume(rdev);
7201 	if (ret) {
7202 		r600_ih_ring_fini(rdev);
7203 		return ret;
7204 	}
7205 
7206 	/* setup interrupt control */
7207 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
7208 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7209 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
7210 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7211 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7212 	 */
7213 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7214 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7215 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7216 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
7217 
7218 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7219 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7220 
7221 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7222 		      IH_WPTR_OVERFLOW_CLEAR |
7223 		      (rb_bufsz << 1));
7224 
7225 	if (rdev->wb.enabled)
7226 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7227 
7228 	/* set the writeback address whether it's enabled or not */
7229 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7230 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7231 
7232 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7233 
7234 	/* set rptr, wptr to 0 */
7235 	WREG32(IH_RB_RPTR, 0);
7236 	WREG32(IH_RB_WPTR, 0);
7237 
7238 	/* Default settings for IH_CNTL (disabled at first) */
7239 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7240 	/* RPTR_REARM only works if msi's are enabled */
7241 	if (rdev->msi_enabled)
7242 		ih_cntl |= RPTR_REARM;
7243 	WREG32(IH_CNTL, ih_cntl);
7244 
7245 	/* force the active interrupt state to all disabled */
7246 	cik_disable_interrupt_state(rdev);
7247 
7248 	pci_set_master(rdev->pdev);
7249 
7250 	/* enable irqs */
7251 	cik_enable_interrupts(rdev);
7252 
7253 	return ret;
7254 }
7255 
7256 /**
7257  * cik_irq_set - enable/disable interrupt sources
7258  *
7259  * @rdev: radeon_device pointer
7260  *
7261  * Enable interrupt sources on the GPU (vblanks, hpd,
7262  * etc.) (CIK).
7263  * Returns 0 for success, errors for failure.
7264  */
7265 int cik_irq_set(struct radeon_device *rdev)
7266 {
7267 	u32 cp_int_cntl;
7268 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
7269 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
7270 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7271 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7272 	u32 grbm_int_cntl = 0;
7273 	u32 dma_cntl, dma_cntl1;
7274 	u32 thermal_int;
7275 
7276 	if (!rdev->irq.installed) {
7277 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7278 		return -EINVAL;
7279 	}
7280 	/* don't enable anything if the ih is disabled */
7281 	if (!rdev->ih.enabled) {
7282 		cik_disable_interrupts(rdev);
7283 		/* force the active interrupt state to all disabled */
7284 		cik_disable_interrupt_state(rdev);
7285 		return 0;
7286 	}
7287 
7288 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7289 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7290 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7291 
7292 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
7293 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
7294 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
7295 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
7296 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
7297 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
7298 
7299 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7300 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7301 
7302 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7303 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7304 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7305 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7306 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7307 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7308 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7309 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7310 
7311 	if (rdev->flags & RADEON_IS_IGP)
7312 		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
7313 			~(THERM_INTH_MASK | THERM_INTL_MASK);
7314 	else
7315 		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
7316 			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
7317 
7318 	/* enable CP interrupts on all rings */
7319 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7320 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7321 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7322 	}
7323 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7324 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7325 		DRM_DEBUG("si_irq_set: sw int cp1\n");
7326 		if (ring->me == 1) {
7327 			switch (ring->pipe) {
7328 			case 0:
7329 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7330 				break;
7331 			case 1:
7332 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7333 				break;
7334 			case 2:
7335 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7336 				break;
7337 			case 3:
7338 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7339 				break;
7340 			default:
7341 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7342 				break;
7343 			}
7344 		} else if (ring->me == 2) {
7345 			switch (ring->pipe) {
7346 			case 0:
7347 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7348 				break;
7349 			case 1:
7350 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7351 				break;
7352 			case 2:
7353 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7354 				break;
7355 			case 3:
7356 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7357 				break;
7358 			default:
7359 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7360 				break;
7361 			}
7362 		} else {
7363 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7364 		}
7365 	}
7366 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7367 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7368 		DRM_DEBUG("si_irq_set: sw int cp2\n");
7369 		if (ring->me == 1) {
7370 			switch (ring->pipe) {
7371 			case 0:
7372 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7373 				break;
7374 			case 1:
7375 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7376 				break;
7377 			case 2:
7378 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7379 				break;
7380 			case 3:
7381 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7382 				break;
7383 			default:
7384 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7385 				break;
7386 			}
7387 		} else if (ring->me == 2) {
7388 			switch (ring->pipe) {
7389 			case 0:
7390 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7391 				break;
7392 			case 1:
7393 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7394 				break;
7395 			case 2:
7396 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7397 				break;
7398 			case 3:
7399 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7400 				break;
7401 			default:
7402 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7403 				break;
7404 			}
7405 		} else {
7406 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7407 		}
7408 	}
7409 
7410 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7411 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7412 		dma_cntl |= TRAP_ENABLE;
7413 	}
7414 
7415 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7416 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7417 		dma_cntl1 |= TRAP_ENABLE;
7418 	}
7419 
7420 	if (rdev->irq.crtc_vblank_int[0] ||
7421 	    atomic_read(&rdev->irq.pflip[0])) {
7422 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7423 		crtc1 |= VBLANK_INTERRUPT_MASK;
7424 	}
7425 	if (rdev->irq.crtc_vblank_int[1] ||
7426 	    atomic_read(&rdev->irq.pflip[1])) {
7427 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7428 		crtc2 |= VBLANK_INTERRUPT_MASK;
7429 	}
7430 	if (rdev->irq.crtc_vblank_int[2] ||
7431 	    atomic_read(&rdev->irq.pflip[2])) {
7432 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7433 		crtc3 |= VBLANK_INTERRUPT_MASK;
7434 	}
7435 	if (rdev->irq.crtc_vblank_int[3] ||
7436 	    atomic_read(&rdev->irq.pflip[3])) {
7437 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7438 		crtc4 |= VBLANK_INTERRUPT_MASK;
7439 	}
7440 	if (rdev->irq.crtc_vblank_int[4] ||
7441 	    atomic_read(&rdev->irq.pflip[4])) {
7442 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7443 		crtc5 |= VBLANK_INTERRUPT_MASK;
7444 	}
7445 	if (rdev->irq.crtc_vblank_int[5] ||
7446 	    atomic_read(&rdev->irq.pflip[5])) {
7447 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7448 		crtc6 |= VBLANK_INTERRUPT_MASK;
7449 	}
7450 	if (rdev->irq.hpd[0]) {
7451 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7452 		hpd1 |= DC_HPDx_INT_EN;
7453 	}
7454 	if (rdev->irq.hpd[1]) {
7455 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7456 		hpd2 |= DC_HPDx_INT_EN;
7457 	}
7458 	if (rdev->irq.hpd[2]) {
7459 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7460 		hpd3 |= DC_HPDx_INT_EN;
7461 	}
7462 	if (rdev->irq.hpd[3]) {
7463 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7464 		hpd4 |= DC_HPDx_INT_EN;
7465 	}
7466 	if (rdev->irq.hpd[4]) {
7467 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7468 		hpd5 |= DC_HPDx_INT_EN;
7469 	}
7470 	if (rdev->irq.hpd[5]) {
7471 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7472 		hpd6 |= DC_HPDx_INT_EN;
7473 	}
7474 
7475 	if (rdev->irq.dpm_thermal) {
7476 		DRM_DEBUG("dpm thermal\n");
7477 		if (rdev->flags & RADEON_IS_IGP)
7478 			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
7479 		else
7480 			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
7481 	}
7482 
7483 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7484 
7485 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7486 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7487 
7488 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7489 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7490 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7491 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7492 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7493 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7494 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7495 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7496 
7497 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7498 
7499 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7500 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7501 	if (rdev->num_crtc >= 4) {
7502 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7503 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7504 	}
7505 	if (rdev->num_crtc >= 6) {
7506 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7507 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7508 	}
7509 
7510 	if (rdev->num_crtc >= 2) {
7511 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7512 		       GRPH_PFLIP_INT_MASK);
7513 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7514 		       GRPH_PFLIP_INT_MASK);
7515 	}
7516 	if (rdev->num_crtc >= 4) {
7517 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7518 		       GRPH_PFLIP_INT_MASK);
7519 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7520 		       GRPH_PFLIP_INT_MASK);
7521 	}
7522 	if (rdev->num_crtc >= 6) {
7523 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7524 		       GRPH_PFLIP_INT_MASK);
7525 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7526 		       GRPH_PFLIP_INT_MASK);
7527 	}
7528 
7529 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7530 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7531 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7532 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7533 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7534 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7535 
7536 	if (rdev->flags & RADEON_IS_IGP)
7537 		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7538 	else
7539 		WREG32_SMC(CG_THERMAL_INT, thermal_int);
7540 
7541 	return 0;
7542 }
7543 
7544 /**
7545  * cik_irq_ack - ack interrupt sources
7546  *
7547  * @rdev: radeon_device pointer
7548  *
7549  * Ack interrupt sources on the GPU (vblanks, hpd,
7550  * etc.) (CIK).  Certain interrupts sources are sw
7551  * generated and do not require an explicit ack.
7552  */
7553 static inline void cik_irq_ack(struct radeon_device *rdev)
7554 {
7555 	u32 tmp;
7556 
7557 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7558 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7559 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7560 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7561 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7562 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7563 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7564 
7565 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7566 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7567 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7568 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7569 	if (rdev->num_crtc >= 4) {
7570 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7571 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7572 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7573 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7574 	}
7575 	if (rdev->num_crtc >= 6) {
7576 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7577 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7578 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7579 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7580 	}
7581 
7582 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7583 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7584 		       GRPH_PFLIP_INT_CLEAR);
7585 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7586 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7587 		       GRPH_PFLIP_INT_CLEAR);
7588 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7589 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7590 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7591 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7592 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7593 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7594 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7595 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7596 
7597 	if (rdev->num_crtc >= 4) {
7598 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7599 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7600 			       GRPH_PFLIP_INT_CLEAR);
7601 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7602 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7603 			       GRPH_PFLIP_INT_CLEAR);
7604 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7605 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7606 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7607 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7608 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7609 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7610 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7611 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7612 	}
7613 
7614 	if (rdev->num_crtc >= 6) {
7615 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7616 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7617 			       GRPH_PFLIP_INT_CLEAR);
7618 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7619 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7620 			       GRPH_PFLIP_INT_CLEAR);
7621 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7622 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7623 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7624 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7625 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7626 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7627 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7628 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7629 	}
7630 
7631 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7632 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7633 		tmp |= DC_HPDx_INT_ACK;
7634 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7635 	}
7636 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7637 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7638 		tmp |= DC_HPDx_INT_ACK;
7639 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7640 	}
7641 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7642 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7643 		tmp |= DC_HPDx_INT_ACK;
7644 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7645 	}
7646 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7647 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7648 		tmp |= DC_HPDx_INT_ACK;
7649 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7650 	}
7651 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7652 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7653 		tmp |= DC_HPDx_INT_ACK;
7654 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7655 	}
7656 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7657 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7658 		tmp |= DC_HPDx_INT_ACK;
7659 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7660 	}
7661 }
7662 
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK): turn interrupt delivery off,
 * wait briefly, acknowledge whatever is still latched and finally
 * call cik_disable_interrupt_state().
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);

	/* give the hw 1ms before acknowledging what is still pending */
	mdelay(1);
	cik_irq_ack(rdev);

	cik_disable_interrupt_state(rdev);
}
7678 
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts via cik_irq_disable() and then
 * stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* interrupts go first, the RLC is stopped afterwards */
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7692 
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Run the suspend path (interrupts off, RLC stopped) and then
 * release the IH ring buffer through r600_ih_ring_fini() (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* quiesce first so the ring is no longer in use when it is freed */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7707 
7708 /**
7709  * cik_get_ih_wptr - get the IH ring buffer wptr
7710  *
7711  * @rdev: radeon_device pointer
7712  *
7713  * Get the IH ring buffer wptr from either the register
7714  * or the writeback memory buffer (CIK).  Also check for
7715  * ring buffer overflow and deal with it.
7716  * Used by cik_irq_process().
7717  * Returns the value of the wptr.
7718  */
7719 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7720 {
7721 	u32 wptr, tmp;
7722 
7723 	if (rdev->wb.enabled)
7724 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7725 	else
7726 		wptr = RREG32(IH_RB_WPTR);
7727 
7728 	if (wptr & RB_OVERFLOW) {
7729 		/* When a ring buffer overflow happen start parsing interrupt
7730 		 * from the last not overwritten vector (wptr + 16). Hopefully
7731 		 * this should allow us to catchup.
7732 		 */
7733 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
7734 			wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
7735 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7736 		tmp = RREG32(IH_RB_CNTL);
7737 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7738 		WREG32(IH_RB_CNTL, tmp);
7739 		wptr &= ~RB_OVERFLOW;
7740 	}
7741 	return (wptr & rdev->ih.ptr_mask);
7742 }
7743 
7744 /*        CIK IV Ring
7745  * Each IV ring entry is 128 bits:
7746  * [7:0]    - interrupt source id
7747  * [31:8]   - reserved
7748  * [59:32]  - interrupt source data
7749  * [63:60]  - reserved
7750  * [71:64]  - RINGID
7751  *            CP:
7752  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7753  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7754  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7755  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7756  *            PIPE_ID - ME0 0=3D
7757  *                    - ME1&2 compute dispatcher (4 pipes each)
7758  *            SDMA:
7759  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7760  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7761  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7762  * [79:72]  - VMID
7763  * [95:80]  - PASID
7764  * [127:96] - reserved
7765  */
7766 /**
7767  * cik_irq_process - interrupt handler
7768  *
7769  * @rdev: radeon_device pointer
7770  *
7771  * Interrupt hander (CIK).  Walk the IH ring,
7772  * ack interrupts and schedule work to handle
7773  * interrupt events.
7774  * Returns irq process return code.
7775  */
7776 int cik_irq_process(struct radeon_device *rdev)
7777 {
7778 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7779 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7780 	u32 wptr;
7781 	u32 rptr;
7782 	u32 src_id, src_data, ring_id;
7783 	u8 me_id, pipe_id, queue_id;
7784 	u32 ring_index;
7785 	bool queue_hotplug = false;
7786 	bool queue_reset = false;
7787 	u32 addr, status, mc_client;
7788 	bool queue_thermal = false;
7789 
7790 	if (!rdev->ih.enabled || rdev->shutdown)
7791 		return IRQ_NONE;
7792 
7793 	wptr = cik_get_ih_wptr(rdev);
7794 
7795 restart_ih:
7796 	/* is somebody else already processing irqs? */
7797 	if (atomic_xchg(&rdev->ih.lock, 1))
7798 		return IRQ_NONE;
7799 
7800 	rptr = rdev->ih.rptr;
7801 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7802 
7803 	/* Order reading of wptr vs. reading of IH ring data */
7804 	rmb();
7805 
7806 	/* display interrupts */
7807 	cik_irq_ack(rdev);
7808 
7809 	while (rptr != wptr) {
7810 		/* wptr/rptr are in bytes! */
7811 		ring_index = rptr / 4;
7812 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7813 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7814 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7815 
7816 		switch (src_id) {
7817 		case 1: /* D1 vblank/vline */
7818 			switch (src_data) {
7819 			case 0: /* D1 vblank */
7820 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7821 					if (rdev->irq.crtc_vblank_int[0]) {
7822 						drm_handle_vblank(rdev->ddev, 0);
7823 						rdev->pm.vblank_sync = true;
7824 						wake_up(&rdev->irq.vblank_queue);
7825 					}
7826 					if (atomic_read(&rdev->irq.pflip[0]))
7827 						radeon_crtc_handle_vblank(rdev, 0);
7828 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7829 					DRM_DEBUG("IH: D1 vblank\n");
7830 				}
7831 				break;
7832 			case 1: /* D1 vline */
7833 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7834 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7835 					DRM_DEBUG("IH: D1 vline\n");
7836 				}
7837 				break;
7838 			default:
7839 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7840 				break;
7841 			}
7842 			break;
7843 		case 2: /* D2 vblank/vline */
7844 			switch (src_data) {
7845 			case 0: /* D2 vblank */
7846 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7847 					if (rdev->irq.crtc_vblank_int[1]) {
7848 						drm_handle_vblank(rdev->ddev, 1);
7849 						rdev->pm.vblank_sync = true;
7850 						wake_up(&rdev->irq.vblank_queue);
7851 					}
7852 					if (atomic_read(&rdev->irq.pflip[1]))
7853 						radeon_crtc_handle_vblank(rdev, 1);
7854 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7855 					DRM_DEBUG("IH: D2 vblank\n");
7856 				}
7857 				break;
7858 			case 1: /* D2 vline */
7859 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7860 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7861 					DRM_DEBUG("IH: D2 vline\n");
7862 				}
7863 				break;
7864 			default:
7865 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7866 				break;
7867 			}
7868 			break;
7869 		case 3: /* D3 vblank/vline */
7870 			switch (src_data) {
7871 			case 0: /* D3 vblank */
7872 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7873 					if (rdev->irq.crtc_vblank_int[2]) {
7874 						drm_handle_vblank(rdev->ddev, 2);
7875 						rdev->pm.vblank_sync = true;
7876 						wake_up(&rdev->irq.vblank_queue);
7877 					}
7878 					if (atomic_read(&rdev->irq.pflip[2]))
7879 						radeon_crtc_handle_vblank(rdev, 2);
7880 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7881 					DRM_DEBUG("IH: D3 vblank\n");
7882 				}
7883 				break;
7884 			case 1: /* D3 vline */
7885 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7886 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7887 					DRM_DEBUG("IH: D3 vline\n");
7888 				}
7889 				break;
7890 			default:
7891 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7892 				break;
7893 			}
7894 			break;
7895 		case 4: /* D4 vblank/vline */
7896 			switch (src_data) {
7897 			case 0: /* D4 vblank */
7898 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7899 					if (rdev->irq.crtc_vblank_int[3]) {
7900 						drm_handle_vblank(rdev->ddev, 3);
7901 						rdev->pm.vblank_sync = true;
7902 						wake_up(&rdev->irq.vblank_queue);
7903 					}
7904 					if (atomic_read(&rdev->irq.pflip[3]))
7905 						radeon_crtc_handle_vblank(rdev, 3);
7906 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7907 					DRM_DEBUG("IH: D4 vblank\n");
7908 				}
7909 				break;
7910 			case 1: /* D4 vline */
7911 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7912 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7913 					DRM_DEBUG("IH: D4 vline\n");
7914 				}
7915 				break;
7916 			default:
7917 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7918 				break;
7919 			}
7920 			break;
7921 		case 5: /* D5 vblank/vline */
7922 			switch (src_data) {
7923 			case 0: /* D5 vblank */
7924 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7925 					if (rdev->irq.crtc_vblank_int[4]) {
7926 						drm_handle_vblank(rdev->ddev, 4);
7927 						rdev->pm.vblank_sync = true;
7928 						wake_up(&rdev->irq.vblank_queue);
7929 					}
7930 					if (atomic_read(&rdev->irq.pflip[4]))
7931 						radeon_crtc_handle_vblank(rdev, 4);
7932 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7933 					DRM_DEBUG("IH: D5 vblank\n");
7934 				}
7935 				break;
7936 			case 1: /* D5 vline */
7937 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7938 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7939 					DRM_DEBUG("IH: D5 vline\n");
7940 				}
7941 				break;
7942 			default:
7943 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7944 				break;
7945 			}
7946 			break;
7947 		case 6: /* D6 vblank/vline */
7948 			switch (src_data) {
7949 			case 0: /* D6 vblank */
7950 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7951 					if (rdev->irq.crtc_vblank_int[5]) {
7952 						drm_handle_vblank(rdev->ddev, 5);
7953 						rdev->pm.vblank_sync = true;
7954 						wake_up(&rdev->irq.vblank_queue);
7955 					}
7956 					if (atomic_read(&rdev->irq.pflip[5]))
7957 						radeon_crtc_handle_vblank(rdev, 5);
7958 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7959 					DRM_DEBUG("IH: D6 vblank\n");
7960 				}
7961 				break;
7962 			case 1: /* D6 vline */
7963 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7964 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7965 					DRM_DEBUG("IH: D6 vline\n");
7966 				}
7967 				break;
7968 			default:
7969 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7970 				break;
7971 			}
7972 			break;
7973 		case 8: /* D1 page flip */
7974 		case 10: /* D2 page flip */
7975 		case 12: /* D3 page flip */
7976 		case 14: /* D4 page flip */
7977 		case 16: /* D5 page flip */
7978 		case 18: /* D6 page flip */
7979 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7980 			if (radeon_use_pflipirq > 0)
7981 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7982 			break;
7983 		case 42: /* HPD hotplug */
7984 			switch (src_data) {
7985 			case 0:
7986 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7987 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7988 					queue_hotplug = true;
7989 					DRM_DEBUG("IH: HPD1\n");
7990 				}
7991 				break;
7992 			case 1:
7993 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7994 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7995 					queue_hotplug = true;
7996 					DRM_DEBUG("IH: HPD2\n");
7997 				}
7998 				break;
7999 			case 2:
8000 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
8001 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8002 					queue_hotplug = true;
8003 					DRM_DEBUG("IH: HPD3\n");
8004 				}
8005 				break;
8006 			case 3:
8007 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
8008 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8009 					queue_hotplug = true;
8010 					DRM_DEBUG("IH: HPD4\n");
8011 				}
8012 				break;
8013 			case 4:
8014 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
8015 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8016 					queue_hotplug = true;
8017 					DRM_DEBUG("IH: HPD5\n");
8018 				}
8019 				break;
8020 			case 5:
8021 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
8022 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8023 					queue_hotplug = true;
8024 					DRM_DEBUG("IH: HPD6\n");
8025 				}
8026 				break;
8027 			default:
8028 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8029 				break;
8030 			}
8031 			break;
8032 		case 124: /* UVD */
8033 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8034 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8035 			break;
8036 		case 146:
8037 		case 147:
8038 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8039 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8040 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8041 			/* reset addr and status */
8042 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8043 			if (addr == 0x0 && status == 0x0)
8044 				break;
8045 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8046 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8047 				addr);
8048 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8049 				status);
8050 			cik_vm_decode_fault(rdev, status, addr, mc_client);
8051 			break;
8052 		case 167: /* VCE */
8053 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8054 			switch (src_data) {
8055 			case 0:
8056 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8057 				break;
8058 			case 1:
8059 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8060 				break;
8061 			default:
8062 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8063 				break;
8064 			}
8065 			break;
8066 		case 176: /* GFX RB CP_INT */
8067 		case 177: /* GFX IB CP_INT */
8068 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8069 			break;
8070 		case 181: /* CP EOP event */
8071 			DRM_DEBUG("IH: CP EOP\n");
8072 			/* XXX check the bitfield order! */
8073 			me_id = (ring_id & 0x60) >> 5;
8074 			pipe_id = (ring_id & 0x18) >> 3;
8075 			queue_id = (ring_id & 0x7) >> 0;
8076 			switch (me_id) {
8077 			case 0:
8078 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8079 				break;
8080 			case 1:
8081 			case 2:
8082 				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
8083 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8084 				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
8085 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8086 				break;
8087 			}
8088 			break;
8089 		case 184: /* CP Privileged reg access */
8090 			DRM_ERROR("Illegal register access in command stream\n");
8091 			/* XXX check the bitfield order! */
8092 			me_id = (ring_id & 0x60) >> 5;
8093 			pipe_id = (ring_id & 0x18) >> 3;
8094 			queue_id = (ring_id & 0x7) >> 0;
8095 			switch (me_id) {
8096 			case 0:
8097 				/* This results in a full GPU reset, but all we need to do is soft
8098 				 * reset the CP for gfx
8099 				 */
8100 				queue_reset = true;
8101 				break;
8102 			case 1:
8103 				/* XXX compute */
8104 				queue_reset = true;
8105 				break;
8106 			case 2:
8107 				/* XXX compute */
8108 				queue_reset = true;
8109 				break;
8110 			}
8111 			break;
8112 		case 185: /* CP Privileged inst */
8113 			DRM_ERROR("Illegal instruction in command stream\n");
8114 			/* XXX check the bitfield order! */
8115 			me_id = (ring_id & 0x60) >> 5;
8116 			pipe_id = (ring_id & 0x18) >> 3;
8117 			queue_id = (ring_id & 0x7) >> 0;
8118 			switch (me_id) {
8119 			case 0:
8120 				/* This results in a full GPU reset, but all we need to do is soft
8121 				 * reset the CP for gfx
8122 				 */
8123 				queue_reset = true;
8124 				break;
8125 			case 1:
8126 				/* XXX compute */
8127 				queue_reset = true;
8128 				break;
8129 			case 2:
8130 				/* XXX compute */
8131 				queue_reset = true;
8132 				break;
8133 			}
8134 			break;
8135 		case 224: /* SDMA trap event */
8136 			/* XXX check the bitfield order! */
8137 			me_id = (ring_id & 0x3) >> 0;
8138 			queue_id = (ring_id & 0xc) >> 2;
8139 			DRM_DEBUG("IH: SDMA trap\n");
8140 			switch (me_id) {
8141 			case 0:
8142 				switch (queue_id) {
8143 				case 0:
8144 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8145 					break;
8146 				case 1:
8147 					/* XXX compute */
8148 					break;
8149 				case 2:
8150 					/* XXX compute */
8151 					break;
8152 				}
8153 				break;
8154 			case 1:
8155 				switch (queue_id) {
8156 				case 0:
8157 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8158 					break;
8159 				case 1:
8160 					/* XXX compute */
8161 					break;
8162 				case 2:
8163 					/* XXX compute */
8164 					break;
8165 				}
8166 				break;
8167 			}
8168 			break;
8169 		case 230: /* thermal low to high */
8170 			DRM_DEBUG("IH: thermal low to high\n");
8171 			rdev->pm.dpm.thermal.high_to_low = false;
8172 			queue_thermal = true;
8173 			break;
8174 		case 231: /* thermal high to low */
8175 			DRM_DEBUG("IH: thermal high to low\n");
8176 			rdev->pm.dpm.thermal.high_to_low = true;
8177 			queue_thermal = true;
8178 			break;
8179 		case 233: /* GUI IDLE */
8180 			DRM_DEBUG("IH: GUI idle\n");
8181 			break;
8182 		case 241: /* SDMA Privileged inst */
8183 		case 247: /* SDMA Privileged inst */
8184 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8185 			/* XXX check the bitfield order! */
8186 			me_id = (ring_id & 0x3) >> 0;
8187 			queue_id = (ring_id & 0xc) >> 2;
8188 			switch (me_id) {
8189 			case 0:
8190 				switch (queue_id) {
8191 				case 0:
8192 					queue_reset = true;
8193 					break;
8194 				case 1:
8195 					/* XXX compute */
8196 					queue_reset = true;
8197 					break;
8198 				case 2:
8199 					/* XXX compute */
8200 					queue_reset = true;
8201 					break;
8202 				}
8203 				break;
8204 			case 1:
8205 				switch (queue_id) {
8206 				case 0:
8207 					queue_reset = true;
8208 					break;
8209 				case 1:
8210 					/* XXX compute */
8211 					queue_reset = true;
8212 					break;
8213 				case 2:
8214 					/* XXX compute */
8215 					queue_reset = true;
8216 					break;
8217 				}
8218 				break;
8219 			}
8220 			break;
8221 		default:
8222 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8223 			break;
8224 		}
8225 
8226 		/* wptr/rptr are in bytes! */
8227 		rptr += 16;
8228 		rptr &= rdev->ih.ptr_mask;
8229 	}
8230 	if (queue_hotplug)
8231 		schedule_work(&rdev->hotplug_work);
8232 	if (queue_reset)
8233 		schedule_work(&rdev->reset_work);
8234 	if (queue_thermal)
8235 		schedule_work(&rdev->pm.dpm.thermal.work);
8236 	rdev->ih.rptr = rptr;
8237 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
8238 	atomic_set(&rdev->ih.lock, 0);
8239 
8240 	/* make sure wptr hasn't changed while processing */
8241 	wptr = cik_get_ih_wptr(rdev);
8242 	if (wptr != rptr)
8243 		goto restart_ih;
8244 
8245 	return IRQ_HANDLED;
8246 }
8247 
8248 /*
8249  * startup/shutdown callbacks
8250  */
8251 /**
8252  * cik_startup - program the asic to a functional state
8253  *
8254  * @rdev: radeon_device pointer
8255  *
8256  * Programs the asic to a functional state (CIK).
8257  * Called by cik_init() and cik_resume().
8258  * Returns 0 for success, error for failure.
8259  */
8260 static int cik_startup(struct radeon_device *rdev)
8261 {
8262 	struct radeon_ring *ring;
8263 	u32 nop;
8264 	int r;
8265 
8266 	/* enable pcie gen2/3 link */
8267 	cik_pcie_gen3_enable(rdev);
8268 	/* enable aspm */
8269 	cik_program_aspm(rdev);
8270 
8271 	/* scratch needs to be initialized before MC */
8272 	r = r600_vram_scratch_init(rdev);
8273 	if (r)
8274 		return r;
8275 
8276 	cik_mc_program(rdev);
8277 
8278 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8279 		r = ci_mc_load_microcode(rdev);
8280 		if (r) {
8281 			DRM_ERROR("Failed to load MC firmware!\n");
8282 			return r;
8283 		}
8284 	}
8285 
8286 	r = cik_pcie_gart_enable(rdev);
8287 	if (r)
8288 		return r;
8289 	cik_gpu_init(rdev);
8290 
8291 	/* allocate rlc buffers */
8292 	if (rdev->flags & RADEON_IS_IGP) {
8293 		if (rdev->family == CHIP_KAVERI) {
8294 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8295 			rdev->rlc.reg_list_size =
8296 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8297 		} else {
8298 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8299 			rdev->rlc.reg_list_size =
8300 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8301 		}
8302 	}
8303 	rdev->rlc.cs_data = ci_cs_data;
8304 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8305 	r = sumo_rlc_init(rdev);
8306 	if (r) {
8307 		DRM_ERROR("Failed to init rlc BOs!\n");
8308 		return r;
8309 	}
8310 
8311 	/* allocate wb buffer */
8312 	r = radeon_wb_init(rdev);
8313 	if (r)
8314 		return r;
8315 
8316 	/* allocate mec buffers */
8317 	r = cik_mec_init(rdev);
8318 	if (r) {
8319 		DRM_ERROR("Failed to init MEC BOs!\n");
8320 		return r;
8321 	}
8322 
8323 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8324 	if (r) {
8325 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8326 		return r;
8327 	}
8328 
8329 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8330 	if (r) {
8331 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8332 		return r;
8333 	}
8334 
8335 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8336 	if (r) {
8337 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8338 		return r;
8339 	}
8340 
8341 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8342 	if (r) {
8343 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8344 		return r;
8345 	}
8346 
8347 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8348 	if (r) {
8349 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8350 		return r;
8351 	}
8352 
8353 	r = radeon_uvd_resume(rdev);
8354 	if (!r) {
8355 		r = uvd_v4_2_resume(rdev);
8356 		if (!r) {
8357 			r = radeon_fence_driver_start_ring(rdev,
8358 							   R600_RING_TYPE_UVD_INDEX);
8359 			if (r)
8360 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8361 		}
8362 	}
8363 	if (r)
8364 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8365 
8366 	r = radeon_vce_resume(rdev);
8367 	if (!r) {
8368 		r = vce_v2_0_resume(rdev);
8369 		if (!r)
8370 			r = radeon_fence_driver_start_ring(rdev,
8371 							   TN_RING_TYPE_VCE1_INDEX);
8372 		if (!r)
8373 			r = radeon_fence_driver_start_ring(rdev,
8374 							   TN_RING_TYPE_VCE2_INDEX);
8375 	}
8376 	if (r) {
8377 		dev_err(rdev->dev, "VCE init error (%d).\n", r);
8378 		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8379 		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8380 	}
8381 
8382 	/* Enable IRQ */
8383 	if (!rdev->irq.installed) {
8384 		r = radeon_irq_kms_init(rdev);
8385 		if (r)
8386 			return r;
8387 	}
8388 
8389 	r = cik_irq_init(rdev);
8390 	if (r) {
8391 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8392 		radeon_irq_kms_fini(rdev);
8393 		return r;
8394 	}
8395 	cik_irq_set(rdev);
8396 
8397 	if (rdev->family == CHIP_HAWAII) {
8398 		if (rdev->new_fw)
8399 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8400 		else
8401 			nop = RADEON_CP_PACKET2;
8402 	} else {
8403 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8404 	}
8405 
8406 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8407 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8408 			     nop);
8409 	if (r)
8410 		return r;
8411 
8412 	/* set up the compute queues */
8413 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8414 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8415 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8416 			     nop);
8417 	if (r)
8418 		return r;
8419 	ring->me = 1; /* first MEC */
8420 	ring->pipe = 0; /* first pipe */
8421 	ring->queue = 0; /* first queue */
8422 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8423 
8424 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8425 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8426 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8427 			     nop);
8428 	if (r)
8429 		return r;
8430 	/* dGPU only have 1 MEC */
8431 	ring->me = 1; /* first MEC */
8432 	ring->pipe = 0; /* first pipe */
8433 	ring->queue = 1; /* second queue */
8434 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8435 
8436 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8437 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8438 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8439 	if (r)
8440 		return r;
8441 
8442 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8443 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8444 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8445 	if (r)
8446 		return r;
8447 
8448 	r = cik_cp_resume(rdev);
8449 	if (r)
8450 		return r;
8451 
8452 	r = cik_sdma_resume(rdev);
8453 	if (r)
8454 		return r;
8455 
8456 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8457 	if (ring->ring_size) {
8458 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8459 				     RADEON_CP_PACKET2);
8460 		if (!r)
8461 			r = uvd_v1_0_init(rdev);
8462 		if (r)
8463 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8464 	}
8465 
8466 	r = -ENOENT;
8467 
8468 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8469 	if (ring->ring_size)
8470 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8471 				     VCE_CMD_NO_OP);
8472 
8473 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8474 	if (ring->ring_size)
8475 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8476 				     VCE_CMD_NO_OP);
8477 
8478 	if (!r)
8479 		r = vce_v1_0_init(rdev);
8480 	else if (r != -ENOENT)
8481 		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8482 
8483 	r = radeon_ib_pool_init(rdev);
8484 	if (r) {
8485 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8486 		return r;
8487 	}
8488 
8489 	r = radeon_vm_manager_init(rdev);
8490 	if (r) {
8491 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8492 		return r;
8493 	}
8494 
8495 	r = dce6_audio_init(rdev);
8496 	if (r)
8497 		return r;
8498 
8499 	return 0;
8500 }
8501 
8502 /**
8503  * cik_resume - resume the asic to a functional state
8504  *
8505  * @rdev: radeon_device pointer
8506  *
8507  * Programs the asic to a functional state (CIK).
8508  * Called at resume.
8509  * Returns 0 for success, error for failure.
8510  */
8511 int cik_resume(struct radeon_device *rdev)
8512 {
8513 	int r;
8514 
8515 	/* post card */
8516 	atom_asic_init(rdev->mode_info.atom_context);
8517 
8518 	/* init golden registers */
8519 	cik_init_golden_registers(rdev);
8520 
8521 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8522 		radeon_pm_resume(rdev);
8523 
8524 	rdev->accel_working = true;
8525 	r = cik_startup(rdev);
8526 	if (r) {
8527 		DRM_ERROR("cik startup failed on resume\n");
8528 		rdev->accel_working = false;
8529 		return r;
8530 	}
8531 
8532 	return r;
8533 
8534 }
8535 
8536 /**
8537  * cik_suspend - suspend the asic
8538  *
8539  * @rdev: radeon_device pointer
8540  *
8541  * Bring the chip into a state suitable for suspend (CIK).
8542  * Called at suspend.
8543  * Returns 0 for success.
8544  */
8545 int cik_suspend(struct radeon_device *rdev)
8546 {
8547 	radeon_pm_suspend(rdev);
8548 	dce6_audio_fini(rdev);
8549 	radeon_vm_manager_fini(rdev);
8550 	cik_cp_enable(rdev, false);
8551 	cik_sdma_enable(rdev, false);
8552 	uvd_v1_0_fini(rdev);
8553 	radeon_uvd_suspend(rdev);
8554 	radeon_vce_suspend(rdev);
8555 	cik_fini_pg(rdev);
8556 	cik_fini_cg(rdev);
8557 	cik_irq_suspend(rdev);
8558 	radeon_wb_disable(rdev);
8559 	cik_pcie_gart_disable(rdev);
8560 	return 0;
8561 }
8562 
/* The plan is to move initialization into that function and use
 * helper functions, so that radeon_device_init does pretty much
 * nothing more than call the asic specific function. This
 * should also allow removing a bunch of callback functions
 * like vram_info.
 */
8569 /**
8570  * cik_init - asic specific driver and hw init
8571  *
8572  * @rdev: radeon_device pointer
8573  *
8574  * Setup asic specific driver variables and program the hw
8575  * to a functional state (CIK).
8576  * Called at driver startup.
8577  * Returns 0 for success, errors for failure.
8578  */
8579 int cik_init(struct radeon_device *rdev)
8580 {
8581 	struct radeon_ring *ring;
8582 	int r;
8583 
8584 	/* Read BIOS */
8585 	if (!radeon_get_bios(rdev)) {
8586 		if (ASIC_IS_AVIVO(rdev))
8587 			return -EINVAL;
8588 	}
8589 	/* Must be an ATOMBIOS */
8590 	if (!rdev->is_atom_bios) {
8591 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8592 		return -EINVAL;
8593 	}
8594 	r = radeon_atombios_init(rdev);
8595 	if (r)
8596 		return r;
8597 
8598 	/* Post card if necessary */
8599 	if (!radeon_card_posted(rdev)) {
8600 		if (!rdev->bios) {
8601 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8602 			return -EINVAL;
8603 		}
8604 		DRM_INFO("GPU not posted. posting now...\n");
8605 		atom_asic_init(rdev->mode_info.atom_context);
8606 	}
8607 	/* init golden registers */
8608 	cik_init_golden_registers(rdev);
8609 	/* Initialize scratch registers */
8610 	cik_scratch_init(rdev);
8611 	/* Initialize surface registers */
8612 	radeon_surface_init(rdev);
8613 	/* Initialize clocks */
8614 	radeon_get_clock_info(rdev->ddev);
8615 
8616 	/* Fence driver */
8617 	r = radeon_fence_driver_init(rdev);
8618 	if (r)
8619 		return r;
8620 
8621 	/* initialize memory controller */
8622 	r = cik_mc_init(rdev);
8623 	if (r)
8624 		return r;
8625 	/* Memory manager */
8626 	r = radeon_bo_init(rdev);
8627 	if (r)
8628 		return r;
8629 
8630 	if (rdev->flags & RADEON_IS_IGP) {
8631 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8632 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8633 			r = cik_init_microcode(rdev);
8634 			if (r) {
8635 				DRM_ERROR("Failed to load firmware!\n");
8636 				return r;
8637 			}
8638 		}
8639 	} else {
8640 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8641 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8642 		    !rdev->mc_fw) {
8643 			r = cik_init_microcode(rdev);
8644 			if (r) {
8645 				DRM_ERROR("Failed to load firmware!\n");
8646 				return r;
8647 			}
8648 		}
8649 	}
8650 
8651 	/* Initialize power management */
8652 	radeon_pm_init(rdev);
8653 
8654 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8655 	ring->ring_obj = NULL;
8656 	r600_ring_init(rdev, ring, 1024 * 1024);
8657 
8658 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8659 	ring->ring_obj = NULL;
8660 	r600_ring_init(rdev, ring, 1024 * 1024);
8661 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8662 	if (r)
8663 		return r;
8664 
8665 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8666 	ring->ring_obj = NULL;
8667 	r600_ring_init(rdev, ring, 1024 * 1024);
8668 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8669 	if (r)
8670 		return r;
8671 
8672 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8673 	ring->ring_obj = NULL;
8674 	r600_ring_init(rdev, ring, 256 * 1024);
8675 
8676 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8677 	ring->ring_obj = NULL;
8678 	r600_ring_init(rdev, ring, 256 * 1024);
8679 
8680 	r = radeon_uvd_init(rdev);
8681 	if (!r) {
8682 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8683 		ring->ring_obj = NULL;
8684 		r600_ring_init(rdev, ring, 4096);
8685 	}
8686 
8687 	r = radeon_vce_init(rdev);
8688 	if (!r) {
8689 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8690 		ring->ring_obj = NULL;
8691 		r600_ring_init(rdev, ring, 4096);
8692 
8693 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8694 		ring->ring_obj = NULL;
8695 		r600_ring_init(rdev, ring, 4096);
8696 	}
8697 
8698 	rdev->ih.ring_obj = NULL;
8699 	r600_ih_ring_init(rdev, 64 * 1024);
8700 
8701 	r = r600_pcie_gart_init(rdev);
8702 	if (r)
8703 		return r;
8704 
8705 	rdev->accel_working = true;
8706 	r = cik_startup(rdev);
8707 	if (r) {
8708 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8709 		cik_cp_fini(rdev);
8710 		cik_sdma_fini(rdev);
8711 		cik_irq_fini(rdev);
8712 		sumo_rlc_fini(rdev);
8713 		cik_mec_fini(rdev);
8714 		radeon_wb_fini(rdev);
8715 		radeon_ib_pool_fini(rdev);
8716 		radeon_vm_manager_fini(rdev);
8717 		radeon_irq_kms_fini(rdev);
8718 		cik_pcie_gart_fini(rdev);
8719 		rdev->accel_working = false;
8720 	}
8721 
8722 	/* Don't start up if the MC ucode is missing.
8723 	 * The default clocks and voltages before the MC ucode
8724 	 * is loaded are not suffient for advanced operations.
8725 	 */
8726 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8727 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8728 		return -EINVAL;
8729 	}
8730 
8731 	return 0;
8732 }
8733 
/**
 * cik_fini - asic specific driver and hw fini
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the asic specific driver variables and program the hw
 * to an idle state (CIK).
 * Engines, gating, interrupts and helper pools are torn down first,
 * followed by UVD/VCE, the GART, the VRAM scratch page, GEM, the
 * fence driver, the memory manager and atombios; the BIOS image is
 * freed last.
 * Called at driver unload.
 */
void cik_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	/* command processor and SDMA engines */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	/* power gating, then clock gating */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	/* multimedia blocks */
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* release the BIOS copy and clear the pointer so it cannot dangle */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
8769 
8770 void dce8_program_fmt(struct drm_encoder *encoder)
8771 {
8772 	struct drm_device *dev = encoder->dev;
8773 	struct radeon_device *rdev = dev->dev_private;
8774 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8775 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8776 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8777 	int bpc = 0;
8778 	u32 tmp = 0;
8779 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8780 
8781 	if (connector) {
8782 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8783 		bpc = radeon_get_monitor_bpc(connector);
8784 		dither = radeon_connector->dither;
8785 	}
8786 
8787 	/* LVDS/eDP FMT is set up by atom */
8788 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8789 		return;
8790 
8791 	/* not needed for analog */
8792 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8793 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8794 		return;
8795 
8796 	if (bpc == 0)
8797 		return;
8798 
8799 	switch (bpc) {
8800 	case 6:
8801 		if (dither == RADEON_FMT_DITHER_ENABLE)
8802 			/* XXX sort out optimal dither settings */
8803 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8804 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8805 		else
8806 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8807 		break;
8808 	case 8:
8809 		if (dither == RADEON_FMT_DITHER_ENABLE)
8810 			/* XXX sort out optimal dither settings */
8811 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8812 				FMT_RGB_RANDOM_ENABLE |
8813 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8814 		else
8815 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8816 		break;
8817 	case 10:
8818 		if (dither == RADEON_FMT_DITHER_ENABLE)
8819 			/* XXX sort out optimal dither settings */
8820 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8821 				FMT_RGB_RANDOM_ENABLE |
8822 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8823 		else
8824 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8825 		break;
8826 	default:
8827 		/* not needed */
8828 		break;
8829 	}
8830 
8831 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8832 }
8833 
8834 /* display watermark setup */
8835 /**
8836  * dce8_line_buffer_adjust - Set up the line buffer
8837  *
8838  * @rdev: radeon_device pointer
8839  * @radeon_crtc: the selected display controller
8840  * @mode: the current display mode on the selected display
8841  * controller
8842  *
8843  * Setup up the line buffer allocation for
8844  * the selected display controller (CIK).
8845  * Returns the line buffer size in pixels.
8846  */
8847 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8848 				   struct radeon_crtc *radeon_crtc,
8849 				   struct drm_display_mode *mode)
8850 {
8851 	u32 tmp, buffer_alloc, i;
8852 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8853 	/*
8854 	 * Line Buffer Setup
8855 	 * There are 6 line buffers, one for each display controllers.
8856 	 * There are 3 partitions per LB. Select the number of partitions
8857 	 * to enable based on the display width.  For display widths larger
8858 	 * than 4096, you need use to use 2 display controllers and combine
8859 	 * them using the stereo blender.
8860 	 */
8861 	if (radeon_crtc->base.enabled && mode) {
8862 		if (mode->crtc_hdisplay < 1920) {
8863 			tmp = 1;
8864 			buffer_alloc = 2;
8865 		} else if (mode->crtc_hdisplay < 2560) {
8866 			tmp = 2;
8867 			buffer_alloc = 2;
8868 		} else if (mode->crtc_hdisplay < 4096) {
8869 			tmp = 0;
8870 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8871 		} else {
8872 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8873 			tmp = 0;
8874 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8875 		}
8876 	} else {
8877 		tmp = 1;
8878 		buffer_alloc = 0;
8879 	}
8880 
8881 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8882 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8883 
8884 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8885 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8886 	for (i = 0; i < rdev->usec_timeout; i++) {
8887 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8888 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8889 			break;
8890 		udelay(1);
8891 	}
8892 
8893 	if (radeon_crtc->base.enabled && mode) {
8894 		switch (tmp) {
8895 		case 0:
8896 		default:
8897 			return 4096 * 2;
8898 		case 1:
8899 			return 1920 * 2;
8900 		case 2:
8901 			return 2560 * 2;
8902 		}
8903 	}
8904 
8905 	/* controller not enabled, so no lb used */
8906 	return 0;
8907 }
8908 
8909 /**
8910  * cik_get_number_of_dram_channels - get the number of dram channels
8911  *
8912  * @rdev: radeon_device pointer
8913  *
8914  * Look up the number of video ram channels (CIK).
8915  * Used for display watermark bandwidth calculations
8916  * Returns the number of dram channels
8917  */
8918 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8919 {
8920 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8921 
8922 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8923 	case 0:
8924 	default:
8925 		return 1;
8926 	case 1:
8927 		return 2;
8928 	case 2:
8929 		return 4;
8930 	case 3:
8931 		return 8;
8932 	case 4:
8933 		return 3;
8934 	case 5:
8935 		return 6;
8936 	case 6:
8937 		return 10;
8938 	case 7:
8939 		return 12;
8940 	case 8:
8941 		return 16;
8942 	}
8943 }
8944 
/*
 * Inputs to the DCE8 display watermark calculations.
 * dce8_program_watermarks() fills one instance for the high clock
 * case and one for the low clock case.
 */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8960 
8961 /**
8962  * dce8_dram_bandwidth - get the dram bandwidth
8963  *
8964  * @wm: watermark calculation data
8965  *
8966  * Calculate the raw dram bandwidth (CIK).
8967  * Used for display watermark bandwidth calculations
8968  * Returns the dram bandwidth in MBytes/s
8969  */
8970 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8971 {
8972 	/* Calculate raw DRAM Bandwidth */
8973 	fixed20_12 dram_efficiency; /* 0.7 */
8974 	fixed20_12 yclk, dram_channels, bandwidth;
8975 	fixed20_12 a;
8976 
8977 	a.full = dfixed_const(1000);
8978 	yclk.full = dfixed_const(wm->yclk);
8979 	yclk.full = dfixed_div(yclk, a);
8980 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8981 	a.full = dfixed_const(10);
8982 	dram_efficiency.full = dfixed_const(7);
8983 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8984 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8985 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8986 
8987 	return dfixed_trunc(bandwidth);
8988 }
8989 
8990 /**
8991  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8992  *
8993  * @wm: watermark calculation data
8994  *
8995  * Calculate the dram bandwidth used for display (CIK).
8996  * Used for display watermark bandwidth calculations
8997  * Returns the dram bandwidth for display in MBytes/s
8998  */
8999 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9000 {
9001 	/* Calculate DRAM Bandwidth and the part allocated to display. */
9002 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9003 	fixed20_12 yclk, dram_channels, bandwidth;
9004 	fixed20_12 a;
9005 
9006 	a.full = dfixed_const(1000);
9007 	yclk.full = dfixed_const(wm->yclk);
9008 	yclk.full = dfixed_div(yclk, a);
9009 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9010 	a.full = dfixed_const(10);
9011 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
9012 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9013 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9014 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9015 
9016 	return dfixed_trunc(bandwidth);
9017 }
9018 
9019 /**
9020  * dce8_data_return_bandwidth - get the data return bandwidth
9021  *
9022  * @wm: watermark calculation data
9023  *
9024  * Calculate the data return bandwidth used for display (CIK).
9025  * Used for display watermark bandwidth calculations
9026  * Returns the data return bandwidth in MBytes/s
9027  */
9028 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9029 {
9030 	/* Calculate the display Data return Bandwidth */
9031 	fixed20_12 return_efficiency; /* 0.8 */
9032 	fixed20_12 sclk, bandwidth;
9033 	fixed20_12 a;
9034 
9035 	a.full = dfixed_const(1000);
9036 	sclk.full = dfixed_const(wm->sclk);
9037 	sclk.full = dfixed_div(sclk, a);
9038 	a.full = dfixed_const(10);
9039 	return_efficiency.full = dfixed_const(8);
9040 	return_efficiency.full = dfixed_div(return_efficiency, a);
9041 	a.full = dfixed_const(32);
9042 	bandwidth.full = dfixed_mul(a, sclk);
9043 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9044 
9045 	return dfixed_trunc(bandwidth);
9046 }
9047 
9048 /**
9049  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9050  *
9051  * @wm: watermark calculation data
9052  *
9053  * Calculate the dmif bandwidth used for display (CIK).
9054  * Used for display watermark bandwidth calculations
9055  * Returns the dmif bandwidth in MBytes/s
9056  */
9057 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9058 {
9059 	/* Calculate the DMIF Request Bandwidth */
9060 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9061 	fixed20_12 disp_clk, bandwidth;
9062 	fixed20_12 a, b;
9063 
9064 	a.full = dfixed_const(1000);
9065 	disp_clk.full = dfixed_const(wm->disp_clk);
9066 	disp_clk.full = dfixed_div(disp_clk, a);
9067 	a.full = dfixed_const(32);
9068 	b.full = dfixed_mul(a, disp_clk);
9069 
9070 	a.full = dfixed_const(10);
9071 	disp_clk_request_efficiency.full = dfixed_const(8);
9072 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9073 
9074 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9075 
9076 	return dfixed_trunc(bandwidth);
9077 }
9078 
9079 /**
9080  * dce8_available_bandwidth - get the min available bandwidth
9081  *
9082  * @wm: watermark calculation data
9083  *
9084  * Calculate the min available bandwidth used for display (CIK).
9085  * Used for display watermark bandwidth calculations
9086  * Returns the min available bandwidth in MBytes/s
9087  */
9088 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9089 {
9090 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9091 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9092 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9093 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9094 
9095 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9096 }
9097 
9098 /**
9099  * dce8_average_bandwidth - get the average available bandwidth
9100  *
9101  * @wm: watermark calculation data
9102  *
9103  * Calculate the average available bandwidth used for display (CIK).
9104  * Used for display watermark bandwidth calculations
9105  * Returns the average available bandwidth in MBytes/s
9106  */
9107 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9108 {
9109 	/* Calculate the display mode Average Bandwidth
9110 	 * DisplayMode should contain the source and destination dimensions,
9111 	 * timing, etc.
9112 	 */
9113 	fixed20_12 bpp;
9114 	fixed20_12 line_time;
9115 	fixed20_12 src_width;
9116 	fixed20_12 bandwidth;
9117 	fixed20_12 a;
9118 
9119 	a.full = dfixed_const(1000);
9120 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9121 	line_time.full = dfixed_div(line_time, a);
9122 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9123 	src_width.full = dfixed_const(wm->src_width);
9124 	bandwidth.full = dfixed_mul(src_width, bpp);
9125 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9126 	bandwidth.full = dfixed_div(bandwidth, line_time);
9127 
9128 	return dfixed_trunc(bandwidth);
9129 }
9130 
9131 /**
9132  * dce8_latency_watermark - get the latency watermark
9133  *
9134  * @wm: watermark calculation data
9135  *
9136  * Calculate the latency watermark (CIK).
9137  * Used for display watermark bandwidth calculations
9138  * Returns the latency watermark in ns
9139  */
9140 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9141 {
9142 	/* First calculate the latency in ns */
9143 	u32 mc_latency = 2000; /* 2000 ns. */
9144 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9145 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9146 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9147 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9148 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9149 		(wm->num_heads * cursor_line_pair_return_time);
9150 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9151 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9152 	u32 tmp, dmif_size = 12288;
9153 	fixed20_12 a, b, c;
9154 
9155 	if (wm->num_heads == 0)
9156 		return 0;
9157 
9158 	a.full = dfixed_const(2);
9159 	b.full = dfixed_const(1);
9160 	if ((wm->vsc.full > a.full) ||
9161 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9162 	    (wm->vtaps >= 5) ||
9163 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9164 		max_src_lines_per_dst_line = 4;
9165 	else
9166 		max_src_lines_per_dst_line = 2;
9167 
9168 	a.full = dfixed_const(available_bandwidth);
9169 	b.full = dfixed_const(wm->num_heads);
9170 	a.full = dfixed_div(a, b);
9171 
9172 	b.full = dfixed_const(mc_latency + 512);
9173 	c.full = dfixed_const(wm->disp_clk);
9174 	b.full = dfixed_div(b, c);
9175 
9176 	c.full = dfixed_const(dmif_size);
9177 	b.full = dfixed_div(c, b);
9178 
9179 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9180 
9181 	b.full = dfixed_const(1000);
9182 	c.full = dfixed_const(wm->disp_clk);
9183 	b.full = dfixed_div(c, b);
9184 	c.full = dfixed_const(wm->bytes_per_pixel);
9185 	b.full = dfixed_mul(b, c);
9186 
9187 	lb_fill_bw = min(tmp, dfixed_trunc(b));
9188 
9189 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9190 	b.full = dfixed_const(1000);
9191 	c.full = dfixed_const(lb_fill_bw);
9192 	b.full = dfixed_div(c, b);
9193 	a.full = dfixed_div(a, b);
9194 	line_fill_time = dfixed_trunc(a);
9195 
9196 	if (line_fill_time < wm->active_time)
9197 		return latency;
9198 	else
9199 		return latency + (line_fill_time - wm->active_time);
9200 
9201 }
9202 
9203 /**
9204  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9205  * average and available dram bandwidth
9206  *
9207  * @wm: watermark calculation data
9208  *
9209  * Check if the display average bandwidth fits in the display
9210  * dram bandwidth (CIK).
9211  * Used for display watermark bandwidth calculations
9212  * Returns true if the display fits, false if not.
9213  */
9214 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9215 {
9216 	if (dce8_average_bandwidth(wm) <=
9217 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9218 		return true;
9219 	else
9220 		return false;
9221 }
9222 
9223 /**
9224  * dce8_average_bandwidth_vs_available_bandwidth - check
9225  * average and available bandwidth
9226  *
9227  * @wm: watermark calculation data
9228  *
9229  * Check if the display average bandwidth fits in the display
9230  * available bandwidth (CIK).
9231  * Used for display watermark bandwidth calculations
9232  * Returns true if the display fits, false if not.
9233  */
9234 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9235 {
9236 	if (dce8_average_bandwidth(wm) <=
9237 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9238 		return true;
9239 	else
9240 		return false;
9241 }
9242 
9243 /**
9244  * dce8_check_latency_hiding - check latency hiding
9245  *
9246  * @wm: watermark calculation data
9247  *
9248  * Check latency hiding (CIK).
9249  * Used for display watermark bandwidth calculations
9250  * Returns true if the display fits, false if not.
9251  */
9252 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9253 {
9254 	u32 lb_partitions = wm->lb_size / wm->src_width;
9255 	u32 line_time = wm->active_time + wm->blank_time;
9256 	u32 latency_tolerant_lines;
9257 	u32 latency_hiding;
9258 	fixed20_12 a;
9259 
9260 	a.full = dfixed_const(1);
9261 	if (wm->vsc.full > a.full)
9262 		latency_tolerant_lines = 1;
9263 	else {
9264 		if (lb_partitions <= (wm->vtaps + 1))
9265 			latency_tolerant_lines = 1;
9266 		else
9267 			latency_tolerant_lines = 2;
9268 	}
9269 
9270 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9271 
9272 	if (dce8_latency_watermark(wm) <= latency_hiding)
9273 		return true;
9274 	else
9275 		return false;
9276 }
9277 
9278 /**
9279  * dce8_program_watermarks - program display watermarks
9280  *
9281  * @rdev: radeon_device pointer
9282  * @radeon_crtc: the selected display controller
9283  * @lb_size: line buffer size
9284  * @num_heads: number of display controllers in use
9285  *
9286  * Calculate and program the display watermarks for the
9287  * selected display controller (CIK).
9288  */
9289 static void dce8_program_watermarks(struct radeon_device *rdev,
9290 				    struct radeon_crtc *radeon_crtc,
9291 				    u32 lb_size, u32 num_heads)
9292 {
9293 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9294 	struct dce8_wm_params wm_low, wm_high;
9295 	u32 pixel_period;
9296 	u32 line_time = 0;
9297 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9298 	u32 tmp, wm_mask;
9299 
9300 	if (radeon_crtc->base.enabled && num_heads && mode) {
9301 		pixel_period = 1000000 / (u32)mode->clock;
9302 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9303 
9304 		/* watermark for high clocks */
9305 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9306 		    rdev->pm.dpm_enabled) {
9307 			wm_high.yclk =
9308 				radeon_dpm_get_mclk(rdev, false) * 10;
9309 			wm_high.sclk =
9310 				radeon_dpm_get_sclk(rdev, false) * 10;
9311 		} else {
9312 			wm_high.yclk = rdev->pm.current_mclk * 10;
9313 			wm_high.sclk = rdev->pm.current_sclk * 10;
9314 		}
9315 
9316 		wm_high.disp_clk = mode->clock;
9317 		wm_high.src_width = mode->crtc_hdisplay;
9318 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9319 		wm_high.blank_time = line_time - wm_high.active_time;
9320 		wm_high.interlaced = false;
9321 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9322 			wm_high.interlaced = true;
9323 		wm_high.vsc = radeon_crtc->vsc;
9324 		wm_high.vtaps = 1;
9325 		if (radeon_crtc->rmx_type != RMX_OFF)
9326 			wm_high.vtaps = 2;
9327 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9328 		wm_high.lb_size = lb_size;
9329 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9330 		wm_high.num_heads = num_heads;
9331 
9332 		/* set for high clocks */
9333 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9334 
9335 		/* possibly force display priority to high */
9336 		/* should really do this at mode validation time... */
9337 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9338 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9339 		    !dce8_check_latency_hiding(&wm_high) ||
9340 		    (rdev->disp_priority == 2)) {
9341 			DRM_DEBUG_KMS("force priority to high\n");
9342 		}
9343 
9344 		/* watermark for low clocks */
9345 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9346 		    rdev->pm.dpm_enabled) {
9347 			wm_low.yclk =
9348 				radeon_dpm_get_mclk(rdev, true) * 10;
9349 			wm_low.sclk =
9350 				radeon_dpm_get_sclk(rdev, true) * 10;
9351 		} else {
9352 			wm_low.yclk = rdev->pm.current_mclk * 10;
9353 			wm_low.sclk = rdev->pm.current_sclk * 10;
9354 		}
9355 
9356 		wm_low.disp_clk = mode->clock;
9357 		wm_low.src_width = mode->crtc_hdisplay;
9358 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9359 		wm_low.blank_time = line_time - wm_low.active_time;
9360 		wm_low.interlaced = false;
9361 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9362 			wm_low.interlaced = true;
9363 		wm_low.vsc = radeon_crtc->vsc;
9364 		wm_low.vtaps = 1;
9365 		if (radeon_crtc->rmx_type != RMX_OFF)
9366 			wm_low.vtaps = 2;
9367 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9368 		wm_low.lb_size = lb_size;
9369 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9370 		wm_low.num_heads = num_heads;
9371 
9372 		/* set for low clocks */
9373 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9374 
9375 		/* possibly force display priority to high */
9376 		/* should really do this at mode validation time... */
9377 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9378 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9379 		    !dce8_check_latency_hiding(&wm_low) ||
9380 		    (rdev->disp_priority == 2)) {
9381 			DRM_DEBUG_KMS("force priority to high\n");
9382 		}
9383 	}
9384 
9385 	/* select wm A */
9386 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9387 	tmp = wm_mask;
9388 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9389 	tmp |= LATENCY_WATERMARK_MASK(1);
9390 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9391 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9392 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9393 		LATENCY_HIGH_WATERMARK(line_time)));
9394 	/* select wm B */
9395 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9396 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9397 	tmp |= LATENCY_WATERMARK_MASK(2);
9398 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9399 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9400 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9401 		LATENCY_HIGH_WATERMARK(line_time)));
9402 	/* restore original selection */
9403 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9404 
9405 	/* save values for DPM */
9406 	radeon_crtc->line_time = line_time;
9407 	radeon_crtc->wm_high = latency_watermark_a;
9408 	radeon_crtc->wm_low = latency_watermark_b;
9409 }
9410 
9411 /**
9412  * dce8_bandwidth_update - program display watermarks
9413  *
9414  * @rdev: radeon_device pointer
9415  *
9416  * Calculate and program the display watermarks and line
9417  * buffer allocation (CIK).
9418  */
9419 void dce8_bandwidth_update(struct radeon_device *rdev)
9420 {
9421 	struct drm_display_mode *mode = NULL;
9422 	u32 num_heads = 0, lb_size;
9423 	int i;
9424 
9425 	radeon_update_display_priority(rdev);
9426 
9427 	for (i = 0; i < rdev->num_crtc; i++) {
9428 		if (rdev->mode_info.crtcs[i]->base.enabled)
9429 			num_heads++;
9430 	}
9431 	for (i = 0; i < rdev->num_crtc; i++) {
9432 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9433 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9434 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9435 	}
9436 }
9437 
9438 /**
9439  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9440  *
9441  * @rdev: radeon_device pointer
9442  *
9443  * Fetches a GPU clock counter snapshot (SI).
9444  * Returns the 64 bit clock counter snapshot.
9445  */
9446 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9447 {
9448 	uint64_t clock;
9449 
9450 	mutex_lock(&rdev->gpu_clock_mutex);
9451 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9452 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9453 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9454 	mutex_unlock(&rdev->gpu_clock_mutex);
9455 	return clock;
9456 }
9457 
9458 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9459                               u32 cntl_reg, u32 status_reg)
9460 {
9461 	int r, i;
9462 	struct atom_clock_dividers dividers;
9463 	uint32_t tmp;
9464 
9465 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9466 					   clock, false, &dividers);
9467 	if (r)
9468 		return r;
9469 
9470 	tmp = RREG32_SMC(cntl_reg);
9471 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9472 	tmp |= dividers.post_divider;
9473 	WREG32_SMC(cntl_reg, tmp);
9474 
9475 	for (i = 0; i < 100; i++) {
9476 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9477 			break;
9478 		mdelay(10);
9479 	}
9480 	if (i == 100)
9481 		return -ETIMEDOUT;
9482 
9483 	return 0;
9484 }
9485 
9486 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9487 {
9488 	int r = 0;
9489 
9490 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9491 	if (r)
9492 		return r;
9493 
9494 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9495 	return r;
9496 }
9497 
9498 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9499 {
9500 	int r, i;
9501 	struct atom_clock_dividers dividers;
9502 	u32 tmp;
9503 
9504 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9505 					   ecclk, false, &dividers);
9506 	if (r)
9507 		return r;
9508 
9509 	for (i = 0; i < 100; i++) {
9510 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9511 			break;
9512 		mdelay(10);
9513 	}
9514 	if (i == 100)
9515 		return -ETIMEDOUT;
9516 
9517 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9518 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9519 	tmp |= dividers.post_divider;
9520 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9521 
9522 	for (i = 0; i < 100; i++) {
9523 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9524 			break;
9525 		mdelay(10);
9526 	}
9527 	if (i == 100)
9528 		return -ETIMEDOUT;
9529 
9530 	return 0;
9531 }
9532 
9533 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9534 {
9535 	struct pci_dev *root = rdev->pdev->bus->self;
9536 	int bridge_pos, gpu_pos;
9537 	u32 speed_cntl, mask, current_data_rate;
9538 	int ret, i;
9539 	u16 tmp16;
9540 
9541 	if (radeon_pcie_gen2 == 0)
9542 		return;
9543 
9544 	if (rdev->flags & RADEON_IS_IGP)
9545 		return;
9546 
9547 	if (!(rdev->flags & RADEON_IS_PCIE))
9548 		return;
9549 
9550 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9551 	if (ret != 0)
9552 		return;
9553 
9554 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9555 		return;
9556 
9557 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9558 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9559 		LC_CURRENT_DATA_RATE_SHIFT;
9560 	if (mask & DRM_PCIE_SPEED_80) {
9561 		if (current_data_rate == 2) {
9562 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9563 			return;
9564 		}
9565 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9566 	} else if (mask & DRM_PCIE_SPEED_50) {
9567 		if (current_data_rate == 1) {
9568 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9569 			return;
9570 		}
9571 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9572 	}
9573 
9574 	bridge_pos = pci_pcie_cap(root);
9575 	if (!bridge_pos)
9576 		return;
9577 
9578 	gpu_pos = pci_pcie_cap(rdev->pdev);
9579 	if (!gpu_pos)
9580 		return;
9581 
9582 	if (mask & DRM_PCIE_SPEED_80) {
9583 		/* re-try equalization if gen3 is not already enabled */
9584 		if (current_data_rate != 2) {
9585 			u16 bridge_cfg, gpu_cfg;
9586 			u16 bridge_cfg2, gpu_cfg2;
9587 			u32 max_lw, current_lw, tmp;
9588 
9589 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9590 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9591 
9592 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9593 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9594 
9595 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9596 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9597 
9598 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9599 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9600 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9601 
9602 			if (current_lw < max_lw) {
9603 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9604 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9605 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9606 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9607 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9608 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9609 				}
9610 			}
9611 
9612 			for (i = 0; i < 10; i++) {
9613 				/* check status */
9614 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9615 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9616 					break;
9617 
9618 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9619 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9620 
9621 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9622 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9623 
9624 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9625 				tmp |= LC_SET_QUIESCE;
9626 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9627 
9628 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9629 				tmp |= LC_REDO_EQ;
9630 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9631 
9632 				mdelay(100);
9633 
9634 				/* linkctl */
9635 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9636 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9637 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9638 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9639 
9640 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9641 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9642 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9643 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9644 
9645 				/* linkctl2 */
9646 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9647 				tmp16 &= ~((1 << 4) | (7 << 9));
9648 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9649 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9650 
9651 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9652 				tmp16 &= ~((1 << 4) | (7 << 9));
9653 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9654 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9655 
9656 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9657 				tmp &= ~LC_SET_QUIESCE;
9658 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9659 			}
9660 		}
9661 	}
9662 
9663 	/* set the link speed */
9664 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9665 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9666 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9667 
9668 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9669 	tmp16 &= ~0xf;
9670 	if (mask & DRM_PCIE_SPEED_80)
9671 		tmp16 |= 3; /* gen3 */
9672 	else if (mask & DRM_PCIE_SPEED_50)
9673 		tmp16 |= 2; /* gen2 */
9674 	else
9675 		tmp16 |= 1; /* gen1 */
9676 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9677 
9678 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9679 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9680 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9681 
9682 	for (i = 0; i < rdev->usec_timeout; i++) {
9683 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9684 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9685 			break;
9686 		udelay(1);
9687 	}
9688 }
9689 
9690 static void cik_program_aspm(struct radeon_device *rdev)
9691 {
9692 	u32 data, orig;
9693 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9694 	bool disable_clkreq = false;
9695 
9696 	if (radeon_aspm == 0)
9697 		return;
9698 
9699 	/* XXX double check IGPs */
9700 	if (rdev->flags & RADEON_IS_IGP)
9701 		return;
9702 
9703 	if (!(rdev->flags & RADEON_IS_PCIE))
9704 		return;
9705 
9706 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9707 	data &= ~LC_XMIT_N_FTS_MASK;
9708 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9709 	if (orig != data)
9710 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9711 
9712 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9713 	data |= LC_GO_TO_RECOVERY;
9714 	if (orig != data)
9715 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9716 
9717 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9718 	data |= P_IGNORE_EDB_ERR;
9719 	if (orig != data)
9720 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9721 
9722 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9723 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9724 	data |= LC_PMI_TO_L1_DIS;
9725 	if (!disable_l0s)
9726 		data |= LC_L0S_INACTIVITY(7);
9727 
9728 	if (!disable_l1) {
9729 		data |= LC_L1_INACTIVITY(7);
9730 		data &= ~LC_PMI_TO_L1_DIS;
9731 		if (orig != data)
9732 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9733 
9734 		if (!disable_plloff_in_l1) {
9735 			bool clk_req_support;
9736 
9737 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9738 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9739 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9740 			if (orig != data)
9741 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9742 
9743 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9744 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9745 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9746 			if (orig != data)
9747 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9748 
9749 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9750 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9751 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9752 			if (orig != data)
9753 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9754 
9755 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9756 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9757 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9758 			if (orig != data)
9759 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9760 
9761 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9762 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9763 			data |= LC_DYN_LANES_PWR_STATE(3);
9764 			if (orig != data)
9765 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9766 
9767 			if (!disable_clkreq) {
9768 				struct pci_dev *root = rdev->pdev->bus->self;
9769 				u32 lnkcap;
9770 
9771 				clk_req_support = false;
9772 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9773 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9774 					clk_req_support = true;
9775 			} else {
9776 				clk_req_support = false;
9777 			}
9778 
9779 			if (clk_req_support) {
9780 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9781 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9782 				if (orig != data)
9783 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9784 
9785 				orig = data = RREG32_SMC(THM_CLK_CNTL);
9786 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9787 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9788 				if (orig != data)
9789 					WREG32_SMC(THM_CLK_CNTL, data);
9790 
9791 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9792 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9793 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9794 				if (orig != data)
9795 					WREG32_SMC(MISC_CLK_CTRL, data);
9796 
9797 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9798 				data &= ~BCLK_AS_XCLK;
9799 				if (orig != data)
9800 					WREG32_SMC(CG_CLKPIN_CNTL, data);
9801 
9802 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9803 				data &= ~FORCE_BIF_REFCLK_EN;
9804 				if (orig != data)
9805 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9806 
9807 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9808 				data &= ~MPLL_CLKOUT_SEL_MASK;
9809 				data |= MPLL_CLKOUT_SEL(4);
9810 				if (orig != data)
9811 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9812 			}
9813 		}
9814 	} else {
9815 		if (orig != data)
9816 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9817 	}
9818 
9819 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9820 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9821 	if (orig != data)
9822 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9823 
9824 	if (!disable_l0s) {
9825 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9826 		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9827 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9828 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9829 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9830 				data &= ~LC_L0S_INACTIVITY_MASK;
9831 				if (orig != data)
9832 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9833 			}
9834 		}
9835 	}
9836 }
9837