xref: /openbmc/linux/drivers/gpu/drm/radeon/cik.c (revision aa0dc6a7)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <linux/pci.h>
28 #include <linux/slab.h>
29 
30 #include <drm/drm_vblank.h>
31 
32 #include "atom.h"
33 #include "evergreen.h"
34 #include "cik_blit_shaders.h"
35 #include "cik.h"
36 #include "cikd.h"
37 #include "clearstate_ci.h"
38 #include "r600.h"
39 #include "radeon.h"
40 #include "radeon_asic.h"
41 #include "radeon_audio.h"
42 #include "radeon_ucode.h"
43 #include "si.h"
44 #include "vce.h"
45 
/*
 * Default SH_MEM_CONFIG value for gfx: only the alignment-mode field is
 * set, to SH_MEM_ALIGNMENT_MODE_UNALIGNED.
 * NOTE(review): where this value is programmed is not visible in this
 * part of the file.
 */
46 #define SH_MEM_CONFIG_GFX_DEFAULT \
47 	ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
48 
/*
 * Firmware images declared for this module via MODULE_FIRMWARE().
 *
 * Each chip (Bonaire, Hawaii, Kaveri, Kabini, Mullins) is listed twice:
 * once with an upper-case file name prefix and once with a lower-case one.
 * The two sets are not identical: the "_mc2" images only appear in the
 * upper-case sets, while the "_k_smc" and "kaveri_mec2" images only appear
 * in the lower-case sets.
 * NOTE(review): presumably the two spellings are two generations of the
 * firmware naming scheme -- confirm against the firmware loading code,
 * which is not visible here.
 */
49 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
50 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
51 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
52 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
53 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
54 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
55 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
56 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
57 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
58 
59 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
60 MODULE_FIRMWARE("radeon/bonaire_me.bin");
61 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
62 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
63 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
64 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
65 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
66 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
67 MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
68 
69 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
70 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
71 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
72 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
73 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
74 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
75 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
76 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
77 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
78 
79 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
80 MODULE_FIRMWARE("radeon/hawaii_me.bin");
81 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
82 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
83 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
84 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
85 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
86 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
87 MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
88 
89 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
90 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
91 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
92 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
93 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
94 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
95 
96 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
97 MODULE_FIRMWARE("radeon/kaveri_me.bin");
98 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
99 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
100 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
101 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
102 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
103 
104 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
105 MODULE_FIRMWARE("radeon/KABINI_me.bin");
106 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
107 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
108 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
109 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
110 
111 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
112 MODULE_FIRMWARE("radeon/kabini_me.bin");
113 MODULE_FIRMWARE("radeon/kabini_ce.bin");
114 MODULE_FIRMWARE("radeon/kabini_mec.bin");
115 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
116 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
117 
118 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
119 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
120 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
121 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
122 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
123 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
124 
125 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
126 MODULE_FIRMWARE("radeon/mullins_me.bin");
127 MODULE_FIRMWARE("radeon/mullins_ce.bin");
128 MODULE_FIRMWARE("radeon/mullins_mec.bin");
129 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
130 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
131 
/*
 * Forward declarations of file-local (static) helpers so they can be
 * referenced before their definitions; the definitions themselves are
 * not in this part of the file.
 */
132 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
133 static void cik_rlc_stop(struct radeon_device *rdev);
134 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
135 static void cik_program_aspm(struct radeon_device *rdev);
136 static void cik_init_pg(struct radeon_device *rdev);
137 static void cik_init_cg(struct radeon_device *rdev);
138 static void cik_fini_pg(struct radeon_device *rdev);
139 static void cik_fini_cg(struct radeon_device *rdev);
140 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
141 					  bool enable);
142 
143 /**
144  * cik_get_allowed_info_register - fetch the register for the info ioctl
145  *
146  * @rdev: radeon_device pointer
147  * @reg: register offset in bytes
148  * @val: register value
149  *
150  * Returns 0 for success or -EINVAL for an invalid register
151  *
152  */
153 int cik_get_allowed_info_register(struct radeon_device *rdev,
154 				  u32 reg, u32 *val)
155 {
156 	switch (reg) {
157 	case GRBM_STATUS:
158 	case GRBM_STATUS2:
159 	case GRBM_STATUS_SE0:
160 	case GRBM_STATUS_SE1:
161 	case GRBM_STATUS_SE2:
162 	case GRBM_STATUS_SE3:
163 	case SRBM_STATUS:
164 	case SRBM_STATUS2:
165 	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
166 	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
167 	case UVD_STATUS:
168 	/* TODO VCE */
169 		*val = RREG32(reg);
170 		return 0;
171 	default:
172 		return -EINVAL;
173 	}
174 }
175 
176 /*
177  * Indirect registers accessor
178  */
179 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
180 {
181 	unsigned long flags;
182 	u32 r;
183 
184 	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
185 	WREG32(CIK_DIDT_IND_INDEX, (reg));
186 	r = RREG32(CIK_DIDT_IND_DATA);
187 	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
188 	return r;
189 }
190 
191 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
192 {
193 	unsigned long flags;
194 
195 	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
196 	WREG32(CIK_DIDT_IND_INDEX, (reg));
197 	WREG32(CIK_DIDT_IND_DATA, (v));
198 	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
199 }
200 
201 /* get temperature in millidegrees */
202 int ci_get_temp(struct radeon_device *rdev)
203 {
204 	u32 temp;
205 	int actual_temp = 0;
206 
207 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
208 		CTF_TEMP_SHIFT;
209 
210 	if (temp & 0x200)
211 		actual_temp = 255;
212 	else
213 		actual_temp = temp & 0x1ff;
214 
215 	return actual_temp * 1000;
216 }
217 
218 /* get temperature in millidegrees */
219 int kv_get_temp(struct radeon_device *rdev)
220 {
221 	u32 temp;
222 	int actual_temp = 0;
223 
224 	temp = RREG32_SMC(0xC0300E0C);
225 
226 	if (temp)
227 		actual_temp = (temp / 8) - 49;
228 	else
229 		actual_temp = 0;
230 
231 	return actual_temp * 1000;
232 }
233 
234 /*
235  * Indirect registers accessor
236  */
237 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
238 {
239 	unsigned long flags;
240 	u32 r;
241 
242 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
243 	WREG32(PCIE_INDEX, reg);
244 	(void)RREG32(PCIE_INDEX);
245 	r = RREG32(PCIE_DATA);
246 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
247 	return r;
248 }
249 
250 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
251 {
252 	unsigned long flags;
253 
254 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
255 	WREG32(PCIE_INDEX, reg);
256 	(void)RREG32(PCIE_INDEX);
257 	WREG32(PCIE_DATA, v);
258 	(void)RREG32(PCIE_DATA);
259 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
260 }
261 
/*
 * spectre_rlc_save_restore_register_list - flat table of u32 words.
 *
 * Layout as visible in the initialiser:
 *  - most entries are a packed word (upper16 << 16) | (byte_offset >> 2),
 *    i.e. the low half holds a dword register index, each followed by a
 *    0x00000000 word;
 *  - a bare 0x3 word and a bare 0x5 word split the table into sections;
 *  - the five packed words after 0x5 are NOT followed by a zero word.
 *
 * NOTE(review): going by the name this is presumably the register list
 * handed to the RLC for save/restore on Spectre parts; the meaning of the
 * upper 16 bits, of the zero words and of the 0x3/0x5 words is not visible
 * in this part of the file -- confirm against the code that consumes it.
 * Order and values must not be changed without checking that consumer.
 */
262 static const u32 spectre_rlc_save_restore_register_list[] =
263 {
264 	(0x0e00 << 16) | (0xc12c >> 2),
265 	0x00000000,
266 	(0x0e00 << 16) | (0xc140 >> 2),
267 	0x00000000,
268 	(0x0e00 << 16) | (0xc150 >> 2),
269 	0x00000000,
270 	(0x0e00 << 16) | (0xc15c >> 2),
271 	0x00000000,
272 	(0x0e00 << 16) | (0xc168 >> 2),
273 	0x00000000,
274 	(0x0e00 << 16) | (0xc170 >> 2),
275 	0x00000000,
276 	(0x0e00 << 16) | (0xc178 >> 2),
277 	0x00000000,
278 	(0x0e00 << 16) | (0xc204 >> 2),
279 	0x00000000,
280 	(0x0e00 << 16) | (0xc2b4 >> 2),
281 	0x00000000,
282 	(0x0e00 << 16) | (0xc2b8 >> 2),
283 	0x00000000,
284 	(0x0e00 << 16) | (0xc2bc >> 2),
285 	0x00000000,
286 	(0x0e00 << 16) | (0xc2c0 >> 2),
287 	0x00000000,
288 	(0x0e00 << 16) | (0x8228 >> 2),
289 	0x00000000,
290 	(0x0e00 << 16) | (0x829c >> 2),
291 	0x00000000,
292 	(0x0e00 << 16) | (0x869c >> 2),
293 	0x00000000,
294 	(0x0600 << 16) | (0x98f4 >> 2),
295 	0x00000000,
296 	(0x0e00 << 16) | (0x98f8 >> 2),
297 	0x00000000,
298 	(0x0e00 << 16) | (0x9900 >> 2),
299 	0x00000000,
300 	(0x0e00 << 16) | (0xc260 >> 2),
301 	0x00000000,
302 	(0x0e00 << 16) | (0x90e8 >> 2),
303 	0x00000000,
304 	(0x0e00 << 16) | (0x3c000 >> 2),
305 	0x00000000,
306 	(0x0e00 << 16) | (0x3c00c >> 2),
307 	0x00000000,
308 	(0x0e00 << 16) | (0x8c1c >> 2),
309 	0x00000000,
310 	(0x0e00 << 16) | (0x9700 >> 2),
311 	0x00000000,
312 	(0x0e00 << 16) | (0xcd20 >> 2),
313 	0x00000000,
314 	(0x4e00 << 16) | (0xcd20 >> 2),
315 	0x00000000,
316 	(0x5e00 << 16) | (0xcd20 >> 2),
317 	0x00000000,
318 	(0x6e00 << 16) | (0xcd20 >> 2),
319 	0x00000000,
320 	(0x7e00 << 16) | (0xcd20 >> 2),
321 	0x00000000,
322 	(0x8e00 << 16) | (0xcd20 >> 2),
323 	0x00000000,
324 	(0x9e00 << 16) | (0xcd20 >> 2),
325 	0x00000000,
326 	(0xae00 << 16) | (0xcd20 >> 2),
327 	0x00000000,
328 	(0xbe00 << 16) | (0xcd20 >> 2),
329 	0x00000000,
330 	(0x0e00 << 16) | (0x89bc >> 2),
331 	0x00000000,
332 	(0x0e00 << 16) | (0x8900 >> 2),
333 	0x00000000,
334 	0x3, /* bare section word; meaning not visible here */
335 	(0x0e00 << 16) | (0xc130 >> 2),
336 	0x00000000,
337 	(0x0e00 << 16) | (0xc134 >> 2),
338 	0x00000000,
339 	(0x0e00 << 16) | (0xc1fc >> 2),
340 	0x00000000,
341 	(0x0e00 << 16) | (0xc208 >> 2),
342 	0x00000000,
343 	(0x0e00 << 16) | (0xc264 >> 2),
344 	0x00000000,
345 	(0x0e00 << 16) | (0xc268 >> 2),
346 	0x00000000,
347 	(0x0e00 << 16) | (0xc26c >> 2),
348 	0x00000000,
349 	(0x0e00 << 16) | (0xc270 >> 2),
350 	0x00000000,
351 	(0x0e00 << 16) | (0xc274 >> 2),
352 	0x00000000,
353 	(0x0e00 << 16) | (0xc278 >> 2),
354 	0x00000000,
355 	(0x0e00 << 16) | (0xc27c >> 2),
356 	0x00000000,
357 	(0x0e00 << 16) | (0xc280 >> 2),
358 	0x00000000,
359 	(0x0e00 << 16) | (0xc284 >> 2),
360 	0x00000000,
361 	(0x0e00 << 16) | (0xc288 >> 2),
362 	0x00000000,
363 	(0x0e00 << 16) | (0xc28c >> 2),
364 	0x00000000,
365 	(0x0e00 << 16) | (0xc290 >> 2),
366 	0x00000000,
367 	(0x0e00 << 16) | (0xc294 >> 2),
368 	0x00000000,
369 	(0x0e00 << 16) | (0xc298 >> 2),
370 	0x00000000,
371 	(0x0e00 << 16) | (0xc29c >> 2),
372 	0x00000000,
373 	(0x0e00 << 16) | (0xc2a0 >> 2),
374 	0x00000000,
375 	(0x0e00 << 16) | (0xc2a4 >> 2),
376 	0x00000000,
377 	(0x0e00 << 16) | (0xc2a8 >> 2),
378 	0x00000000,
379 	(0x0e00 << 16) | (0xc2ac  >> 2),
380 	0x00000000,
381 	(0x0e00 << 16) | (0xc2b0 >> 2),
382 	0x00000000,
383 	(0x0e00 << 16) | (0x301d0 >> 2),
384 	0x00000000,
385 	(0x0e00 << 16) | (0x30238 >> 2),
386 	0x00000000,
387 	(0x0e00 << 16) | (0x30250 >> 2),
388 	0x00000000,
389 	(0x0e00 << 16) | (0x30254 >> 2),
390 	0x00000000,
391 	(0x0e00 << 16) | (0x30258 >> 2),
392 	0x00000000,
393 	(0x0e00 << 16) | (0x3025c >> 2),
394 	0x00000000,
395 	(0x4e00 << 16) | (0xc900 >> 2),
396 	0x00000000,
397 	(0x5e00 << 16) | (0xc900 >> 2),
398 	0x00000000,
399 	(0x6e00 << 16) | (0xc900 >> 2),
400 	0x00000000,
401 	(0x7e00 << 16) | (0xc900 >> 2),
402 	0x00000000,
403 	(0x8e00 << 16) | (0xc900 >> 2),
404 	0x00000000,
405 	(0x9e00 << 16) | (0xc900 >> 2),
406 	0x00000000,
407 	(0xae00 << 16) | (0xc900 >> 2),
408 	0x00000000,
409 	(0xbe00 << 16) | (0xc900 >> 2),
410 	0x00000000,
411 	(0x4e00 << 16) | (0xc904 >> 2),
412 	0x00000000,
413 	(0x5e00 << 16) | (0xc904 >> 2),
414 	0x00000000,
415 	(0x6e00 << 16) | (0xc904 >> 2),
416 	0x00000000,
417 	(0x7e00 << 16) | (0xc904 >> 2),
418 	0x00000000,
419 	(0x8e00 << 16) | (0xc904 >> 2),
420 	0x00000000,
421 	(0x9e00 << 16) | (0xc904 >> 2),
422 	0x00000000,
423 	(0xae00 << 16) | (0xc904 >> 2),
424 	0x00000000,
425 	(0xbe00 << 16) | (0xc904 >> 2),
426 	0x00000000,
427 	(0x4e00 << 16) | (0xc908 >> 2),
428 	0x00000000,
429 	(0x5e00 << 16) | (0xc908 >> 2),
430 	0x00000000,
431 	(0x6e00 << 16) | (0xc908 >> 2),
432 	0x00000000,
433 	(0x7e00 << 16) | (0xc908 >> 2),
434 	0x00000000,
435 	(0x8e00 << 16) | (0xc908 >> 2),
436 	0x00000000,
437 	(0x9e00 << 16) | (0xc908 >> 2),
438 	0x00000000,
439 	(0xae00 << 16) | (0xc908 >> 2),
440 	0x00000000,
441 	(0xbe00 << 16) | (0xc908 >> 2),
442 	0x00000000,
443 	(0x4e00 << 16) | (0xc90c >> 2),
444 	0x00000000,
445 	(0x5e00 << 16) | (0xc90c >> 2),
446 	0x00000000,
447 	(0x6e00 << 16) | (0xc90c >> 2),
448 	0x00000000,
449 	(0x7e00 << 16) | (0xc90c >> 2),
450 	0x00000000,
451 	(0x8e00 << 16) | (0xc90c >> 2),
452 	0x00000000,
453 	(0x9e00 << 16) | (0xc90c >> 2),
454 	0x00000000,
455 	(0xae00 << 16) | (0xc90c >> 2),
456 	0x00000000,
457 	(0xbe00 << 16) | (0xc90c >> 2),
458 	0x00000000,
459 	(0x4e00 << 16) | (0xc910 >> 2),
460 	0x00000000,
461 	(0x5e00 << 16) | (0xc910 >> 2),
462 	0x00000000,
463 	(0x6e00 << 16) | (0xc910 >> 2),
464 	0x00000000,
465 	(0x7e00 << 16) | (0xc910 >> 2),
466 	0x00000000,
467 	(0x8e00 << 16) | (0xc910 >> 2),
468 	0x00000000,
469 	(0x9e00 << 16) | (0xc910 >> 2),
470 	0x00000000,
471 	(0xae00 << 16) | (0xc910 >> 2),
472 	0x00000000,
473 	(0xbe00 << 16) | (0xc910 >> 2),
474 	0x00000000,
475 	(0x0e00 << 16) | (0xc99c >> 2),
476 	0x00000000,
477 	(0x0e00 << 16) | (0x9834 >> 2),
478 	0x00000000,
479 	(0x0000 << 16) | (0x30f00 >> 2),
480 	0x00000000,
481 	(0x0001 << 16) | (0x30f00 >> 2),
482 	0x00000000,
483 	(0x0000 << 16) | (0x30f04 >> 2),
484 	0x00000000,
485 	(0x0001 << 16) | (0x30f04 >> 2),
486 	0x00000000,
487 	(0x0000 << 16) | (0x30f08 >> 2),
488 	0x00000000,
489 	(0x0001 << 16) | (0x30f08 >> 2),
490 	0x00000000,
491 	(0x0000 << 16) | (0x30f0c >> 2),
492 	0x00000000,
493 	(0x0001 << 16) | (0x30f0c >> 2),
494 	0x00000000,
495 	(0x0600 << 16) | (0x9b7c >> 2),
496 	0x00000000,
497 	(0x0e00 << 16) | (0x8a14 >> 2),
498 	0x00000000,
499 	(0x0e00 << 16) | (0x8a18 >> 2),
500 	0x00000000,
501 	(0x0600 << 16) | (0x30a00 >> 2),
502 	0x00000000,
503 	(0x0e00 << 16) | (0x8bf0 >> 2),
504 	0x00000000,
505 	(0x0e00 << 16) | (0x8bcc >> 2),
506 	0x00000000,
507 	(0x0e00 << 16) | (0x8b24 >> 2),
508 	0x00000000,
509 	(0x0e00 << 16) | (0x30a04 >> 2),
510 	0x00000000,
511 	(0x0600 << 16) | (0x30a10 >> 2),
512 	0x00000000,
513 	(0x0600 << 16) | (0x30a14 >> 2),
514 	0x00000000,
515 	(0x0600 << 16) | (0x30a18 >> 2),
516 	0x00000000,
517 	(0x0600 << 16) | (0x30a2c >> 2),
518 	0x00000000,
519 	(0x0e00 << 16) | (0xc700 >> 2),
520 	0x00000000,
521 	(0x0e00 << 16) | (0xc704 >> 2),
522 	0x00000000,
523 	(0x0e00 << 16) | (0xc708 >> 2),
524 	0x00000000,
525 	(0x0e00 << 16) | (0xc768 >> 2),
526 	0x00000000,
527 	(0x0400 << 16) | (0xc770 >> 2),
528 	0x00000000,
529 	(0x0400 << 16) | (0xc774 >> 2),
530 	0x00000000,
531 	(0x0400 << 16) | (0xc778 >> 2),
532 	0x00000000,
533 	(0x0400 << 16) | (0xc77c >> 2),
534 	0x00000000,
535 	(0x0400 << 16) | (0xc780 >> 2),
536 	0x00000000,
537 	(0x0400 << 16) | (0xc784 >> 2),
538 	0x00000000,
539 	(0x0400 << 16) | (0xc788 >> 2),
540 	0x00000000,
541 	(0x0400 << 16) | (0xc78c >> 2),
542 	0x00000000,
543 	(0x0400 << 16) | (0xc798 >> 2),
544 	0x00000000,
545 	(0x0400 << 16) | (0xc79c >> 2),
546 	0x00000000,
547 	(0x0400 << 16) | (0xc7a0 >> 2),
548 	0x00000000,
549 	(0x0400 << 16) | (0xc7a4 >> 2),
550 	0x00000000,
551 	(0x0400 << 16) | (0xc7a8 >> 2),
552 	0x00000000,
553 	(0x0400 << 16) | (0xc7ac >> 2),
554 	0x00000000,
555 	(0x0400 << 16) | (0xc7b0 >> 2),
556 	0x00000000,
557 	(0x0400 << 16) | (0xc7b4 >> 2),
558 	0x00000000,
559 	(0x0e00 << 16) | (0x9100 >> 2),
560 	0x00000000,
561 	(0x0e00 << 16) | (0x3c010 >> 2),
562 	0x00000000,
563 	(0x0e00 << 16) | (0x92a8 >> 2),
564 	0x00000000,
565 	(0x0e00 << 16) | (0x92ac >> 2),
566 	0x00000000,
567 	(0x0e00 << 16) | (0x92b4 >> 2),
568 	0x00000000,
569 	(0x0e00 << 16) | (0x92b8 >> 2),
570 	0x00000000,
571 	(0x0e00 << 16) | (0x92bc >> 2),
572 	0x00000000,
573 	(0x0e00 << 16) | (0x92c0 >> 2),
574 	0x00000000,
575 	(0x0e00 << 16) | (0x92c4 >> 2),
576 	0x00000000,
577 	(0x0e00 << 16) | (0x92c8 >> 2),
578 	0x00000000,
579 	(0x0e00 << 16) | (0x92cc >> 2),
580 	0x00000000,
581 	(0x0e00 << 16) | (0x92d0 >> 2),
582 	0x00000000,
583 	(0x0e00 << 16) | (0x8c00 >> 2),
584 	0x00000000,
585 	(0x0e00 << 16) | (0x8c04 >> 2),
586 	0x00000000,
587 	(0x0e00 << 16) | (0x8c20 >> 2),
588 	0x00000000,
589 	(0x0e00 << 16) | (0x8c38 >> 2),
590 	0x00000000,
591 	(0x0e00 << 16) | (0x8c3c >> 2),
592 	0x00000000,
593 	(0x0e00 << 16) | (0xae00 >> 2),
594 	0x00000000,
595 	(0x0e00 << 16) | (0x9604 >> 2),
596 	0x00000000,
597 	(0x0e00 << 16) | (0xac08 >> 2),
598 	0x00000000,
599 	(0x0e00 << 16) | (0xac0c >> 2),
600 	0x00000000,
601 	(0x0e00 << 16) | (0xac10 >> 2),
602 	0x00000000,
603 	(0x0e00 << 16) | (0xac14 >> 2),
604 	0x00000000,
605 	(0x0e00 << 16) | (0xac58 >> 2),
606 	0x00000000,
607 	(0x0e00 << 16) | (0xac68 >> 2),
608 	0x00000000,
609 	(0x0e00 << 16) | (0xac6c >> 2),
610 	0x00000000,
611 	(0x0e00 << 16) | (0xac70 >> 2),
612 	0x00000000,
613 	(0x0e00 << 16) | (0xac74 >> 2),
614 	0x00000000,
615 	(0x0e00 << 16) | (0xac78 >> 2),
616 	0x00000000,
617 	(0x0e00 << 16) | (0xac7c >> 2),
618 	0x00000000,
619 	(0x0e00 << 16) | (0xac80 >> 2),
620 	0x00000000,
621 	(0x0e00 << 16) | (0xac84 >> 2),
622 	0x00000000,
623 	(0x0e00 << 16) | (0xac88 >> 2),
624 	0x00000000,
625 	(0x0e00 << 16) | (0xac8c >> 2),
626 	0x00000000,
627 	(0x0e00 << 16) | (0x970c >> 2),
628 	0x00000000,
629 	(0x0e00 << 16) | (0x9714 >> 2),
630 	0x00000000,
631 	(0x0e00 << 16) | (0x9718 >> 2),
632 	0x00000000,
633 	(0x0e00 << 16) | (0x971c >> 2),
634 	0x00000000,
635 	(0x0e00 << 16) | (0x31068 >> 2),
636 	0x00000000,
637 	(0x4e00 << 16) | (0x31068 >> 2),
638 	0x00000000,
639 	(0x5e00 << 16) | (0x31068 >> 2),
640 	0x00000000,
641 	(0x6e00 << 16) | (0x31068 >> 2),
642 	0x00000000,
643 	(0x7e00 << 16) | (0x31068 >> 2),
644 	0x00000000,
645 	(0x8e00 << 16) | (0x31068 >> 2),
646 	0x00000000,
647 	(0x9e00 << 16) | (0x31068 >> 2),
648 	0x00000000,
649 	(0xae00 << 16) | (0x31068 >> 2),
650 	0x00000000,
651 	(0xbe00 << 16) | (0x31068 >> 2),
652 	0x00000000,
653 	(0x0e00 << 16) | (0xcd10 >> 2),
654 	0x00000000,
655 	(0x0e00 << 16) | (0xcd14 >> 2),
656 	0x00000000,
657 	(0x0e00 << 16) | (0x88b0 >> 2),
658 	0x00000000,
659 	(0x0e00 << 16) | (0x88b4 >> 2),
660 	0x00000000,
661 	(0x0e00 << 16) | (0x88b8 >> 2),
662 	0x00000000,
663 	(0x0e00 << 16) | (0x88bc >> 2),
664 	0x00000000,
665 	(0x0400 << 16) | (0x89c0 >> 2),
666 	0x00000000,
667 	(0x0e00 << 16) | (0x88c4 >> 2),
668 	0x00000000,
669 	(0x0e00 << 16) | (0x88c8 >> 2),
670 	0x00000000,
671 	(0x0e00 << 16) | (0x88d0 >> 2),
672 	0x00000000,
673 	(0x0e00 << 16) | (0x88d4 >> 2),
674 	0x00000000,
675 	(0x0e00 << 16) | (0x88d8 >> 2),
676 	0x00000000,
677 	(0x0e00 << 16) | (0x8980 >> 2),
678 	0x00000000,
679 	(0x0e00 << 16) | (0x30938 >> 2),
680 	0x00000000,
681 	(0x0e00 << 16) | (0x3093c >> 2),
682 	0x00000000,
683 	(0x0e00 << 16) | (0x30940 >> 2),
684 	0x00000000,
685 	(0x0e00 << 16) | (0x89a0 >> 2),
686 	0x00000000,
687 	(0x0e00 << 16) | (0x30900 >> 2),
688 	0x00000000,
689 	(0x0e00 << 16) | (0x30904 >> 2),
690 	0x00000000,
691 	(0x0e00 << 16) | (0x89b4 >> 2),
692 	0x00000000,
693 	(0x0e00 << 16) | (0x3c210 >> 2),
694 	0x00000000,
695 	(0x0e00 << 16) | (0x3c214 >> 2),
696 	0x00000000,
697 	(0x0e00 << 16) | (0x3c218 >> 2),
698 	0x00000000,
699 	(0x0e00 << 16) | (0x8904 >> 2),
700 	0x00000000,
701 	0x5, /* bare section word; the five entries below have no zero word */
702 	(0x0e00 << 16) | (0x8c28 >> 2),
703 	(0x0e00 << 16) | (0x8c2c >> 2),
704 	(0x0e00 << 16) | (0x8c30 >> 2),
705 	(0x0e00 << 16) | (0x8c34 >> 2),
706 	(0x0e00 << 16) | (0x9600 >> 2),
707 };
708 
/*
 * kalindi_rlc_save_restore_register_list - flat table of u32 words.
 *
 * Layout as visible in the initialiser:
 *  - most entries are a packed word (upper16 << 16) | (byte_offset >> 2),
 *    i.e. the low half holds a dword register index, each followed by a
 *    0x00000000 word;
 *  - a bare 0x3 word and a bare 0x5 word split the table into sections;
 *  - the five packed words after 0x5 are NOT followed by a zero word.
 *
 * NOTE(review): going by the name this is presumably the register list
 * handed to the RLC for save/restore on Kalindi parts; the meaning of the
 * upper 16 bits, of the zero words and of the 0x3/0x5 words is not visible
 * in this part of the file -- confirm against the code that consumes it.
 * Order and values must not be changed without checking that consumer.
 */
709 static const u32 kalindi_rlc_save_restore_register_list[] =
710 {
711 	(0x0e00 << 16) | (0xc12c >> 2),
712 	0x00000000,
713 	(0x0e00 << 16) | (0xc140 >> 2),
714 	0x00000000,
715 	(0x0e00 << 16) | (0xc150 >> 2),
716 	0x00000000,
717 	(0x0e00 << 16) | (0xc15c >> 2),
718 	0x00000000,
719 	(0x0e00 << 16) | (0xc168 >> 2),
720 	0x00000000,
721 	(0x0e00 << 16) | (0xc170 >> 2),
722 	0x00000000,
723 	(0x0e00 << 16) | (0xc204 >> 2),
724 	0x00000000,
725 	(0x0e00 << 16) | (0xc2b4 >> 2),
726 	0x00000000,
727 	(0x0e00 << 16) | (0xc2b8 >> 2),
728 	0x00000000,
729 	(0x0e00 << 16) | (0xc2bc >> 2),
730 	0x00000000,
731 	(0x0e00 << 16) | (0xc2c0 >> 2),
732 	0x00000000,
733 	(0x0e00 << 16) | (0x8228 >> 2),
734 	0x00000000,
735 	(0x0e00 << 16) | (0x829c >> 2),
736 	0x00000000,
737 	(0x0e00 << 16) | (0x869c >> 2),
738 	0x00000000,
739 	(0x0600 << 16) | (0x98f4 >> 2),
740 	0x00000000,
741 	(0x0e00 << 16) | (0x98f8 >> 2),
742 	0x00000000,
743 	(0x0e00 << 16) | (0x9900 >> 2),
744 	0x00000000,
745 	(0x0e00 << 16) | (0xc260 >> 2),
746 	0x00000000,
747 	(0x0e00 << 16) | (0x90e8 >> 2),
748 	0x00000000,
749 	(0x0e00 << 16) | (0x3c000 >> 2),
750 	0x00000000,
751 	(0x0e00 << 16) | (0x3c00c >> 2),
752 	0x00000000,
753 	(0x0e00 << 16) | (0x8c1c >> 2),
754 	0x00000000,
755 	(0x0e00 << 16) | (0x9700 >> 2),
756 	0x00000000,
757 	(0x0e00 << 16) | (0xcd20 >> 2),
758 	0x00000000,
759 	(0x4e00 << 16) | (0xcd20 >> 2),
760 	0x00000000,
761 	(0x5e00 << 16) | (0xcd20 >> 2),
762 	0x00000000,
763 	(0x6e00 << 16) | (0xcd20 >> 2),
764 	0x00000000,
765 	(0x7e00 << 16) | (0xcd20 >> 2),
766 	0x00000000,
767 	(0x0e00 << 16) | (0x89bc >> 2),
768 	0x00000000,
769 	(0x0e00 << 16) | (0x8900 >> 2),
770 	0x00000000,
771 	0x3, /* bare section word; meaning not visible here */
772 	(0x0e00 << 16) | (0xc130 >> 2),
773 	0x00000000,
774 	(0x0e00 << 16) | (0xc134 >> 2),
775 	0x00000000,
776 	(0x0e00 << 16) | (0xc1fc >> 2),
777 	0x00000000,
778 	(0x0e00 << 16) | (0xc208 >> 2),
779 	0x00000000,
780 	(0x0e00 << 16) | (0xc264 >> 2),
781 	0x00000000,
782 	(0x0e00 << 16) | (0xc268 >> 2),
783 	0x00000000,
784 	(0x0e00 << 16) | (0xc26c >> 2),
785 	0x00000000,
786 	(0x0e00 << 16) | (0xc270 >> 2),
787 	0x00000000,
788 	(0x0e00 << 16) | (0xc274 >> 2),
789 	0x00000000,
790 	(0x0e00 << 16) | (0xc28c >> 2),
791 	0x00000000,
792 	(0x0e00 << 16) | (0xc290 >> 2),
793 	0x00000000,
794 	(0x0e00 << 16) | (0xc294 >> 2),
795 	0x00000000,
796 	(0x0e00 << 16) | (0xc298 >> 2),
797 	0x00000000,
798 	(0x0e00 << 16) | (0xc2a0 >> 2),
799 	0x00000000,
800 	(0x0e00 << 16) | (0xc2a4 >> 2),
801 	0x00000000,
802 	(0x0e00 << 16) | (0xc2a8 >> 2),
803 	0x00000000,
804 	(0x0e00 << 16) | (0xc2ac >> 2),
805 	0x00000000,
806 	(0x0e00 << 16) | (0x301d0 >> 2),
807 	0x00000000,
808 	(0x0e00 << 16) | (0x30238 >> 2),
809 	0x00000000,
810 	(0x0e00 << 16) | (0x30250 >> 2),
811 	0x00000000,
812 	(0x0e00 << 16) | (0x30254 >> 2),
813 	0x00000000,
814 	(0x0e00 << 16) | (0x30258 >> 2),
815 	0x00000000,
816 	(0x0e00 << 16) | (0x3025c >> 2),
817 	0x00000000,
818 	(0x4e00 << 16) | (0xc900 >> 2),
819 	0x00000000,
820 	(0x5e00 << 16) | (0xc900 >> 2),
821 	0x00000000,
822 	(0x6e00 << 16) | (0xc900 >> 2),
823 	0x00000000,
824 	(0x7e00 << 16) | (0xc900 >> 2),
825 	0x00000000,
826 	(0x4e00 << 16) | (0xc904 >> 2),
827 	0x00000000,
828 	(0x5e00 << 16) | (0xc904 >> 2),
829 	0x00000000,
830 	(0x6e00 << 16) | (0xc904 >> 2),
831 	0x00000000,
832 	(0x7e00 << 16) | (0xc904 >> 2),
833 	0x00000000,
834 	(0x4e00 << 16) | (0xc908 >> 2),
835 	0x00000000,
836 	(0x5e00 << 16) | (0xc908 >> 2),
837 	0x00000000,
838 	(0x6e00 << 16) | (0xc908 >> 2),
839 	0x00000000,
840 	(0x7e00 << 16) | (0xc908 >> 2),
841 	0x00000000,
842 	(0x4e00 << 16) | (0xc90c >> 2),
843 	0x00000000,
844 	(0x5e00 << 16) | (0xc90c >> 2),
845 	0x00000000,
846 	(0x6e00 << 16) | (0xc90c >> 2),
847 	0x00000000,
848 	(0x7e00 << 16) | (0xc90c >> 2),
849 	0x00000000,
850 	(0x4e00 << 16) | (0xc910 >> 2),
851 	0x00000000,
852 	(0x5e00 << 16) | (0xc910 >> 2),
853 	0x00000000,
854 	(0x6e00 << 16) | (0xc910 >> 2),
855 	0x00000000,
856 	(0x7e00 << 16) | (0xc910 >> 2),
857 	0x00000000,
858 	(0x0e00 << 16) | (0xc99c >> 2),
859 	0x00000000,
860 	(0x0e00 << 16) | (0x9834 >> 2),
861 	0x00000000,
862 	(0x0000 << 16) | (0x30f00 >> 2),
863 	0x00000000,
864 	(0x0000 << 16) | (0x30f04 >> 2),
865 	0x00000000,
866 	(0x0000 << 16) | (0x30f08 >> 2),
867 	0x00000000,
868 	(0x0000 << 16) | (0x30f0c >> 2),
869 	0x00000000,
870 	(0x0600 << 16) | (0x9b7c >> 2),
871 	0x00000000,
872 	(0x0e00 << 16) | (0x8a14 >> 2),
873 	0x00000000,
874 	(0x0e00 << 16) | (0x8a18 >> 2),
875 	0x00000000,
876 	(0x0600 << 16) | (0x30a00 >> 2),
877 	0x00000000,
878 	(0x0e00 << 16) | (0x8bf0 >> 2),
879 	0x00000000,
880 	(0x0e00 << 16) | (0x8bcc >> 2),
881 	0x00000000,
882 	(0x0e00 << 16) | (0x8b24 >> 2),
883 	0x00000000,
884 	(0x0e00 << 16) | (0x30a04 >> 2),
885 	0x00000000,
886 	(0x0600 << 16) | (0x30a10 >> 2),
887 	0x00000000,
888 	(0x0600 << 16) | (0x30a14 >> 2),
889 	0x00000000,
890 	(0x0600 << 16) | (0x30a18 >> 2),
891 	0x00000000,
892 	(0x0600 << 16) | (0x30a2c >> 2),
893 	0x00000000,
894 	(0x0e00 << 16) | (0xc700 >> 2),
895 	0x00000000,
896 	(0x0e00 << 16) | (0xc704 >> 2),
897 	0x00000000,
898 	(0x0e00 << 16) | (0xc708 >> 2),
899 	0x00000000,
900 	(0x0e00 << 16) | (0xc768 >> 2),
901 	0x00000000,
902 	(0x0400 << 16) | (0xc770 >> 2),
903 	0x00000000,
904 	(0x0400 << 16) | (0xc774 >> 2),
905 	0x00000000,
906 	(0x0400 << 16) | (0xc798 >> 2),
907 	0x00000000,
908 	(0x0400 << 16) | (0xc79c >> 2),
909 	0x00000000,
910 	(0x0e00 << 16) | (0x9100 >> 2),
911 	0x00000000,
912 	(0x0e00 << 16) | (0x3c010 >> 2),
913 	0x00000000,
914 	(0x0e00 << 16) | (0x8c00 >> 2),
915 	0x00000000,
916 	(0x0e00 << 16) | (0x8c04 >> 2),
917 	0x00000000,
918 	(0x0e00 << 16) | (0x8c20 >> 2),
919 	0x00000000,
920 	(0x0e00 << 16) | (0x8c38 >> 2),
921 	0x00000000,
922 	(0x0e00 << 16) | (0x8c3c >> 2),
923 	0x00000000,
924 	(0x0e00 << 16) | (0xae00 >> 2),
925 	0x00000000,
926 	(0x0e00 << 16) | (0x9604 >> 2),
927 	0x00000000,
928 	(0x0e00 << 16) | (0xac08 >> 2),
929 	0x00000000,
930 	(0x0e00 << 16) | (0xac0c >> 2),
931 	0x00000000,
932 	(0x0e00 << 16) | (0xac10 >> 2),
933 	0x00000000,
934 	(0x0e00 << 16) | (0xac14 >> 2),
935 	0x00000000,
936 	(0x0e00 << 16) | (0xac58 >> 2),
937 	0x00000000,
938 	(0x0e00 << 16) | (0xac68 >> 2),
939 	0x00000000,
940 	(0x0e00 << 16) | (0xac6c >> 2),
941 	0x00000000,
942 	(0x0e00 << 16) | (0xac70 >> 2),
943 	0x00000000,
944 	(0x0e00 << 16) | (0xac74 >> 2),
945 	0x00000000,
946 	(0x0e00 << 16) | (0xac78 >> 2),
947 	0x00000000,
948 	(0x0e00 << 16) | (0xac7c >> 2),
949 	0x00000000,
950 	(0x0e00 << 16) | (0xac80 >> 2),
951 	0x00000000,
952 	(0x0e00 << 16) | (0xac84 >> 2),
953 	0x00000000,
954 	(0x0e00 << 16) | (0xac88 >> 2),
955 	0x00000000,
956 	(0x0e00 << 16) | (0xac8c >> 2),
957 	0x00000000,
958 	(0x0e00 << 16) | (0x970c >> 2),
959 	0x00000000,
960 	(0x0e00 << 16) | (0x9714 >> 2),
961 	0x00000000,
962 	(0x0e00 << 16) | (0x9718 >> 2),
963 	0x00000000,
964 	(0x0e00 << 16) | (0x971c >> 2),
965 	0x00000000,
966 	(0x0e00 << 16) | (0x31068 >> 2),
967 	0x00000000,
968 	(0x4e00 << 16) | (0x31068 >> 2),
969 	0x00000000,
970 	(0x5e00 << 16) | (0x31068 >> 2),
971 	0x00000000,
972 	(0x6e00 << 16) | (0x31068 >> 2),
973 	0x00000000,
974 	(0x7e00 << 16) | (0x31068 >> 2),
975 	0x00000000,
976 	(0x0e00 << 16) | (0xcd10 >> 2),
977 	0x00000000,
978 	(0x0e00 << 16) | (0xcd14 >> 2),
979 	0x00000000,
980 	(0x0e00 << 16) | (0x88b0 >> 2),
981 	0x00000000,
982 	(0x0e00 << 16) | (0x88b4 >> 2),
983 	0x00000000,
984 	(0x0e00 << 16) | (0x88b8 >> 2),
985 	0x00000000,
986 	(0x0e00 << 16) | (0x88bc >> 2),
987 	0x00000000,
988 	(0x0400 << 16) | (0x89c0 >> 2),
989 	0x00000000,
990 	(0x0e00 << 16) | (0x88c4 >> 2),
991 	0x00000000,
992 	(0x0e00 << 16) | (0x88c8 >> 2),
993 	0x00000000,
994 	(0x0e00 << 16) | (0x88d0 >> 2),
995 	0x00000000,
996 	(0x0e00 << 16) | (0x88d4 >> 2),
997 	0x00000000,
998 	(0x0e00 << 16) | (0x88d8 >> 2),
999 	0x00000000,
1000 	(0x0e00 << 16) | (0x8980 >> 2),
1001 	0x00000000,
1002 	(0x0e00 << 16) | (0x30938 >> 2),
1003 	0x00000000,
1004 	(0x0e00 << 16) | (0x3093c >> 2),
1005 	0x00000000,
1006 	(0x0e00 << 16) | (0x30940 >> 2),
1007 	0x00000000,
1008 	(0x0e00 << 16) | (0x89a0 >> 2),
1009 	0x00000000,
1010 	(0x0e00 << 16) | (0x30900 >> 2),
1011 	0x00000000,
1012 	(0x0e00 << 16) | (0x30904 >> 2),
1013 	0x00000000,
1014 	(0x0e00 << 16) | (0x89b4 >> 2),
1015 	0x00000000,
1016 	(0x0e00 << 16) | (0x3e1fc >> 2),
1017 	0x00000000,
1018 	(0x0e00 << 16) | (0x3c210 >> 2),
1019 	0x00000000,
1020 	(0x0e00 << 16) | (0x3c214 >> 2),
1021 	0x00000000,
1022 	(0x0e00 << 16) | (0x3c218 >> 2),
1023 	0x00000000,
1024 	(0x0e00 << 16) | (0x8904 >> 2),
1025 	0x00000000,
1026 	0x5, /* bare section word; the five entries below have no zero word */
1027 	(0x0e00 << 16) | (0x8c28 >> 2),
1028 	(0x0e00 << 16) | (0x8c2c >> 2),
1029 	(0x0e00 << 16) | (0x8c30 >> 2),
1030 	(0x0e00 << 16) | (0x8c34 >> 2),
1031 	(0x0e00 << 16) | (0x9600 >> 2),
1032 };
1033 
/*
 * bonaire_golden_spm_registers - a single row of three u32 words.
 * NOTE(review): presumably (register offset, mask, value) like the other
 * "golden" tables -- confirm against the code that consumes it, which is
 * not visible in this part of the file.
 */
1034 static const u32 bonaire_golden_spm_registers[] =
1035 {
1036 	0x30800, 0xe0ffffff, 0xe0000000
1037 };
1038 
/*
 * bonaire_golden_common_registers - flat u32 table, three words per row.
 * NOTE(review): presumably each row is (register offset, mask, value) --
 * confirm against the code that consumes it, which is not visible in this
 * part of the file.
 */
1039 static const u32 bonaire_golden_common_registers[] =
1040 {
1041 	0xc770, 0xffffffff, 0x00000800,
1042 	0xc774, 0xffffffff, 0x00000800,
1043 	0xc798, 0xffffffff, 0x00007fbf,
1044 	0xc79c, 0xffffffff, 0x00007faf
1045 };
1046 
/*
 * bonaire_golden_registers - flat u32 table, three words per row.
 * NOTE(review): presumably each row is (register offset, mask, value) --
 * confirm against the code that consumes it, which is not visible in this
 * part of the file. Row order is kept exactly as is.
 */
1047 static const u32 bonaire_golden_registers[] =
1048 {
1049 	0x3354, 0x00000333, 0x00000333,
1050 	0x3350, 0x000c0fc0, 0x00040200,
1051 	0x9a10, 0x00010000, 0x00058208,
1052 	0x3c000, 0xffff1fff, 0x00140000,
1053 	0x3c200, 0xfdfc0fff, 0x00000100,
1054 	0x3c234, 0x40000000, 0x40000200,
1055 	0x9830, 0xffffffff, 0x00000000,
1056 	0x9834, 0xf00fffff, 0x00000400,
1057 	0x9838, 0x0002021c, 0x00020200,
1058 	0xc78, 0x00000080, 0x00000000,
1059 	0x5bb0, 0x000000f0, 0x00000070,
1060 	0x5bc0, 0xf0311fff, 0x80300000,
1061 	0x98f8, 0x73773777, 0x12010001,
1062 	0x350c, 0x00810000, 0x408af000,
1063 	0x7030, 0x31000111, 0x00000011,
1064 	0x2f48, 0x73773777, 0x12010001,
1065 	0x220c, 0x00007fb6, 0x0021a1b1,
1066 	0x2210, 0x00007fb6, 0x002021b1,
1067 	0x2180, 0x00007fb6, 0x00002191,
1068 	0x2218, 0x00007fb6, 0x002121b1,
1069 	0x221c, 0x00007fb6, 0x002021b1,
1070 	0x21dc, 0x00007fb6, 0x00002191,
1071 	0x21e0, 0x00007fb6, 0x00002191,
1072 	0x3628, 0x0000003f, 0x0000000a,
1073 	0x362c, 0x0000003f, 0x0000000a,
1074 	0x2ae4, 0x00073ffe, 0x000022a2,
1075 	0x240c, 0x000007ff, 0x00000000,
1076 	0x8a14, 0xf000003f, 0x00000007,
1077 	0x8bf0, 0x00002001, 0x00000001,
1078 	0x8b24, 0xffffffff, 0x00ffffff,
1079 	0x30a04, 0x0000ff0f, 0x00000000,
1080 	0x28a4c, 0x07ffffff, 0x06000000,
1081 	0x4d8, 0x00000fff, 0x00000100,
1082 	0x3e78, 0x00000001, 0x00000002,
1083 	0x9100, 0x03000000, 0x0362c688,
1084 	0x8c00, 0x000000ff, 0x00000001,
1085 	0xe40, 0x00001fff, 0x00001fff,
1086 	0x9060, 0x0000007f, 0x00000020,
1087 	0x9508, 0x00010000, 0x00010000,
1088 	0xac14, 0x000003ff, 0x000000f3,
1089 	0xac0c, 0xffffffff, 0x00001032
1090 };
1091 
/*
 * Bonaire "mgcg_cgcg_init" register sequence; it is the first table
 * cik_init_golden_registers() programs for CHIP_BONAIRE.
 * Rows hold three words each -- presumably (register offset, mask, value);
 * confirm against radeon_program_register_sequence().
 */
static const u32 bonaire_mgcg_cgcg_init[] = {
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1177 
/*
 * Spectre SPM golden setting: a single three-word row, programmed last
 * for CHIP_KAVERI by cik_init_golden_registers().
 */
static const u32 spectre_golden_spm_registers[] = {
	0x30800, 0xe0ffffff, 0xe0000000
};
1182 
/*
 * Spectre "common" golden settings, programmed for CHIP_KAVERI by
 * cik_init_golden_registers().  Three words per row.
 */
static const u32 spectre_golden_common_registers[] = {
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1190 
/*
 * Spectre golden register settings, programmed for CHIP_KAVERI by
 * cik_init_golden_registers().
 * Rows hold three words each -- presumably (register offset, mask, value);
 * confirm against radeon_program_register_sequence().
 */
static const u32 spectre_golden_registers[] = {
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1219 
/*
 * Spectre "mgcg_cgcg_init" register sequence; it is the first table
 * cik_init_golden_registers() programs for CHIP_KAVERI.
 * Rows hold three words each -- presumably (register offset, mask, value);
 * confirm against radeon_program_register_sequence().
 */
static const u32 spectre_mgcg_cgcg_init[] = {
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1310 
/*
 * Kalindi SPM golden setting: a single three-word row, programmed last
 * for CHIP_KABINI and CHIP_MULLINS by cik_init_golden_registers().
 */
static const u32 kalindi_golden_spm_registers[] = {
	0x30800, 0xe0ffffff, 0xe0000000
};
1315 
/*
 * Kalindi "common" golden settings, programmed for CHIP_KABINI and
 * CHIP_MULLINS by cik_init_golden_registers().  Three words per row.
 */
static const u32 kalindi_golden_common_registers[] = {
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1323 
/*
 * Kalindi golden register settings, programmed for CHIP_KABINI by
 * cik_init_golden_registers().
 * Rows hold three words each -- presumably (register offset, mask, value);
 * confirm against radeon_program_register_sequence().
 */
static const u32 kalindi_golden_registers[] = {
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1357 
/*
 * Kalindi "mgcg_cgcg_init" register sequence; it is the first table
 * cik_init_golden_registers() programs for CHIP_KABINI and CHIP_MULLINS.
 * Rows hold three words each -- presumably (register offset, mask, value);
 * confirm against radeon_program_register_sequence().
 */
static const u32 kalindi_mgcg_cgcg_init[] = {
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1416 
/*
 * Hawaii SPM golden setting: a single three-word row, programmed last
 * for CHIP_HAWAII by cik_init_golden_registers().
 */
static const u32 hawaii_golden_spm_registers[] = {
	0x30800, 0xe0ffffff, 0xe0000000
};
1421 
/*
 * Hawaii "common" golden settings, programmed for CHIP_HAWAII by
 * cik_init_golden_registers().  Three words per row.
 */
static const u32 hawaii_golden_common_registers[] = {
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1430 
/*
 * Hawaii golden register settings, programmed for CHIP_HAWAII by
 * cik_init_golden_registers().
 * Rows hold three words each -- presumably (register offset, mask, value);
 * confirm against radeon_program_register_sequence().
 */
static const u32 hawaii_golden_registers[] = {
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1470 
/*
 * Hawaii "mgcg_cgcg_init" register sequence; it is the first table
 * cik_init_golden_registers() programs for CHIP_HAWAII.
 * Rows hold three words each -- presumably (register offset, mask, value);
 * confirm against radeon_program_register_sequence().
 */
static const u32 hawaii_mgcg_cgcg_init[] = {
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1581 
/*
 * Godavari golden register settings; cik_init_golden_registers() programs
 * them for CHIP_MULLINS in place of the Kalindi golden table.
 * Rows hold three words each -- presumably (register offset, mask, value);
 * confirm against radeon_program_register_sequence().
 *
 * NOTE(review): the fourth row used offset 0x98302, which is not
 * dword-aligned.  The Bonaire, Kalindi and Hawaii tables in this file all
 * follow their 0x9830 row with "0x9834, 0xf00fffff, 0x00000400", so 0x98302
 * is treated as a typo for 0x9834 and corrected here.  Please validate on
 * Mullins hardware.
 */
static const u32 godavari_golden_registers[] = {
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1617 
1618 
1619 static void cik_init_golden_registers(struct radeon_device *rdev)
1620 {
1621 	switch (rdev->family) {
1622 	case CHIP_BONAIRE:
1623 		radeon_program_register_sequence(rdev,
1624 						 bonaire_mgcg_cgcg_init,
1625 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1626 		radeon_program_register_sequence(rdev,
1627 						 bonaire_golden_registers,
1628 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1629 		radeon_program_register_sequence(rdev,
1630 						 bonaire_golden_common_registers,
1631 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1632 		radeon_program_register_sequence(rdev,
1633 						 bonaire_golden_spm_registers,
1634 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1635 		break;
1636 	case CHIP_KABINI:
1637 		radeon_program_register_sequence(rdev,
1638 						 kalindi_mgcg_cgcg_init,
1639 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1640 		radeon_program_register_sequence(rdev,
1641 						 kalindi_golden_registers,
1642 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1643 		radeon_program_register_sequence(rdev,
1644 						 kalindi_golden_common_registers,
1645 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1646 		radeon_program_register_sequence(rdev,
1647 						 kalindi_golden_spm_registers,
1648 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1649 		break;
1650 	case CHIP_MULLINS:
1651 		radeon_program_register_sequence(rdev,
1652 						 kalindi_mgcg_cgcg_init,
1653 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1654 		radeon_program_register_sequence(rdev,
1655 						 godavari_golden_registers,
1656 						 (const u32)ARRAY_SIZE(godavari_golden_registers));
1657 		radeon_program_register_sequence(rdev,
1658 						 kalindi_golden_common_registers,
1659 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1660 		radeon_program_register_sequence(rdev,
1661 						 kalindi_golden_spm_registers,
1662 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1663 		break;
1664 	case CHIP_KAVERI:
1665 		radeon_program_register_sequence(rdev,
1666 						 spectre_mgcg_cgcg_init,
1667 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1668 		radeon_program_register_sequence(rdev,
1669 						 spectre_golden_registers,
1670 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1671 		radeon_program_register_sequence(rdev,
1672 						 spectre_golden_common_registers,
1673 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1674 		radeon_program_register_sequence(rdev,
1675 						 spectre_golden_spm_registers,
1676 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1677 		break;
1678 	case CHIP_HAWAII:
1679 		radeon_program_register_sequence(rdev,
1680 						 hawaii_mgcg_cgcg_init,
1681 						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1682 		radeon_program_register_sequence(rdev,
1683 						 hawaii_golden_registers,
1684 						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1685 		radeon_program_register_sequence(rdev,
1686 						 hawaii_golden_common_registers,
1687 						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1688 		radeon_program_register_sequence(rdev,
1689 						 hawaii_golden_spm_registers,
1690 						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1691 		break;
1692 	default:
1693 		break;
1694 	}
1695 }
1696 
1697 /**
1698  * cik_get_xclk - get the xclk
1699  *
1700  * @rdev: radeon_device pointer
1701  *
1702  * Returns the reference clock used by the gfx engine
1703  * (CIK).
1704  */
1705 u32 cik_get_xclk(struct radeon_device *rdev)
1706 {
1707 	u32 reference_clock = rdev->clock.spll.reference_freq;
1708 
1709 	if (rdev->flags & RADEON_IS_IGP) {
1710 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1711 			return reference_clock / 2;
1712 	} else {
1713 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1714 			return reference_clock / 4;
1715 	}
1716 	return reference_clock;
1717 }
1718 
1719 /**
1720  * cik_mm_rdoorbell - read a doorbell dword
1721  *
1722  * @rdev: radeon_device pointer
1723  * @index: doorbell index
1724  *
1725  * Returns the value in the doorbell aperture at the
1726  * requested doorbell index (CIK).
1727  */
1728 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1729 {
1730 	if (index < rdev->doorbell.num_doorbells) {
1731 		return readl(rdev->doorbell.ptr + index);
1732 	} else {
1733 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1734 		return 0;
1735 	}
1736 }
1737 
1738 /**
1739  * cik_mm_wdoorbell - write a doorbell dword
1740  *
1741  * @rdev: radeon_device pointer
1742  * @index: doorbell index
1743  * @v: value to write
1744  *
1745  * Writes @v to the doorbell aperture at the
1746  * requested doorbell index (CIK).
1747  */
1748 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1749 {
1750 	if (index < rdev->doorbell.num_doorbells) {
1751 		writel(v, rdev->doorbell.ptr + index);
1752 	} else {
1753 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1754 	}
1755 }
1756 
1757 #define BONAIRE_IO_MC_REGS_SIZE 36
1758 
1759 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1760 {
1761 	{0x00000070, 0x04400000},
1762 	{0x00000071, 0x80c01803},
1763 	{0x00000072, 0x00004004},
1764 	{0x00000073, 0x00000100},
1765 	{0x00000074, 0x00ff0000},
1766 	{0x00000075, 0x34000000},
1767 	{0x00000076, 0x08000014},
1768 	{0x00000077, 0x00cc08ec},
1769 	{0x00000078, 0x00000400},
1770 	{0x00000079, 0x00000000},
1771 	{0x0000007a, 0x04090000},
1772 	{0x0000007c, 0x00000000},
1773 	{0x0000007e, 0x4408a8e8},
1774 	{0x0000007f, 0x00000304},
1775 	{0x00000080, 0x00000000},
1776 	{0x00000082, 0x00000001},
1777 	{0x00000083, 0x00000002},
1778 	{0x00000084, 0xf3e4f400},
1779 	{0x00000085, 0x052024e3},
1780 	{0x00000087, 0x00000000},
1781 	{0x00000088, 0x01000000},
1782 	{0x0000008a, 0x1c0a0000},
1783 	{0x0000008b, 0xff010000},
1784 	{0x0000008d, 0xffffefff},
1785 	{0x0000008e, 0xfff3efff},
1786 	{0x0000008f, 0xfff3efbf},
1787 	{0x00000092, 0xf7ffffff},
1788 	{0x00000093, 0xffffff7f},
1789 	{0x00000095, 0x00101101},
1790 	{0x00000096, 0x00000fff},
1791 	{0x00000097, 0x00116fff},
1792 	{0x00000098, 0x60010000},
1793 	{0x00000099, 0x10010000},
1794 	{0x0000009a, 0x00006000},
1795 	{0x0000009b, 0x00001000},
1796 	{0x0000009f, 0x00b48000}
1797 };
1798 
1799 #define HAWAII_IO_MC_REGS_SIZE 22
1800 
1801 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1802 {
1803 	{0x0000007d, 0x40000000},
1804 	{0x0000007e, 0x40180304},
1805 	{0x0000007f, 0x0000ff00},
1806 	{0x00000081, 0x00000000},
1807 	{0x00000083, 0x00000800},
1808 	{0x00000086, 0x00000000},
1809 	{0x00000087, 0x00000100},
1810 	{0x00000088, 0x00020100},
1811 	{0x00000089, 0x00000000},
1812 	{0x0000008b, 0x00040000},
1813 	{0x0000008c, 0x00000100},
1814 	{0x0000008e, 0xff010000},
1815 	{0x00000090, 0xffffefff},
1816 	{0x00000091, 0xfff3efff},
1817 	{0x00000092, 0xfff3efbf},
1818 	{0x00000093, 0xf7ffffff},
1819 	{0x00000094, 0xffffff7f},
1820 	{0x00000095, 0x00000fff},
1821 	{0x00000096, 0x00116fff},
1822 	{0x00000097, 0x60010000},
1823 	{0x00000098, 0x10010000},
1824 	{0x0000009f, 0x00c79000}
1825 };
1826 
1827 
1828 /**
1829  * cik_srbm_select - select specific register instances
1830  *
1831  * @rdev: radeon_device pointer
1832  * @me: selected ME (micro engine)
1833  * @pipe: pipe
1834  * @queue: queue
1835  * @vmid: VMID
1836  *
1837  * Switches the currently active registers instances.  Some
1838  * registers are instanced per VMID, others are instanced per
1839  * me/pipe/queue combination.
1840  */
1841 static void cik_srbm_select(struct radeon_device *rdev,
1842 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1843 {
1844 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1845 			     MEID(me & 0x3) |
1846 			     VMID(vmid & 0xf) |
1847 			     QUEUEID(queue & 0x7));
1848 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1849 }
1850 
1851 /* ucode loading */
1852 /**
1853  * ci_mc_load_microcode - load MC ucode into the hw
1854  *
1855  * @rdev: radeon_device pointer
1856  *
1857  * Load the GDDR MC ucode into the hw (CIK).
1858  * Returns 0 on success, error on failure.
1859  */
1860 int ci_mc_load_microcode(struct radeon_device *rdev)
1861 {
1862 	const __be32 *fw_data = NULL;
1863 	const __le32 *new_fw_data = NULL;
1864 	u32 running, tmp;
1865 	u32 *io_mc_regs = NULL;
1866 	const __le32 *new_io_mc_regs = NULL;
1867 	int i, regs_size, ucode_size;
1868 
1869 	if (!rdev->mc_fw)
1870 		return -EINVAL;
1871 
1872 	if (rdev->new_fw) {
1873 		const struct mc_firmware_header_v1_0 *hdr =
1874 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1875 
1876 		radeon_ucode_print_mc_hdr(&hdr->header);
1877 
1878 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1879 		new_io_mc_regs = (const __le32 *)
1880 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1881 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1882 		new_fw_data = (const __le32 *)
1883 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1884 	} else {
1885 		ucode_size = rdev->mc_fw->size / 4;
1886 
1887 		switch (rdev->family) {
1888 		case CHIP_BONAIRE:
1889 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1890 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1891 			break;
1892 		case CHIP_HAWAII:
1893 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1894 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1895 			break;
1896 		default:
1897 			return -EINVAL;
1898 		}
1899 		fw_data = (const __be32 *)rdev->mc_fw->data;
1900 	}
1901 
1902 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1903 
1904 	if (running == 0) {
1905 		/* reset the engine and set to writable */
1906 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1907 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1908 
1909 		/* load mc io regs */
1910 		for (i = 0; i < regs_size; i++) {
1911 			if (rdev->new_fw) {
1912 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1913 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1914 			} else {
1915 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1916 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1917 			}
1918 		}
1919 
1920 		tmp = RREG32(MC_SEQ_MISC0);
1921 		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1922 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1923 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1924 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1925 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1926 		}
1927 
1928 		/* load the MC ucode */
1929 		for (i = 0; i < ucode_size; i++) {
1930 			if (rdev->new_fw)
1931 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1932 			else
1933 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1934 		}
1935 
1936 		/* put the engine back into the active state */
1937 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1938 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1939 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1940 
1941 		/* wait for training to complete */
1942 		for (i = 0; i < rdev->usec_timeout; i++) {
1943 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1944 				break;
1945 			udelay(1);
1946 		}
1947 		for (i = 0; i < rdev->usec_timeout; i++) {
1948 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1949 				break;
1950 			udelay(1);
1951 		}
1952 	}
1953 
1954 	return 0;
1955 }
1956 
1957 /**
1958  * cik_init_microcode - load ucode images from disk
1959  *
1960  * @rdev: radeon_device pointer
1961  *
1962  * Use the firmware interface to load the ucode images into
1963  * the driver (not loaded into hw).
1964  * Returns 0 on success, error on failure.
1965  */
1966 static int cik_init_microcode(struct radeon_device *rdev)
1967 {
1968 	const char *chip_name;
1969 	const char *new_chip_name;
1970 	size_t pfp_req_size, me_req_size, ce_req_size,
1971 		mec_req_size, rlc_req_size, mc_req_size = 0,
1972 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1973 	char fw_name[30];
1974 	int new_fw = 0;
1975 	int err;
1976 	int num_fw;
1977 	bool new_smc = false;
1978 
1979 	DRM_DEBUG("\n");
1980 
1981 	switch (rdev->family) {
1982 	case CHIP_BONAIRE:
1983 		chip_name = "BONAIRE";
1984 		if ((rdev->pdev->revision == 0x80) ||
1985 		    (rdev->pdev->revision == 0x81) ||
1986 		    (rdev->pdev->device == 0x665f))
1987 			new_smc = true;
1988 		new_chip_name = "bonaire";
1989 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1990 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1991 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1992 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1993 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1994 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1995 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1996 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1997 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1998 		num_fw = 8;
1999 		break;
2000 	case CHIP_HAWAII:
2001 		chip_name = "HAWAII";
2002 		if (rdev->pdev->revision == 0x80)
2003 			new_smc = true;
2004 		new_chip_name = "hawaii";
2005 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2006 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2007 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2008 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2009 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2010 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2011 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2012 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2013 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2014 		num_fw = 8;
2015 		break;
2016 	case CHIP_KAVERI:
2017 		chip_name = "KAVERI";
2018 		new_chip_name = "kaveri";
2019 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2020 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2021 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2022 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2023 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2024 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2025 		num_fw = 7;
2026 		break;
2027 	case CHIP_KABINI:
2028 		chip_name = "KABINI";
2029 		new_chip_name = "kabini";
2030 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2031 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2032 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2033 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2034 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2035 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2036 		num_fw = 6;
2037 		break;
2038 	case CHIP_MULLINS:
2039 		chip_name = "MULLINS";
2040 		new_chip_name = "mullins";
2041 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2042 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2043 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2044 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2045 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2046 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2047 		num_fw = 6;
2048 		break;
2049 	default: BUG();
2050 	}
2051 
2052 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2053 
2054 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2055 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2056 	if (err) {
2057 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2058 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2059 		if (err)
2060 			goto out;
2061 		if (rdev->pfp_fw->size != pfp_req_size) {
2062 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2063 			       rdev->pfp_fw->size, fw_name);
2064 			err = -EINVAL;
2065 			goto out;
2066 		}
2067 	} else {
2068 		err = radeon_ucode_validate(rdev->pfp_fw);
2069 		if (err) {
2070 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2071 			       fw_name);
2072 			goto out;
2073 		} else {
2074 			new_fw++;
2075 		}
2076 	}
2077 
2078 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2079 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2080 	if (err) {
2081 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2082 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2083 		if (err)
2084 			goto out;
2085 		if (rdev->me_fw->size != me_req_size) {
2086 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2087 			       rdev->me_fw->size, fw_name);
2088 			err = -EINVAL;
2089 		}
2090 	} else {
2091 		err = radeon_ucode_validate(rdev->me_fw);
2092 		if (err) {
2093 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2094 			       fw_name);
2095 			goto out;
2096 		} else {
2097 			new_fw++;
2098 		}
2099 	}
2100 
2101 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2102 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2103 	if (err) {
2104 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2105 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2106 		if (err)
2107 			goto out;
2108 		if (rdev->ce_fw->size != ce_req_size) {
2109 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2110 			       rdev->ce_fw->size, fw_name);
2111 			err = -EINVAL;
2112 		}
2113 	} else {
2114 		err = radeon_ucode_validate(rdev->ce_fw);
2115 		if (err) {
2116 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2117 			       fw_name);
2118 			goto out;
2119 		} else {
2120 			new_fw++;
2121 		}
2122 	}
2123 
2124 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2125 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2126 	if (err) {
2127 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2128 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2129 		if (err)
2130 			goto out;
2131 		if (rdev->mec_fw->size != mec_req_size) {
2132 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2133 			       rdev->mec_fw->size, fw_name);
2134 			err = -EINVAL;
2135 		}
2136 	} else {
2137 		err = radeon_ucode_validate(rdev->mec_fw);
2138 		if (err) {
2139 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2140 			       fw_name);
2141 			goto out;
2142 		} else {
2143 			new_fw++;
2144 		}
2145 	}
2146 
2147 	if (rdev->family == CHIP_KAVERI) {
2148 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2149 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2150 		if (err) {
2151 			goto out;
2152 		} else {
2153 			err = radeon_ucode_validate(rdev->mec2_fw);
2154 			if (err) {
2155 				goto out;
2156 			} else {
2157 				new_fw++;
2158 			}
2159 		}
2160 	}
2161 
2162 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2163 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2164 	if (err) {
2165 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2166 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2167 		if (err)
2168 			goto out;
2169 		if (rdev->rlc_fw->size != rlc_req_size) {
2170 			pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2171 			       rdev->rlc_fw->size, fw_name);
2172 			err = -EINVAL;
2173 		}
2174 	} else {
2175 		err = radeon_ucode_validate(rdev->rlc_fw);
2176 		if (err) {
2177 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2178 			       fw_name);
2179 			goto out;
2180 		} else {
2181 			new_fw++;
2182 		}
2183 	}
2184 
2185 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2186 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2187 	if (err) {
2188 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2189 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2190 		if (err)
2191 			goto out;
2192 		if (rdev->sdma_fw->size != sdma_req_size) {
2193 			pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2194 			       rdev->sdma_fw->size, fw_name);
2195 			err = -EINVAL;
2196 		}
2197 	} else {
2198 		err = radeon_ucode_validate(rdev->sdma_fw);
2199 		if (err) {
2200 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2201 			       fw_name);
2202 			goto out;
2203 		} else {
2204 			new_fw++;
2205 		}
2206 	}
2207 
2208 	/* No SMC, MC ucode on APUs */
2209 	if (!(rdev->flags & RADEON_IS_IGP)) {
2210 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2211 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2212 		if (err) {
2213 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2214 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2215 			if (err) {
2216 				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2217 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2218 				if (err)
2219 					goto out;
2220 			}
2221 			if ((rdev->mc_fw->size != mc_req_size) &&
2222 			    (rdev->mc_fw->size != mc2_req_size)){
2223 				pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2224 				       rdev->mc_fw->size, fw_name);
2225 				err = -EINVAL;
2226 			}
2227 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2228 		} else {
2229 			err = radeon_ucode_validate(rdev->mc_fw);
2230 			if (err) {
2231 				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2232 				       fw_name);
2233 				goto out;
2234 			} else {
2235 				new_fw++;
2236 			}
2237 		}
2238 
2239 		if (new_smc)
2240 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2241 		else
2242 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2243 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2244 		if (err) {
2245 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2246 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2247 			if (err) {
2248 				pr_err("smc: error loading firmware \"%s\"\n",
2249 				       fw_name);
2250 				release_firmware(rdev->smc_fw);
2251 				rdev->smc_fw = NULL;
2252 				err = 0;
2253 			} else if (rdev->smc_fw->size != smc_req_size) {
2254 				pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2255 				       rdev->smc_fw->size, fw_name);
2256 				err = -EINVAL;
2257 			}
2258 		} else {
2259 			err = radeon_ucode_validate(rdev->smc_fw);
2260 			if (err) {
2261 				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2262 				       fw_name);
2263 				goto out;
2264 			} else {
2265 				new_fw++;
2266 			}
2267 		}
2268 	}
2269 
2270 	if (new_fw == 0) {
2271 		rdev->new_fw = false;
2272 	} else if (new_fw < num_fw) {
2273 		pr_err("ci_fw: mixing new and old firmware!\n");
2274 		err = -EINVAL;
2275 	} else {
2276 		rdev->new_fw = true;
2277 	}
2278 
2279 out:
2280 	if (err) {
2281 		if (err != -EINVAL)
2282 			pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2283 			       fw_name);
2284 		release_firmware(rdev->pfp_fw);
2285 		rdev->pfp_fw = NULL;
2286 		release_firmware(rdev->me_fw);
2287 		rdev->me_fw = NULL;
2288 		release_firmware(rdev->ce_fw);
2289 		rdev->ce_fw = NULL;
2290 		release_firmware(rdev->mec_fw);
2291 		rdev->mec_fw = NULL;
2292 		release_firmware(rdev->mec2_fw);
2293 		rdev->mec2_fw = NULL;
2294 		release_firmware(rdev->rlc_fw);
2295 		rdev->rlc_fw = NULL;
2296 		release_firmware(rdev->sdma_fw);
2297 		rdev->sdma_fw = NULL;
2298 		release_firmware(rdev->mc_fw);
2299 		rdev->mc_fw = NULL;
2300 		release_firmware(rdev->smc_fw);
2301 		rdev->smc_fw = NULL;
2302 	}
2303 	return err;
2304 }
2305 
2306 /*
2307  * Core functions
2308  */
2309 /**
2310  * cik_tiling_mode_table_init - init the hw tiling table
2311  *
2312  * @rdev: radeon_device pointer
2313  *
2314  * Starting with SI, the tiling setup is done globally in a
2315  * set of 32 tiling modes.  Rather than selecting each set of
2316  * parameters per surface as on older asics, we just select
2317  * which index in the tiling table we want to use, and the
2318  * surface uses those parameters (CIK).
2319  */
2320 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2321 {
2322 	u32 *tile = rdev->config.cik.tile_mode_array;
2323 	u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2324 	const u32 num_tile_mode_states =
2325 			ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2326 	const u32 num_secondary_tile_mode_states =
2327 			ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2328 	u32 reg_offset, split_equal_to_row_size;
2329 	u32 num_pipe_configs;
2330 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2331 		rdev->config.cik.max_shader_engines;
2332 
2333 	switch (rdev->config.cik.mem_row_size_in_kb) {
2334 	case 1:
2335 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2336 		break;
2337 	case 2:
2338 	default:
2339 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2340 		break;
2341 	case 4:
2342 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2343 		break;
2344 	}
2345 
2346 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2347 	if (num_pipe_configs > 8)
2348 		num_pipe_configs = 16;
2349 
2350 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2351 		tile[reg_offset] = 0;
2352 	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2353 		macrotile[reg_offset] = 0;
2354 
2355 	switch(num_pipe_configs) {
2356 	case 16:
2357 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2358 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2359 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2360 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2361 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2362 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2363 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2364 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2365 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2366 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2367 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2368 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2369 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2370 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2371 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2372 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2373 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2374 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2375 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376 			   TILE_SPLIT(split_equal_to_row_size));
2377 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2378 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2380 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2381 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2382 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2384 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2385 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2386 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387 			   TILE_SPLIT(split_equal_to_row_size));
2388 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2389 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2390 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2391 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2393 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2395 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2397 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2398 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2399 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2400 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2401 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2402 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2403 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2405 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2406 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2408 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2409 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2410 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2412 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2413 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2414 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2415 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2416 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2417 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2418 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2420 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2421 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2423 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2424 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2425 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2426 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2427 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2428 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2429 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2430 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2431 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2432 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2433 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2434 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2435 
2436 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2437 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2438 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2439 			   NUM_BANKS(ADDR_SURF_16_BANK));
2440 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2441 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2442 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2443 			   NUM_BANKS(ADDR_SURF_16_BANK));
2444 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2446 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2447 			   NUM_BANKS(ADDR_SURF_16_BANK));
2448 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2450 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2451 			   NUM_BANKS(ADDR_SURF_16_BANK));
2452 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2454 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2455 			   NUM_BANKS(ADDR_SURF_8_BANK));
2456 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2458 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2459 			   NUM_BANKS(ADDR_SURF_4_BANK));
2460 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2462 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2463 			   NUM_BANKS(ADDR_SURF_2_BANK));
2464 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2466 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2467 			   NUM_BANKS(ADDR_SURF_16_BANK));
2468 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2470 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2471 			   NUM_BANKS(ADDR_SURF_16_BANK));
2472 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2474 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2475 			    NUM_BANKS(ADDR_SURF_16_BANK));
2476 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2478 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2479 			    NUM_BANKS(ADDR_SURF_8_BANK));
2480 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483 			    NUM_BANKS(ADDR_SURF_4_BANK));
2484 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487 			    NUM_BANKS(ADDR_SURF_2_BANK));
2488 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2490 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2491 			    NUM_BANKS(ADDR_SURF_2_BANK));
2492 
2493 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2494 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2495 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2496 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2497 		break;
2498 
2499 	case 8:
2500 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2501 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2502 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2503 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2504 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2505 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2506 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2507 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2508 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2509 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2510 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2511 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2512 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2513 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2514 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2515 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2516 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2517 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2518 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2519 			   TILE_SPLIT(split_equal_to_row_size));
2520 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2521 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2522 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2523 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2524 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2525 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2526 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2527 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2528 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2529 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2530 			   TILE_SPLIT(split_equal_to_row_size));
2531 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2532 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2533 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2534 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2536 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2538 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2540 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2541 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2542 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2543 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2544 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2545 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2546 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2548 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2549 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2551 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2552 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2553 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2555 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2556 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2557 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2558 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2559 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2560 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2561 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2562 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2563 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2564 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2565 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2566 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2567 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2568 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2569 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2570 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2571 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2572 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2573 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2574 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2575 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2576 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2577 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2578 
2579 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2580 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2581 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2582 				NUM_BANKS(ADDR_SURF_16_BANK));
2583 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2584 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2585 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2586 				NUM_BANKS(ADDR_SURF_16_BANK));
2587 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2588 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2589 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2590 				NUM_BANKS(ADDR_SURF_16_BANK));
2591 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2592 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2593 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2594 				NUM_BANKS(ADDR_SURF_16_BANK));
2595 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2596 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2597 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2598 				NUM_BANKS(ADDR_SURF_8_BANK));
2599 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2600 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2601 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2602 				NUM_BANKS(ADDR_SURF_4_BANK));
2603 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2604 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2605 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2606 				NUM_BANKS(ADDR_SURF_2_BANK));
2607 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2609 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2610 				NUM_BANKS(ADDR_SURF_16_BANK));
2611 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2613 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2614 				NUM_BANKS(ADDR_SURF_16_BANK));
2615 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2617 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2618 				NUM_BANKS(ADDR_SURF_16_BANK));
2619 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2621 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2622 				NUM_BANKS(ADDR_SURF_16_BANK));
2623 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2625 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2626 				NUM_BANKS(ADDR_SURF_8_BANK));
2627 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2629 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2630 				NUM_BANKS(ADDR_SURF_4_BANK));
2631 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2633 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2634 				NUM_BANKS(ADDR_SURF_2_BANK));
2635 
2636 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2637 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2638 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2639 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2640 		break;
2641 
2642 	case 4:
2643 		if (num_rbs == 4) {
2644 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2645 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2646 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2647 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2648 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2649 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2650 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2651 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2652 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2653 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2654 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2655 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2656 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2657 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2658 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2659 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2660 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2661 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2662 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2663 			   TILE_SPLIT(split_equal_to_row_size));
2664 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2665 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2666 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2667 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2668 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2669 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2670 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2671 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2672 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2673 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2674 			   TILE_SPLIT(split_equal_to_row_size));
2675 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2676 			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
2677 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2678 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2680 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2681 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2682 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2684 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2685 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2686 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2687 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2688 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2689 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2690 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2692 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2693 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2694 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2695 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2696 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2697 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2699 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2700 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2701 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2702 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2703 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2704 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2705 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2706 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2707 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2708 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2710 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2711 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2712 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2713 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2714 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2715 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2716 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2717 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2718 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2719 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2720 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2722 
2723 		} else if (num_rbs < 4) {
2724 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2725 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2726 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2727 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2728 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2729 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2730 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2731 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2732 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2733 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2734 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2735 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2736 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2737 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2738 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2739 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2740 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2741 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2742 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2743 			   TILE_SPLIT(split_equal_to_row_size));
2744 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2745 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2746 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2747 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2748 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2749 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2750 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2751 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2752 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2753 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2754 			   TILE_SPLIT(split_equal_to_row_size));
2755 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2756 			   PIPE_CONFIG(ADDR_SURF_P4_8x16));
2757 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2758 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2760 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2761 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2762 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2764 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2765 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2766 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2768 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2769 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2770 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2771 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2772 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2773 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2774 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2775 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2776 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2777 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2779 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2780 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2781 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2782 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2783 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2784 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2785 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2786 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2787 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2788 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2789 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2790 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2791 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2792 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2793 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2794 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2795 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2796 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2797 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2798 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2799 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2800 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2801 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2802 		}
2803 
2804 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2805 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2806 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2807 				NUM_BANKS(ADDR_SURF_16_BANK));
2808 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2809 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2810 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2811 				NUM_BANKS(ADDR_SURF_16_BANK));
2812 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2813 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2814 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2815 				NUM_BANKS(ADDR_SURF_16_BANK));
2816 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2817 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2818 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2819 				NUM_BANKS(ADDR_SURF_16_BANK));
2820 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2821 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2822 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2823 				NUM_BANKS(ADDR_SURF_16_BANK));
2824 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2825 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2826 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2827 				NUM_BANKS(ADDR_SURF_8_BANK));
2828 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2830 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2831 				NUM_BANKS(ADDR_SURF_4_BANK));
2832 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2833 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2834 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835 				NUM_BANKS(ADDR_SURF_16_BANK));
2836 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2837 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2838 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2839 				NUM_BANKS(ADDR_SURF_16_BANK));
2840 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2842 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2843 				NUM_BANKS(ADDR_SURF_16_BANK));
2844 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2846 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2847 				NUM_BANKS(ADDR_SURF_16_BANK));
2848 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2850 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2851 				NUM_BANKS(ADDR_SURF_16_BANK));
2852 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2855 				NUM_BANKS(ADDR_SURF_8_BANK));
2856 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2857 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2858 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2859 				NUM_BANKS(ADDR_SURF_4_BANK));
2860 
2861 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2862 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2863 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2864 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2865 		break;
2866 
2867 	case 2:
2868 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2869 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2870 			   PIPE_CONFIG(ADDR_SURF_P2) |
2871 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2872 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2873 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2874 			   PIPE_CONFIG(ADDR_SURF_P2) |
2875 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2876 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2877 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2878 			   PIPE_CONFIG(ADDR_SURF_P2) |
2879 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2880 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2881 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2882 			   PIPE_CONFIG(ADDR_SURF_P2) |
2883 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2884 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2885 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2886 			   PIPE_CONFIG(ADDR_SURF_P2) |
2887 			   TILE_SPLIT(split_equal_to_row_size));
2888 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2889 			   PIPE_CONFIG(ADDR_SURF_P2) |
2890 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2891 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2892 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2893 			   PIPE_CONFIG(ADDR_SURF_P2) |
2894 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2895 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2896 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2897 			   PIPE_CONFIG(ADDR_SURF_P2) |
2898 			   TILE_SPLIT(split_equal_to_row_size));
2899 		tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2900 			   PIPE_CONFIG(ADDR_SURF_P2);
2901 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2902 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2903 			   PIPE_CONFIG(ADDR_SURF_P2));
2904 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2906 			    PIPE_CONFIG(ADDR_SURF_P2) |
2907 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2908 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2909 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2910 			    PIPE_CONFIG(ADDR_SURF_P2) |
2911 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2912 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2913 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2914 			    PIPE_CONFIG(ADDR_SURF_P2) |
2915 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2916 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2917 			    PIPE_CONFIG(ADDR_SURF_P2) |
2918 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2919 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2920 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2921 			    PIPE_CONFIG(ADDR_SURF_P2) |
2922 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2923 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2924 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2925 			    PIPE_CONFIG(ADDR_SURF_P2) |
2926 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2927 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2928 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2929 			    PIPE_CONFIG(ADDR_SURF_P2) |
2930 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2931 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2932 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2933 			    PIPE_CONFIG(ADDR_SURF_P2));
2934 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2935 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2936 			    PIPE_CONFIG(ADDR_SURF_P2) |
2937 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2938 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2939 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2940 			    PIPE_CONFIG(ADDR_SURF_P2) |
2941 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2942 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2943 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2944 			    PIPE_CONFIG(ADDR_SURF_P2) |
2945 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2946 
2947 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2948 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2949 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2950 				NUM_BANKS(ADDR_SURF_16_BANK));
2951 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2952 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2953 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2954 				NUM_BANKS(ADDR_SURF_16_BANK));
2955 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2956 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2957 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2958 				NUM_BANKS(ADDR_SURF_16_BANK));
2959 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2960 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2961 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2962 				NUM_BANKS(ADDR_SURF_16_BANK));
2963 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2964 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2965 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2966 				NUM_BANKS(ADDR_SURF_16_BANK));
2967 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2968 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2969 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2970 				NUM_BANKS(ADDR_SURF_16_BANK));
2971 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2972 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2973 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2974 				NUM_BANKS(ADDR_SURF_8_BANK));
2975 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2976 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2977 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2978 				NUM_BANKS(ADDR_SURF_16_BANK));
2979 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2980 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2981 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2982 				NUM_BANKS(ADDR_SURF_16_BANK));
2983 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2984 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2985 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2986 				NUM_BANKS(ADDR_SURF_16_BANK));
2987 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2988 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2989 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2990 				NUM_BANKS(ADDR_SURF_16_BANK));
2991 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2992 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2993 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994 				NUM_BANKS(ADDR_SURF_16_BANK));
2995 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2996 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2997 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2998 				NUM_BANKS(ADDR_SURF_16_BANK));
2999 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3000 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3001 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3002 				NUM_BANKS(ADDR_SURF_8_BANK));
3003 
3004 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3005 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3006 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3007 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3008 		break;
3009 
3010 	default:
3011 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3012 	}
3013 }
3014 
3015 /**
3016  * cik_select_se_sh - select which SE, SH to address
3017  *
3018  * @rdev: radeon_device pointer
3019  * @se_num: shader engine to address
3020  * @sh_num: sh block to address
3021  *
3022  * Select which SE, SH combinations to address. Certain
3023  * registers are instanced per SE or SH.  0xffffffff means
3024  * broadcast to all SEs or SHs (CIK).
3025  */
3026 static void cik_select_se_sh(struct radeon_device *rdev,
3027 			     u32 se_num, u32 sh_num)
3028 {
3029 	u32 data = INSTANCE_BROADCAST_WRITES;
3030 
3031 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3032 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3033 	else if (se_num == 0xffffffff)
3034 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3035 	else if (sh_num == 0xffffffff)
3036 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3037 	else
3038 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3039 	WREG32(GRBM_GFX_INDEX, data);
3040 }
3041 
/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: number of low-order bits to set
 *
 * Builds a mask with the lowest @bit_width bits set (CIK).  Widths of
 * 32 or more yield a mask with all 32 bits set.
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	/* shifting a 32-bit value by 32 or more is undefined, so saturate */
	if (bit_width >= 32)
		return 0xffffffff;

	return ((u32)1 << bit_width) - 1;
}
3060 
3061 /**
3062  * cik_get_rb_disabled - computes the mask of disabled RBs
3063  *
3064  * @rdev: radeon_device pointer
3065  * @max_rb_num_per_se: max RBs (render backends) per SE (shader engine) for the asic
3066  * @sh_per_se: number of SH blocks per SE for the asic
3067  *
3068  * Calculates the bitmask of disabled RBs (CIK).
3069  * Returns the disabled RB bitmask.
3070  */
3071 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3072 			      u32 max_rb_num_per_se,
3073 			      u32 sh_per_se)
3074 {
3075 	u32 data, mask;
3076 
3077 	data = RREG32(CC_RB_BACKEND_DISABLE);
3078 	if (data & 1)
3079 		data &= BACKEND_DISABLE_MASK;
3080 	else
3081 		data = 0;
3082 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3083 
3084 	data >>= BACKEND_DISABLE_SHIFT;
3085 
3086 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3087 
3088 	return data & mask;
3089 }
3090 
3091 /**
3092  * cik_setup_rb - setup the RBs on the asic
3093  *
3094  * @rdev: radeon_device pointer
3095  * @se_num: number of SEs (shader engines) for the asic
3096  * @sh_per_se: number of SH blocks per SE for the asic
3097  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3098  *
3099  * Configures per-SE/SH RB registers (CIK).
3100  */
3101 static void cik_setup_rb(struct radeon_device *rdev,
3102 			 u32 se_num, u32 sh_per_se,
3103 			 u32 max_rb_num_per_se)
3104 {
3105 	int i, j;
3106 	u32 data, mask;
3107 	u32 disabled_rbs = 0;
3108 	u32 enabled_rbs = 0;
3109 
3110 	for (i = 0; i < se_num; i++) {
3111 		for (j = 0; j < sh_per_se; j++) {
3112 			cik_select_se_sh(rdev, i, j);
3113 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3114 			if (rdev->family == CHIP_HAWAII)
3115 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3116 			else
3117 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3118 		}
3119 	}
3120 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3121 
3122 	mask = 1;
3123 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3124 		if (!(disabled_rbs & mask))
3125 			enabled_rbs |= mask;
3126 		mask <<= 1;
3127 	}
3128 
3129 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3130 
3131 	for (i = 0; i < se_num; i++) {
3132 		cik_select_se_sh(rdev, i, 0xffffffff);
3133 		data = 0;
3134 		for (j = 0; j < sh_per_se; j++) {
3135 			switch (enabled_rbs & 3) {
3136 			case 0:
3137 				if (j == 0)
3138 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3139 				else
3140 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3141 				break;
3142 			case 1:
3143 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3144 				break;
3145 			case 2:
3146 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3147 				break;
3148 			case 3:
3149 			default:
3150 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3151 				break;
3152 			}
3153 			enabled_rbs >>= 2;
3154 		}
3155 		WREG32(PA_SC_RASTER_CONFIG, data);
3156 	}
3157 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3158 }
3159 
3160 /**
3161  * cik_gpu_init - setup the 3D engine
3162  *
3163  * @rdev: radeon_device pointer
3164  *
3165  * Configures the 3D engine and tiling configuration
3166  * registers so that the 3D engine is usable.
3167  */
3168 static void cik_gpu_init(struct radeon_device *rdev)
3169 {
3170 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3171 	u32 mc_arb_ramcfg;
3172 	u32 hdp_host_path_cntl;
3173 	u32 tmp;
3174 	int i, j;
3175 
3176 	switch (rdev->family) {
3177 	case CHIP_BONAIRE:
3178 		rdev->config.cik.max_shader_engines = 2;
3179 		rdev->config.cik.max_tile_pipes = 4;
3180 		rdev->config.cik.max_cu_per_sh = 7;
3181 		rdev->config.cik.max_sh_per_se = 1;
3182 		rdev->config.cik.max_backends_per_se = 2;
3183 		rdev->config.cik.max_texture_channel_caches = 4;
3184 		rdev->config.cik.max_gprs = 256;
3185 		rdev->config.cik.max_gs_threads = 32;
3186 		rdev->config.cik.max_hw_contexts = 8;
3187 
3188 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3189 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3190 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3191 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3192 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3193 		break;
3194 	case CHIP_HAWAII:
3195 		rdev->config.cik.max_shader_engines = 4;
3196 		rdev->config.cik.max_tile_pipes = 16;
3197 		rdev->config.cik.max_cu_per_sh = 11;
3198 		rdev->config.cik.max_sh_per_se = 1;
3199 		rdev->config.cik.max_backends_per_se = 4;
3200 		rdev->config.cik.max_texture_channel_caches = 16;
3201 		rdev->config.cik.max_gprs = 256;
3202 		rdev->config.cik.max_gs_threads = 32;
3203 		rdev->config.cik.max_hw_contexts = 8;
3204 
3205 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3206 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3207 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3208 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3209 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3210 		break;
3211 	case CHIP_KAVERI:
3212 		rdev->config.cik.max_shader_engines = 1;
3213 		rdev->config.cik.max_tile_pipes = 4;
3214 		rdev->config.cik.max_cu_per_sh = 8;
3215 		rdev->config.cik.max_backends_per_se = 2;
3216 		rdev->config.cik.max_sh_per_se = 1;
3217 		rdev->config.cik.max_texture_channel_caches = 4;
3218 		rdev->config.cik.max_gprs = 256;
3219 		rdev->config.cik.max_gs_threads = 16;
3220 		rdev->config.cik.max_hw_contexts = 8;
3221 
3222 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3223 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3224 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3225 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3226 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3227 		break;
3228 	case CHIP_KABINI:
3229 	case CHIP_MULLINS:
3230 	default:
3231 		rdev->config.cik.max_shader_engines = 1;
3232 		rdev->config.cik.max_tile_pipes = 2;
3233 		rdev->config.cik.max_cu_per_sh = 2;
3234 		rdev->config.cik.max_sh_per_se = 1;
3235 		rdev->config.cik.max_backends_per_se = 1;
3236 		rdev->config.cik.max_texture_channel_caches = 2;
3237 		rdev->config.cik.max_gprs = 256;
3238 		rdev->config.cik.max_gs_threads = 16;
3239 		rdev->config.cik.max_hw_contexts = 8;
3240 
3241 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3242 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3243 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3244 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3245 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3246 		break;
3247 	}
3248 
3249 	/* Initialize HDP */
3250 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3251 		WREG32((0x2c14 + j), 0x00000000);
3252 		WREG32((0x2c18 + j), 0x00000000);
3253 		WREG32((0x2c1c + j), 0x00000000);
3254 		WREG32((0x2c20 + j), 0x00000000);
3255 		WREG32((0x2c24 + j), 0x00000000);
3256 	}
3257 
3258 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3259 	WREG32(SRBM_INT_CNTL, 0x1);
3260 	WREG32(SRBM_INT_ACK, 0x1);
3261 
3262 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3263 
3264 	RREG32(MC_SHARED_CHMAP);
3265 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3266 
3267 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3268 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3269 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3270 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3271 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3272 		rdev->config.cik.mem_row_size_in_kb = 4;
3273 	/* XXX use MC settings? */
3274 	rdev->config.cik.shader_engine_tile_size = 32;
3275 	rdev->config.cik.num_gpus = 1;
3276 	rdev->config.cik.multi_gpu_tile_size = 64;
3277 
3278 	/* fix up row size */
3279 	gb_addr_config &= ~ROW_SIZE_MASK;
3280 	switch (rdev->config.cik.mem_row_size_in_kb) {
3281 	case 1:
3282 	default:
3283 		gb_addr_config |= ROW_SIZE(0);
3284 		break;
3285 	case 2:
3286 		gb_addr_config |= ROW_SIZE(1);
3287 		break;
3288 	case 4:
3289 		gb_addr_config |= ROW_SIZE(2);
3290 		break;
3291 	}
3292 
3293 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3294 	 * not have bank info, so create a custom tiling dword.
3295 	 * bits 3:0   num_pipes
3296 	 * bits 7:4   num_banks
3297 	 * bits 11:8  group_size
3298 	 * bits 15:12 row_size
3299 	 */
3300 	rdev->config.cik.tile_config = 0;
3301 	switch (rdev->config.cik.num_tile_pipes) {
3302 	case 1:
3303 		rdev->config.cik.tile_config |= (0 << 0);
3304 		break;
3305 	case 2:
3306 		rdev->config.cik.tile_config |= (1 << 0);
3307 		break;
3308 	case 4:
3309 		rdev->config.cik.tile_config |= (2 << 0);
3310 		break;
3311 	case 8:
3312 	default:
3313 		/* XXX what about 12? */
3314 		rdev->config.cik.tile_config |= (3 << 0);
3315 		break;
3316 	}
3317 	rdev->config.cik.tile_config |=
3318 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3319 	rdev->config.cik.tile_config |=
3320 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3321 	rdev->config.cik.tile_config |=
3322 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3323 
3324 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3325 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3326 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3327 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3328 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3329 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3330 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3331 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3332 
3333 	cik_tiling_mode_table_init(rdev);
3334 
3335 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3336 		     rdev->config.cik.max_sh_per_se,
3337 		     rdev->config.cik.max_backends_per_se);
3338 
3339 	rdev->config.cik.active_cus = 0;
3340 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3341 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3342 			rdev->config.cik.active_cus +=
3343 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3344 		}
3345 	}
3346 
3347 	/* set HW defaults for 3D engine */
3348 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3349 
3350 	WREG32(SX_DEBUG_1, 0x20);
3351 
3352 	WREG32(TA_CNTL_AUX, 0x00010000);
3353 
3354 	tmp = RREG32(SPI_CONFIG_CNTL);
3355 	tmp |= 0x03000000;
3356 	WREG32(SPI_CONFIG_CNTL, tmp);
3357 
3358 	WREG32(SQ_CONFIG, 1);
3359 
3360 	WREG32(DB_DEBUG, 0);
3361 
3362 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3363 	tmp |= 0x00000400;
3364 	WREG32(DB_DEBUG2, tmp);
3365 
3366 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3367 	tmp |= 0x00020200;
3368 	WREG32(DB_DEBUG3, tmp);
3369 
3370 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3371 	tmp |= 0x00018208;
3372 	WREG32(CB_HW_CONTROL, tmp);
3373 
3374 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3375 
3376 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3377 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3378 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3379 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3380 
3381 	WREG32(VGT_NUM_INSTANCES, 1);
3382 
3383 	WREG32(CP_PERFMON_CNTL, 0);
3384 
3385 	WREG32(SQ_CONFIG, 0);
3386 
3387 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3388 					  FORCE_EOV_MAX_REZ_CNT(255)));
3389 
3390 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3391 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3392 
3393 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3394 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3395 
3396 	tmp = RREG32(HDP_MISC_CNTL);
3397 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3398 	WREG32(HDP_MISC_CNTL, tmp);
3399 
3400 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3401 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3402 
3403 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3404 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3405 
3406 	udelay(50);
3407 }
3408 
3409 /*
3410  * GPU scratch register helper functions.
3411  */
3412 /**
3413  * cik_scratch_init - setup driver info for CP scratch regs
3414  *
3415  * @rdev: radeon_device pointer
3416  *
3417  * Set up the number and offset of the CP scratch registers.
3418  * NOTE: use of CP scratch registers is a legacy inferface and
3419  * is not used by default on newer asics (r6xx+).  On newer asics,
3420  * memory buffers are used for fences rather than scratch regs.
3421  */
3422 static void cik_scratch_init(struct radeon_device *rdev)
3423 {
3424 	int i;
3425 
3426 	rdev->scratch.num_reg = 7;
3427 	rdev->scratch.reg_base = SCRATCH_REG0;
3428 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3429 		rdev->scratch.free[i] = true;
3430 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3431 	}
3432 }
3433 
3434 /**
3435  * cik_ring_test - basic gfx ring test
3436  *
3437  * @rdev: radeon_device pointer
3438  * @ring: radeon_ring structure holding ring information
3439  *
3440  * Allocate a scratch register and write to it using the gfx ring (CIK).
3441  * Provides a basic gfx ring test to verify that the ring is working.
3442  * Used by cik_cp_gfx_resume();
3443  * Returns 0 on success, error on failure.
3444  */
3445 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3446 {
3447 	uint32_t scratch;
3448 	uint32_t tmp = 0;
3449 	unsigned i;
3450 	int r;
3451 
3452 	r = radeon_scratch_get(rdev, &scratch);
3453 	if (r) {
3454 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3455 		return r;
3456 	}
3457 	WREG32(scratch, 0xCAFEDEAD);
3458 	r = radeon_ring_lock(rdev, ring, 3);
3459 	if (r) {
3460 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3461 		radeon_scratch_free(rdev, scratch);
3462 		return r;
3463 	}
3464 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3465 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3466 	radeon_ring_write(ring, 0xDEADBEEF);
3467 	radeon_ring_unlock_commit(rdev, ring, false);
3468 
3469 	for (i = 0; i < rdev->usec_timeout; i++) {
3470 		tmp = RREG32(scratch);
3471 		if (tmp == 0xDEADBEEF)
3472 			break;
3473 		udelay(1);
3474 	}
3475 	if (i < rdev->usec_timeout) {
3476 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3477 	} else {
3478 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3479 			  ring->idx, scratch, tmp);
3480 		r = -EINVAL;
3481 	}
3482 	radeon_scratch_free(rdev, scratch);
3483 	return r;
3484 }
3485 
3486 /**
3487  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3488  *
3489  * @rdev: radeon_device pointer
3490  * @ridx: radeon ring index
3491  *
3492  * Emits an hdp flush on the cp.
3493  */
3494 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3495 				       int ridx)
3496 {
3497 	struct radeon_ring *ring = &rdev->ring[ridx];
3498 	u32 ref_and_mask;
3499 
3500 	switch (ring->idx) {
3501 	case CAYMAN_RING_TYPE_CP1_INDEX:
3502 	case CAYMAN_RING_TYPE_CP2_INDEX:
3503 	default:
3504 		switch (ring->me) {
3505 		case 0:
3506 			ref_and_mask = CP2 << ring->pipe;
3507 			break;
3508 		case 1:
3509 			ref_and_mask = CP6 << ring->pipe;
3510 			break;
3511 		default:
3512 			return;
3513 		}
3514 		break;
3515 	case RADEON_RING_TYPE_GFX_INDEX:
3516 		ref_and_mask = CP0;
3517 		break;
3518 	}
3519 
3520 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3521 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3522 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3523 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3524 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3525 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3526 	radeon_ring_write(ring, ref_and_mask);
3527 	radeon_ring_write(ring, ref_and_mask);
3528 	radeon_ring_write(ring, 0x20); /* poll interval */
3529 }
3530 
3531 /**
3532  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3533  *
3534  * @rdev: radeon_device pointer
3535  * @fence: radeon fence object
3536  *
3537  * Emits a fence sequnce number on the gfx ring and flushes
3538  * GPU caches.
3539  */
3540 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3541 			     struct radeon_fence *fence)
3542 {
3543 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3544 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3545 
3546 	/* Workaround for cache flush problems. First send a dummy EOP
3547 	 * event down the pipe with seq one below.
3548 	 */
3549 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3550 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3551 				 EOP_TC_ACTION_EN |
3552 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3553 				 EVENT_INDEX(5)));
3554 	radeon_ring_write(ring, addr & 0xfffffffc);
3555 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3556 				DATA_SEL(1) | INT_SEL(0));
3557 	radeon_ring_write(ring, fence->seq - 1);
3558 	radeon_ring_write(ring, 0);
3559 
3560 	/* Then send the real EOP event down the pipe. */
3561 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3562 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3563 				 EOP_TC_ACTION_EN |
3564 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3565 				 EVENT_INDEX(5)));
3566 	radeon_ring_write(ring, addr & 0xfffffffc);
3567 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3568 	radeon_ring_write(ring, fence->seq);
3569 	radeon_ring_write(ring, 0);
3570 }
3571 
3572 /**
3573  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3574  *
3575  * @rdev: radeon_device pointer
3576  * @fence: radeon fence object
3577  *
3578  * Emits a fence sequnce number on the compute ring and flushes
3579  * GPU caches.
3580  */
3581 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3582 				 struct radeon_fence *fence)
3583 {
3584 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3585 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3586 
3587 	/* RELEASE_MEM - flush caches, send int */
3588 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3589 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3590 				 EOP_TC_ACTION_EN |
3591 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3592 				 EVENT_INDEX(5)));
3593 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3594 	radeon_ring_write(ring, addr & 0xfffffffc);
3595 	radeon_ring_write(ring, upper_32_bits(addr));
3596 	radeon_ring_write(ring, fence->seq);
3597 	radeon_ring_write(ring, 0);
3598 }
3599 
3600 /**
3601  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3602  *
3603  * @rdev: radeon_device pointer
3604  * @ring: radeon ring buffer object
3605  * @semaphore: radeon semaphore object
3606  * @emit_wait: Is this a sempahore wait?
3607  *
3608  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3609  * from running ahead of semaphore waits.
3610  */
3611 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3612 			     struct radeon_ring *ring,
3613 			     struct radeon_semaphore *semaphore,
3614 			     bool emit_wait)
3615 {
3616 	uint64_t addr = semaphore->gpu_addr;
3617 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3618 
3619 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3620 	radeon_ring_write(ring, lower_32_bits(addr));
3621 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3622 
3623 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3624 		/* Prevent the PFP from running ahead of the semaphore wait */
3625 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3626 		radeon_ring_write(ring, 0x0);
3627 	}
3628 
3629 	return true;
3630 }
3631 
3632 /**
3633  * cik_copy_cpdma - copy pages using the CP DMA engine
3634  *
3635  * @rdev: radeon_device pointer
3636  * @src_offset: src GPU address
3637  * @dst_offset: dst GPU address
3638  * @num_gpu_pages: number of GPU pages to xfer
3639  * @resv: reservation object to sync to
3640  *
3641  * Copy GPU paging using the CP DMA engine (CIK+).
3642  * Used by the radeon ttm implementation to move pages if
3643  * registered as the asic copy callback.
3644  */
3645 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3646 				    uint64_t src_offset, uint64_t dst_offset,
3647 				    unsigned num_gpu_pages,
3648 				    struct dma_resv *resv)
3649 {
3650 	struct radeon_fence *fence;
3651 	struct radeon_sync sync;
3652 	int ring_index = rdev->asic->copy.blit_ring_index;
3653 	struct radeon_ring *ring = &rdev->ring[ring_index];
3654 	u32 size_in_bytes, cur_size_in_bytes, control;
3655 	int i, num_loops;
3656 	int r = 0;
3657 
3658 	radeon_sync_create(&sync);
3659 
3660 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3661 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3662 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3663 	if (r) {
3664 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3665 		radeon_sync_free(rdev, &sync, NULL);
3666 		return ERR_PTR(r);
3667 	}
3668 
3669 	radeon_sync_resv(rdev, &sync, resv, false);
3670 	radeon_sync_rings(rdev, &sync, ring->idx);
3671 
3672 	for (i = 0; i < num_loops; i++) {
3673 		cur_size_in_bytes = size_in_bytes;
3674 		if (cur_size_in_bytes > 0x1fffff)
3675 			cur_size_in_bytes = 0x1fffff;
3676 		size_in_bytes -= cur_size_in_bytes;
3677 		control = 0;
3678 		if (size_in_bytes == 0)
3679 			control |= PACKET3_DMA_DATA_CP_SYNC;
3680 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3681 		radeon_ring_write(ring, control);
3682 		radeon_ring_write(ring, lower_32_bits(src_offset));
3683 		radeon_ring_write(ring, upper_32_bits(src_offset));
3684 		radeon_ring_write(ring, lower_32_bits(dst_offset));
3685 		radeon_ring_write(ring, upper_32_bits(dst_offset));
3686 		radeon_ring_write(ring, cur_size_in_bytes);
3687 		src_offset += cur_size_in_bytes;
3688 		dst_offset += cur_size_in_bytes;
3689 	}
3690 
3691 	r = radeon_fence_emit(rdev, &fence, ring->idx);
3692 	if (r) {
3693 		radeon_ring_unlock_undo(rdev, ring);
3694 		radeon_sync_free(rdev, &sync, NULL);
3695 		return ERR_PTR(r);
3696 	}
3697 
3698 	radeon_ring_unlock_commit(rdev, ring, false);
3699 	radeon_sync_free(rdev, &sync, fence);
3700 
3701 	return fence;
3702 }
3703 
3704 /*
3705  * IB stuff
3706  */
3707 /**
3708  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3709  *
3710  * @rdev: radeon_device pointer
3711  * @ib: radeon indirect buffer object
3712  *
3713  * Emits a DE (drawing engine) or CE (constant engine) IB
3714  * on the gfx ring.  IBs are usually generated by userspace
3715  * acceleration drivers and submitted to the kernel for
3716  * scheduling on the ring.  This function schedules the IB
3717  * on the gfx ring for execution by the GPU.
3718  */
3719 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3720 {
3721 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3722 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3723 	u32 header, control = INDIRECT_BUFFER_VALID;
3724 
3725 	if (ib->is_const_ib) {
3726 		/* set switch buffer packet before const IB */
3727 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3728 		radeon_ring_write(ring, 0);
3729 
3730 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3731 	} else {
3732 		u32 next_rptr;
3733 		if (ring->rptr_save_reg) {
3734 			next_rptr = ring->wptr + 3 + 4;
3735 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3736 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3737 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3738 			radeon_ring_write(ring, next_rptr);
3739 		} else if (rdev->wb.enabled) {
3740 			next_rptr = ring->wptr + 5 + 4;
3741 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3742 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3743 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3744 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3745 			radeon_ring_write(ring, next_rptr);
3746 		}
3747 
3748 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3749 	}
3750 
3751 	control |= ib->length_dw | (vm_id << 24);
3752 
3753 	radeon_ring_write(ring, header);
3754 	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3755 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3756 	radeon_ring_write(ring, control);
3757 }
3758 
3759 /**
3760  * cik_ib_test - basic gfx ring IB test
3761  *
3762  * @rdev: radeon_device pointer
3763  * @ring: radeon_ring structure holding ring information
3764  *
3765  * Allocate an IB and execute it on the gfx ring (CIK).
3766  * Provides a basic gfx ring test to verify that IBs are working.
3767  * Returns 0 on success, error on failure.
3768  */
3769 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3770 {
3771 	struct radeon_ib ib;
3772 	uint32_t scratch;
3773 	uint32_t tmp = 0;
3774 	unsigned i;
3775 	int r;
3776 
3777 	r = radeon_scratch_get(rdev, &scratch);
3778 	if (r) {
3779 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3780 		return r;
3781 	}
3782 	WREG32(scratch, 0xCAFEDEAD);
3783 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3784 	if (r) {
3785 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3786 		radeon_scratch_free(rdev, scratch);
3787 		return r;
3788 	}
3789 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3790 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3791 	ib.ptr[2] = 0xDEADBEEF;
3792 	ib.length_dw = 3;
3793 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
3794 	if (r) {
3795 		radeon_scratch_free(rdev, scratch);
3796 		radeon_ib_free(rdev, &ib);
3797 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3798 		return r;
3799 	}
3800 	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3801 		RADEON_USEC_IB_TEST_TIMEOUT));
3802 	if (r < 0) {
3803 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3804 		radeon_scratch_free(rdev, scratch);
3805 		radeon_ib_free(rdev, &ib);
3806 		return r;
3807 	} else if (r == 0) {
3808 		DRM_ERROR("radeon: fence wait timed out.\n");
3809 		radeon_scratch_free(rdev, scratch);
3810 		radeon_ib_free(rdev, &ib);
3811 		return -ETIMEDOUT;
3812 	}
3813 	r = 0;
3814 	for (i = 0; i < rdev->usec_timeout; i++) {
3815 		tmp = RREG32(scratch);
3816 		if (tmp == 0xDEADBEEF)
3817 			break;
3818 		udelay(1);
3819 	}
3820 	if (i < rdev->usec_timeout) {
3821 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3822 	} else {
3823 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3824 			  scratch, tmp);
3825 		r = -EINVAL;
3826 	}
3827 	radeon_scratch_free(rdev, scratch);
3828 	radeon_ib_free(rdev, &ib);
3829 	return r;
3830 }
3831 
3832 /*
3833  * CP.
 * On CIK, gfx and compute now have independent command processors.
3835  *
3836  * GFX
3837  * Gfx consists of a single ring and can process both gfx jobs and
3838  * compute jobs.  The gfx CP consists of three microengines (ME):
3839  * PFP - Pre-Fetch Parser
3840  * ME - Micro Engine
3841  * CE - Constant Engine
3842  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3844  * used by the DE so that they can be loaded into cache in parallel
3845  * while the DE is processing state update packets.
3846  *
3847  * Compute
3848  * The compute CP consists of two microengines (ME):
3849  * MEC1 - Compute MicroEngine 1
3850  * MEC2 - Compute MicroEngine 2
3851  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3852  * The queues are exposed to userspace and are programmed directly
3853  * by the compute runtime.
3854  */
3855 /**
3856  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3857  *
3858  * @rdev: radeon_device pointer
3859  * @enable: enable or disable the MEs
3860  *
3861  * Halts or unhalts the gfx MEs.
3862  */
3863 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3864 {
3865 	if (enable)
3866 		WREG32(CP_ME_CNTL, 0);
3867 	else {
3868 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3869 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3870 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3871 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3872 	}
3873 	udelay(50);
3874 }
3875 
3876 /**
3877  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3878  *
3879  * @rdev: radeon_device pointer
3880  *
3881  * Loads the gfx PFP, ME, and CE ucode.
3882  * Returns 0 for success, -EINVAL if the ucode is not available.
3883  */
3884 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3885 {
3886 	int i;
3887 
3888 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3889 		return -EINVAL;
3890 
3891 	cik_cp_gfx_enable(rdev, false);
3892 
3893 	if (rdev->new_fw) {
3894 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3895 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3896 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3897 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3898 		const struct gfx_firmware_header_v1_0 *me_hdr =
3899 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3900 		const __le32 *fw_data;
3901 		u32 fw_size;
3902 
3903 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3904 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3905 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3906 
3907 		/* PFP */
3908 		fw_data = (const __le32 *)
3909 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3910 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3911 		WREG32(CP_PFP_UCODE_ADDR, 0);
3912 		for (i = 0; i < fw_size; i++)
3913 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3914 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3915 
3916 		/* CE */
3917 		fw_data = (const __le32 *)
3918 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3919 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3920 		WREG32(CP_CE_UCODE_ADDR, 0);
3921 		for (i = 0; i < fw_size; i++)
3922 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3923 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3924 
3925 		/* ME */
3926 		fw_data = (const __be32 *)
3927 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3928 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3929 		WREG32(CP_ME_RAM_WADDR, 0);
3930 		for (i = 0; i < fw_size; i++)
3931 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3932 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3933 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3934 	} else {
3935 		const __be32 *fw_data;
3936 
3937 		/* PFP */
3938 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3939 		WREG32(CP_PFP_UCODE_ADDR, 0);
3940 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3941 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3942 		WREG32(CP_PFP_UCODE_ADDR, 0);
3943 
3944 		/* CE */
3945 		fw_data = (const __be32 *)rdev->ce_fw->data;
3946 		WREG32(CP_CE_UCODE_ADDR, 0);
3947 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3948 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3949 		WREG32(CP_CE_UCODE_ADDR, 0);
3950 
3951 		/* ME */
3952 		fw_data = (const __be32 *)rdev->me_fw->data;
3953 		WREG32(CP_ME_RAM_WADDR, 0);
3954 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3955 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3956 		WREG32(CP_ME_RAM_WADDR, 0);
3957 	}
3958 
3959 	return 0;
3960 }
3961 
3962 /**
3963  * cik_cp_gfx_start - start the gfx ring
3964  *
3965  * @rdev: radeon_device pointer
3966  *
3967  * Enables the ring and loads the clear state context and other
3968  * packets required to init the ring.
3969  * Returns 0 for success, error for failure.
3970  */
3971 static int cik_cp_gfx_start(struct radeon_device *rdev)
3972 {
3973 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3974 	int r, i;
3975 
3976 	/* init the CP */
3977 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3978 	WREG32(CP_ENDIAN_SWAP, 0);
3979 	WREG32(CP_DEVICE_ID, 1);
3980 
3981 	cik_cp_gfx_enable(rdev, true);
3982 
3983 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3984 	if (r) {
3985 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3986 		return r;
3987 	}
3988 
3989 	/* init the CE partitions.  CE only used for gfx on CIK */
3990 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3991 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3992 	radeon_ring_write(ring, 0x8000);
3993 	radeon_ring_write(ring, 0x8000);
3994 
3995 	/* setup clear context state */
3996 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3997 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3998 
3999 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4000 	radeon_ring_write(ring, 0x80000000);
4001 	radeon_ring_write(ring, 0x80000000);
4002 
4003 	for (i = 0; i < cik_default_size; i++)
4004 		radeon_ring_write(ring, cik_default_state[i]);
4005 
4006 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4007 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4008 
4009 	/* set clear context state */
4010 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4011 	radeon_ring_write(ring, 0);
4012 
4013 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4014 	radeon_ring_write(ring, 0x00000316);
4015 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4016 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4017 
4018 	radeon_ring_unlock_commit(rdev, ring, false);
4019 
4020 	return 0;
4021 }
4022 
4023 /**
4024  * cik_cp_gfx_fini - stop the gfx ring
4025  *
4026  * @rdev: radeon_device pointer
4027  *
4028  * Stop the gfx ring and tear down the driver ring
4029  * info.
4030  */
4031 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4032 {
4033 	cik_cp_gfx_enable(rdev, false);
4034 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4035 }
4036 
4037 /**
4038  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4039  *
4040  * @rdev: radeon_device pointer
4041  *
4042  * Program the location and size of the gfx ring buffer
4043  * and test it to make sure it's working.
4044  * Returns 0 for success, error for failure.
4045  */
4046 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4047 {
4048 	struct radeon_ring *ring;
4049 	u32 tmp;
4050 	u32 rb_bufsz;
4051 	u64 rb_addr;
4052 	int r;
4053 
4054 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4055 	if (rdev->family != CHIP_HAWAII)
4056 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4057 
4058 	/* Set the write pointer delay */
4059 	WREG32(CP_RB_WPTR_DELAY, 0);
4060 
4061 	/* set the RB to use vmid 0 */
4062 	WREG32(CP_RB_VMID, 0);
4063 
4064 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4065 
4066 	/* ring 0 - compute and gfx */
4067 	/* Set ring buffer size */
4068 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4069 	rb_bufsz = order_base_2(ring->ring_size / 8);
4070 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4071 #ifdef __BIG_ENDIAN
4072 	tmp |= BUF_SWAP_32BIT;
4073 #endif
4074 	WREG32(CP_RB0_CNTL, tmp);
4075 
4076 	/* Initialize the ring buffer's read and write pointers */
4077 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4078 	ring->wptr = 0;
4079 	WREG32(CP_RB0_WPTR, ring->wptr);
4080 
4081 	/* set the wb address wether it's enabled or not */
4082 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4083 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4084 
4085 	/* scratch register shadowing is no longer supported */
4086 	WREG32(SCRATCH_UMSK, 0);
4087 
4088 	if (!rdev->wb.enabled)
4089 		tmp |= RB_NO_UPDATE;
4090 
4091 	mdelay(1);
4092 	WREG32(CP_RB0_CNTL, tmp);
4093 
4094 	rb_addr = ring->gpu_addr >> 8;
4095 	WREG32(CP_RB0_BASE, rb_addr);
4096 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4097 
4098 	/* start the ring */
4099 	cik_cp_gfx_start(rdev);
4100 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4101 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4102 	if (r) {
4103 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4104 		return r;
4105 	}
4106 
4107 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4108 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4109 
4110 	return 0;
4111 }
4112 
4113 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4114 		     struct radeon_ring *ring)
4115 {
4116 	u32 rptr;
4117 
4118 	if (rdev->wb.enabled)
4119 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4120 	else
4121 		rptr = RREG32(CP_RB0_RPTR);
4122 
4123 	return rptr;
4124 }
4125 
4126 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4127 		     struct radeon_ring *ring)
4128 {
4129 	return RREG32(CP_RB0_WPTR);
4130 }
4131 
4132 void cik_gfx_set_wptr(struct radeon_device *rdev,
4133 		      struct radeon_ring *ring)
4134 {
4135 	WREG32(CP_RB0_WPTR, ring->wptr);
4136 	(void)RREG32(CP_RB0_WPTR);
4137 }
4138 
4139 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4140 			 struct radeon_ring *ring)
4141 {
4142 	u32 rptr;
4143 
4144 	if (rdev->wb.enabled) {
4145 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4146 	} else {
4147 		mutex_lock(&rdev->srbm_mutex);
4148 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4149 		rptr = RREG32(CP_HQD_PQ_RPTR);
4150 		cik_srbm_select(rdev, 0, 0, 0, 0);
4151 		mutex_unlock(&rdev->srbm_mutex);
4152 	}
4153 
4154 	return rptr;
4155 }
4156 
4157 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4158 			 struct radeon_ring *ring)
4159 {
4160 	u32 wptr;
4161 
4162 	if (rdev->wb.enabled) {
4163 		/* XXX check if swapping is necessary on BE */
4164 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4165 	} else {
4166 		mutex_lock(&rdev->srbm_mutex);
4167 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4168 		wptr = RREG32(CP_HQD_PQ_WPTR);
4169 		cik_srbm_select(rdev, 0, 0, 0, 0);
4170 		mutex_unlock(&rdev->srbm_mutex);
4171 	}
4172 
4173 	return wptr;
4174 }
4175 
4176 void cik_compute_set_wptr(struct radeon_device *rdev,
4177 			  struct radeon_ring *ring)
4178 {
4179 	/* XXX check if swapping is necessary on BE */
4180 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4181 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4182 }
4183 
4184 static void cik_compute_stop(struct radeon_device *rdev,
4185 			     struct radeon_ring *ring)
4186 {
4187 	u32 j, tmp;
4188 
4189 	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4190 	/* Disable wptr polling. */
4191 	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4192 	tmp &= ~WPTR_POLL_EN;
4193 	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4194 	/* Disable HQD. */
4195 	if (RREG32(CP_HQD_ACTIVE) & 1) {
4196 		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4197 		for (j = 0; j < rdev->usec_timeout; j++) {
4198 			if (!(RREG32(CP_HQD_ACTIVE) & 1))
4199 				break;
4200 			udelay(1);
4201 		}
4202 		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4203 		WREG32(CP_HQD_PQ_RPTR, 0);
4204 		WREG32(CP_HQD_PQ_WPTR, 0);
4205 	}
4206 	cik_srbm_select(rdev, 0, 0, 0, 0);
4207 }
4208 
4209 /**
4210  * cik_cp_compute_enable - enable/disable the compute CP MEs
4211  *
4212  * @rdev: radeon_device pointer
4213  * @enable: enable or disable the MEs
4214  *
4215  * Halts or unhalts the compute MEs.
4216  */
4217 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4218 {
4219 	if (enable)
4220 		WREG32(CP_MEC_CNTL, 0);
4221 	else {
4222 		/*
4223 		 * To make hibernation reliable we need to clear compute ring
4224 		 * configuration before halting the compute ring.
4225 		 */
4226 		mutex_lock(&rdev->srbm_mutex);
4227 		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4228 		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4229 		mutex_unlock(&rdev->srbm_mutex);
4230 
4231 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4232 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4233 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4234 	}
4235 	udelay(50);
4236 }
4237 
4238 /**
4239  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4240  *
4241  * @rdev: radeon_device pointer
4242  *
4243  * Loads the compute MEC1&2 ucode.
4244  * Returns 0 for success, -EINVAL if the ucode is not available.
4245  */
4246 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4247 {
4248 	int i;
4249 
4250 	if (!rdev->mec_fw)
4251 		return -EINVAL;
4252 
4253 	cik_cp_compute_enable(rdev, false);
4254 
4255 	if (rdev->new_fw) {
4256 		const struct gfx_firmware_header_v1_0 *mec_hdr =
4257 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4258 		const __le32 *fw_data;
4259 		u32 fw_size;
4260 
4261 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4262 
4263 		/* MEC1 */
4264 		fw_data = (const __le32 *)
4265 			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4266 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4267 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4268 		for (i = 0; i < fw_size; i++)
4269 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4270 		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4271 
4272 		/* MEC2 */
4273 		if (rdev->family == CHIP_KAVERI) {
4274 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4275 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4276 
4277 			fw_data = (const __le32 *)
4278 				(rdev->mec2_fw->data +
4279 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4280 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4281 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4282 			for (i = 0; i < fw_size; i++)
4283 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4284 			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4285 		}
4286 	} else {
4287 		const __be32 *fw_data;
4288 
4289 		/* MEC1 */
4290 		fw_data = (const __be32 *)rdev->mec_fw->data;
4291 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4292 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4293 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4294 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4295 
4296 		if (rdev->family == CHIP_KAVERI) {
4297 			/* MEC2 */
4298 			fw_data = (const __be32 *)rdev->mec_fw->data;
4299 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4300 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4301 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4302 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4303 		}
4304 	}
4305 
4306 	return 0;
4307 }
4308 
4309 /**
4310  * cik_cp_compute_start - start the compute queues
4311  *
4312  * @rdev: radeon_device pointer
4313  *
4314  * Enable the compute queues.
4315  * Returns 0 for success, error for failure.
4316  */
4317 static int cik_cp_compute_start(struct radeon_device *rdev)
4318 {
4319 	cik_cp_compute_enable(rdev, true);
4320 
4321 	return 0;
4322 }
4323 
4324 /**
4325  * cik_cp_compute_fini - stop the compute queues
4326  *
4327  * @rdev: radeon_device pointer
4328  *
4329  * Stop the compute queues and tear down the driver queue
4330  * info.
4331  */
4332 static void cik_cp_compute_fini(struct radeon_device *rdev)
4333 {
4334 	int i, idx, r;
4335 
4336 	cik_cp_compute_enable(rdev, false);
4337 
4338 	for (i = 0; i < 2; i++) {
4339 		if (i == 0)
4340 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4341 		else
4342 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4343 
4344 		if (rdev->ring[idx].mqd_obj) {
4345 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4346 			if (unlikely(r != 0))
4347 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4348 
4349 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4350 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4351 
4352 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4353 			rdev->ring[idx].mqd_obj = NULL;
4354 		}
4355 	}
4356 }
4357 
4358 static void cik_mec_fini(struct radeon_device *rdev)
4359 {
4360 	int r;
4361 
4362 	if (rdev->mec.hpd_eop_obj) {
4363 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4364 		if (unlikely(r != 0))
4365 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4366 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4367 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4368 
4369 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4370 		rdev->mec.hpd_eop_obj = NULL;
4371 	}
4372 }
4373 
4374 #define MEC_HPD_SIZE 2048
4375 
4376 static int cik_mec_init(struct radeon_device *rdev)
4377 {
4378 	int r;
4379 	u32 *hpd;
4380 
4381 	/*
4382 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4383 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4384 	 */
4385 	if (rdev->family == CHIP_KAVERI)
4386 		rdev->mec.num_mec = 2;
4387 	else
4388 		rdev->mec.num_mec = 1;
4389 	rdev->mec.num_pipe = 4;
4390 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4391 
4392 	if (rdev->mec.hpd_eop_obj == NULL) {
4393 		r = radeon_bo_create(rdev,
4394 				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4395 				     PAGE_SIZE, true,
4396 				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4397 				     &rdev->mec.hpd_eop_obj);
4398 		if (r) {
4399 			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4400 			return r;
4401 		}
4402 	}
4403 
4404 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4405 	if (unlikely(r != 0)) {
4406 		cik_mec_fini(rdev);
4407 		return r;
4408 	}
4409 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4410 			  &rdev->mec.hpd_eop_gpu_addr);
4411 	if (r) {
4412 		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4413 		cik_mec_fini(rdev);
4414 		return r;
4415 	}
4416 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4417 	if (r) {
4418 		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4419 		cik_mec_fini(rdev);
4420 		return r;
4421 	}
4422 
4423 	/* clear memory.  Not sure if this is required or not */
4424 	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4425 
4426 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4427 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4428 
4429 	return 0;
4430 }
4431 
4432 struct hqd_registers
4433 {
4434 	u32 cp_mqd_base_addr;
4435 	u32 cp_mqd_base_addr_hi;
4436 	u32 cp_hqd_active;
4437 	u32 cp_hqd_vmid;
4438 	u32 cp_hqd_persistent_state;
4439 	u32 cp_hqd_pipe_priority;
4440 	u32 cp_hqd_queue_priority;
4441 	u32 cp_hqd_quantum;
4442 	u32 cp_hqd_pq_base;
4443 	u32 cp_hqd_pq_base_hi;
4444 	u32 cp_hqd_pq_rptr;
4445 	u32 cp_hqd_pq_rptr_report_addr;
4446 	u32 cp_hqd_pq_rptr_report_addr_hi;
4447 	u32 cp_hqd_pq_wptr_poll_addr;
4448 	u32 cp_hqd_pq_wptr_poll_addr_hi;
4449 	u32 cp_hqd_pq_doorbell_control;
4450 	u32 cp_hqd_pq_wptr;
4451 	u32 cp_hqd_pq_control;
4452 	u32 cp_hqd_ib_base_addr;
4453 	u32 cp_hqd_ib_base_addr_hi;
4454 	u32 cp_hqd_ib_rptr;
4455 	u32 cp_hqd_ib_control;
4456 	u32 cp_hqd_iq_timer;
4457 	u32 cp_hqd_iq_rptr;
4458 	u32 cp_hqd_dequeue_request;
4459 	u32 cp_hqd_dma_offload;
4460 	u32 cp_hqd_sema_cmd;
4461 	u32 cp_hqd_msg_type;
4462 	u32 cp_hqd_atomic0_preop_lo;
4463 	u32 cp_hqd_atomic0_preop_hi;
4464 	u32 cp_hqd_atomic1_preop_lo;
4465 	u32 cp_hqd_atomic1_preop_hi;
4466 	u32 cp_hqd_hq_scheduler0;
4467 	u32 cp_hqd_hq_scheduler1;
4468 	u32 cp_mqd_control;
4469 };
4470 
4471 struct bonaire_mqd
4472 {
4473 	u32 header;
4474 	u32 dispatch_initiator;
4475 	u32 dimensions[3];
4476 	u32 start_idx[3];
4477 	u32 num_threads[3];
4478 	u32 pipeline_stat_enable;
4479 	u32 perf_counter_enable;
4480 	u32 pgm[2];
4481 	u32 tba[2];
4482 	u32 tma[2];
4483 	u32 pgm_rsrc[2];
4484 	u32 vmid;
4485 	u32 resource_limits;
4486 	u32 static_thread_mgmt01[2];
4487 	u32 tmp_ring_size;
4488 	u32 static_thread_mgmt23[2];
4489 	u32 restart[3];
4490 	u32 thread_trace_enable;
4491 	u32 reserved1;
4492 	u32 user_data[16];
4493 	u32 vgtcs_invoke_count[2];
4494 	struct hqd_registers queue_state;
4495 	u32 dequeue_cntr;
4496 	u32 interrupt_queue[64];
4497 };
4498 
4499 /**
4500  * cik_cp_compute_resume - setup the compute queue registers
4501  *
4502  * @rdev: radeon_device pointer
4503  *
4504  * Program the compute queues and test them to make sure they
4505  * are working.
4506  * Returns 0 for success, error for failure.
4507  */
4508 static int cik_cp_compute_resume(struct radeon_device *rdev)
4509 {
4510 	int r, i, j, idx;
4511 	u32 tmp;
4512 	bool use_doorbell = true;
4513 	u64 hqd_gpu_addr;
4514 	u64 mqd_gpu_addr;
4515 	u64 eop_gpu_addr;
4516 	u64 wb_gpu_addr;
4517 	u32 *buf;
4518 	struct bonaire_mqd *mqd;
4519 
4520 	r = cik_cp_compute_start(rdev);
4521 	if (r)
4522 		return r;
4523 
4524 	/* fix up chicken bits */
4525 	tmp = RREG32(CP_CPF_DEBUG);
4526 	tmp |= (1 << 23);
4527 	WREG32(CP_CPF_DEBUG, tmp);
4528 
4529 	/* init the pipes */
4530 	mutex_lock(&rdev->srbm_mutex);
4531 
4532 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); ++i) {
4533 		int me = (i < 4) ? 1 : 2;
4534 		int pipe = (i < 4) ? i : (i - 4);
4535 
4536 		cik_srbm_select(rdev, me, pipe, 0, 0);
4537 
4538 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
4539 		/* write the EOP addr */
4540 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4541 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4542 
4543 		/* set the VMID assigned */
4544 		WREG32(CP_HPD_EOP_VMID, 0);
4545 
4546 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4547 		tmp = RREG32(CP_HPD_EOP_CONTROL);
4548 		tmp &= ~EOP_SIZE_MASK;
4549 		tmp |= order_base_2(MEC_HPD_SIZE / 8);
4550 		WREG32(CP_HPD_EOP_CONTROL, tmp);
4551 
4552 	}
4553 	cik_srbm_select(rdev, 0, 0, 0, 0);
4554 	mutex_unlock(&rdev->srbm_mutex);
4555 
4556 	/* init the queues.  Just two for now. */
4557 	for (i = 0; i < 2; i++) {
4558 		if (i == 0)
4559 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4560 		else
4561 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4562 
4563 		if (rdev->ring[idx].mqd_obj == NULL) {
4564 			r = radeon_bo_create(rdev,
4565 					     sizeof(struct bonaire_mqd),
4566 					     PAGE_SIZE, true,
4567 					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4568 					     NULL, &rdev->ring[idx].mqd_obj);
4569 			if (r) {
4570 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4571 				return r;
4572 			}
4573 		}
4574 
4575 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4576 		if (unlikely(r != 0)) {
4577 			cik_cp_compute_fini(rdev);
4578 			return r;
4579 		}
4580 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4581 				  &mqd_gpu_addr);
4582 		if (r) {
4583 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4584 			cik_cp_compute_fini(rdev);
4585 			return r;
4586 		}
4587 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4588 		if (r) {
4589 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4590 			cik_cp_compute_fini(rdev);
4591 			return r;
4592 		}
4593 
4594 		/* init the mqd struct */
4595 		memset(buf, 0, sizeof(struct bonaire_mqd));
4596 
4597 		mqd = (struct bonaire_mqd *)buf;
4598 		mqd->header = 0xC0310800;
4599 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4600 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4601 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4602 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4603 
4604 		mutex_lock(&rdev->srbm_mutex);
4605 		cik_srbm_select(rdev, rdev->ring[idx].me,
4606 				rdev->ring[idx].pipe,
4607 				rdev->ring[idx].queue, 0);
4608 
4609 		/* disable wptr polling */
4610 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4611 		tmp &= ~WPTR_POLL_EN;
4612 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4613 
4614 		/* enable doorbell? */
4615 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4616 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4617 		if (use_doorbell)
4618 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4619 		else
4620 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4621 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4622 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4623 
4624 		/* disable the queue if it's active */
4625 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4626 		mqd->queue_state.cp_hqd_pq_rptr = 0;
4627 		mqd->queue_state.cp_hqd_pq_wptr= 0;
4628 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4629 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4630 			for (j = 0; j < rdev->usec_timeout; j++) {
4631 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4632 					break;
4633 				udelay(1);
4634 			}
4635 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4636 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4637 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4638 		}
4639 
4640 		/* set the pointer to the MQD */
4641 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4642 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4643 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4644 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4645 		/* set MQD vmid to 0 */
4646 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4647 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4648 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4649 
4650 		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4651 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4652 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4653 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4654 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4655 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4656 
4657 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4658 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4659 		mqd->queue_state.cp_hqd_pq_control &=
4660 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4661 
4662 		mqd->queue_state.cp_hqd_pq_control |=
4663 			order_base_2(rdev->ring[idx].ring_size / 8);
4664 		mqd->queue_state.cp_hqd_pq_control |=
4665 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4666 #ifdef __BIG_ENDIAN
4667 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4668 #endif
4669 		mqd->queue_state.cp_hqd_pq_control &=
4670 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4671 		mqd->queue_state.cp_hqd_pq_control |=
4672 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4673 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4674 
4675 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4676 		if (i == 0)
4677 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4678 		else
4679 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4680 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4681 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4682 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4683 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4684 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4685 
4686 		/* set the wb address wether it's enabled or not */
4687 		if (i == 0)
4688 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4689 		else
4690 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4691 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4692 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4693 			upper_32_bits(wb_gpu_addr) & 0xffff;
4694 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4695 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4696 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4697 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4698 
4699 		/* enable the doorbell if requested */
4700 		if (use_doorbell) {
4701 			mqd->queue_state.cp_hqd_pq_doorbell_control =
4702 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4703 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4704 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4705 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4706 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4707 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4708 				~(DOORBELL_SOURCE | DOORBELL_HIT);
4709 
4710 		} else {
4711 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4712 		}
4713 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4714 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4715 
4716 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4717 		rdev->ring[idx].wptr = 0;
4718 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4719 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4720 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4721 
4722 		/* set the vmid for the queue */
4723 		mqd->queue_state.cp_hqd_vmid = 0;
4724 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4725 
4726 		/* activate the queue */
4727 		mqd->queue_state.cp_hqd_active = 1;
4728 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4729 
4730 		cik_srbm_select(rdev, 0, 0, 0, 0);
4731 		mutex_unlock(&rdev->srbm_mutex);
4732 
4733 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4734 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4735 
4736 		rdev->ring[idx].ready = true;
4737 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4738 		if (r)
4739 			rdev->ring[idx].ready = false;
4740 	}
4741 
4742 	return 0;
4743 }
4744 
4745 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4746 {
4747 	cik_cp_gfx_enable(rdev, enable);
4748 	cik_cp_compute_enable(rdev, enable);
4749 }
4750 
/**
 * cik_cp_load_microcode - load the gfx and compute CP microcode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx microcode first; the compute microcode is only loaded
 * when that succeeded.
 * Returns 0 for success, or the first error reported by a loader.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int ret = cik_cp_gfx_load_microcode(rdev);

	if (ret)
		return ret;

	return cik_cp_compute_load_microcode(rdev);
}
4764 
/**
 * cik_cp_fini - tear down the gfx and compute command processors
 *
 * @rdev: radeon_device pointer
 *
 * Calls the gfx teardown helper first, then the compute one.
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4770 
4771 static int cik_cp_resume(struct radeon_device *rdev)
4772 {
4773 	int r;
4774 
4775 	cik_enable_gui_idle_interrupt(rdev, false);
4776 
4777 	r = cik_cp_load_microcode(rdev);
4778 	if (r)
4779 		return r;
4780 
4781 	r = cik_cp_gfx_resume(rdev);
4782 	if (r)
4783 		return r;
4784 	r = cik_cp_compute_resume(rdev);
4785 	if (r)
4786 		return r;
4787 
4788 	cik_enable_gui_idle_interrupt(rdev, true);
4789 
4790 	return 0;
4791 }
4792 
4793 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4794 {
4795 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4796 		RREG32(GRBM_STATUS));
4797 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4798 		RREG32(GRBM_STATUS2));
4799 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4800 		RREG32(GRBM_STATUS_SE0));
4801 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4802 		RREG32(GRBM_STATUS_SE1));
4803 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4804 		RREG32(GRBM_STATUS_SE2));
4805 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4806 		RREG32(GRBM_STATUS_SE3));
4807 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4808 		RREG32(SRBM_STATUS));
4809 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4810 		RREG32(SRBM_STATUS2));
4811 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4812 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4813 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4814 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4815 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4816 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4817 		 RREG32(CP_STALLED_STAT1));
4818 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4819 		 RREG32(CP_STALLED_STAT2));
4820 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4821 		 RREG32(CP_STALLED_STAT3));
4822 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4823 		 RREG32(CP_CPF_BUSY_STAT));
4824 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4825 		 RREG32(CP_CPF_STALLED_STAT1));
4826 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4827 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4828 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4829 		 RREG32(CP_CPC_STALLED_STAT1));
4830 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4831 }
4832 
4833 /**
4834  * cik_gpu_check_soft_reset - check which blocks are busy
4835  *
4836  * @rdev: radeon_device pointer
4837  *
4838  * Check which blocks are busy and return the relevant reset
4839  * mask to be used by cik_gpu_soft_reset().
4840  * Returns a mask of the blocks to be reset.
4841  */
4842 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4843 {
4844 	u32 reset_mask = 0;
4845 	u32 tmp;
4846 
4847 	/* GRBM_STATUS */
4848 	tmp = RREG32(GRBM_STATUS);
4849 	if (tmp & (PA_BUSY | SC_BUSY |
4850 		   BCI_BUSY | SX_BUSY |
4851 		   TA_BUSY | VGT_BUSY |
4852 		   DB_BUSY | CB_BUSY |
4853 		   GDS_BUSY | SPI_BUSY |
4854 		   IA_BUSY | IA_BUSY_NO_DMA))
4855 		reset_mask |= RADEON_RESET_GFX;
4856 
4857 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4858 		reset_mask |= RADEON_RESET_CP;
4859 
4860 	/* GRBM_STATUS2 */
4861 	tmp = RREG32(GRBM_STATUS2);
4862 	if (tmp & RLC_BUSY)
4863 		reset_mask |= RADEON_RESET_RLC;
4864 
4865 	/* SDMA0_STATUS_REG */
4866 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4867 	if (!(tmp & SDMA_IDLE))
4868 		reset_mask |= RADEON_RESET_DMA;
4869 
4870 	/* SDMA1_STATUS_REG */
4871 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4872 	if (!(tmp & SDMA_IDLE))
4873 		reset_mask |= RADEON_RESET_DMA1;
4874 
4875 	/* SRBM_STATUS2 */
4876 	tmp = RREG32(SRBM_STATUS2);
4877 	if (tmp & SDMA_BUSY)
4878 		reset_mask |= RADEON_RESET_DMA;
4879 
4880 	if (tmp & SDMA1_BUSY)
4881 		reset_mask |= RADEON_RESET_DMA1;
4882 
4883 	/* SRBM_STATUS */
4884 	tmp = RREG32(SRBM_STATUS);
4885 
4886 	if (tmp & IH_BUSY)
4887 		reset_mask |= RADEON_RESET_IH;
4888 
4889 	if (tmp & SEM_BUSY)
4890 		reset_mask |= RADEON_RESET_SEM;
4891 
4892 	if (tmp & GRBM_RQ_PENDING)
4893 		reset_mask |= RADEON_RESET_GRBM;
4894 
4895 	if (tmp & VMC_BUSY)
4896 		reset_mask |= RADEON_RESET_VMC;
4897 
4898 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4899 		   MCC_BUSY | MCD_BUSY))
4900 		reset_mask |= RADEON_RESET_MC;
4901 
4902 	if (evergreen_is_display_hung(rdev))
4903 		reset_mask |= RADEON_RESET_DISPLAY;
4904 
4905 	/* Skip MC reset as it's mostly likely not hung, just busy */
4906 	if (reset_mask & RADEON_RESET_MC) {
4907 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4908 		reset_mask &= ~RADEON_RESET_MC;
4909 	}
4910 
4911 	return reset_mask;
4912 }
4913 
4914 /**
4915  * cik_gpu_soft_reset - soft reset GPU
4916  *
4917  * @rdev: radeon_device pointer
4918  * @reset_mask: mask of which blocks to reset
4919  *
4920  * Soft reset the blocks specified in @reset_mask.
4921  */
4922 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4923 {
4924 	struct evergreen_mc_save save;
4925 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4926 	u32 tmp;
4927 
4928 	if (reset_mask == 0)
4929 		return;
4930 
4931 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4932 
4933 	cik_print_gpu_status_regs(rdev);
4934 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4935 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4936 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4937 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4938 
4939 	/* disable CG/PG */
4940 	cik_fini_pg(rdev);
4941 	cik_fini_cg(rdev);
4942 
4943 	/* stop the rlc */
4944 	cik_rlc_stop(rdev);
4945 
4946 	/* Disable GFX parsing/prefetching */
4947 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4948 
4949 	/* Disable MEC parsing/prefetching */
4950 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4951 
4952 	if (reset_mask & RADEON_RESET_DMA) {
4953 		/* sdma0 */
4954 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4955 		tmp |= SDMA_HALT;
4956 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4957 	}
4958 	if (reset_mask & RADEON_RESET_DMA1) {
4959 		/* sdma1 */
4960 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4961 		tmp |= SDMA_HALT;
4962 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4963 	}
4964 
4965 	evergreen_mc_stop(rdev, &save);
4966 	if (evergreen_mc_wait_for_idle(rdev)) {
4967 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4968 	}
4969 
4970 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4971 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4972 
4973 	if (reset_mask & RADEON_RESET_CP) {
4974 		grbm_soft_reset |= SOFT_RESET_CP;
4975 
4976 		srbm_soft_reset |= SOFT_RESET_GRBM;
4977 	}
4978 
4979 	if (reset_mask & RADEON_RESET_DMA)
4980 		srbm_soft_reset |= SOFT_RESET_SDMA;
4981 
4982 	if (reset_mask & RADEON_RESET_DMA1)
4983 		srbm_soft_reset |= SOFT_RESET_SDMA1;
4984 
4985 	if (reset_mask & RADEON_RESET_DISPLAY)
4986 		srbm_soft_reset |= SOFT_RESET_DC;
4987 
4988 	if (reset_mask & RADEON_RESET_RLC)
4989 		grbm_soft_reset |= SOFT_RESET_RLC;
4990 
4991 	if (reset_mask & RADEON_RESET_SEM)
4992 		srbm_soft_reset |= SOFT_RESET_SEM;
4993 
4994 	if (reset_mask & RADEON_RESET_IH)
4995 		srbm_soft_reset |= SOFT_RESET_IH;
4996 
4997 	if (reset_mask & RADEON_RESET_GRBM)
4998 		srbm_soft_reset |= SOFT_RESET_GRBM;
4999 
5000 	if (reset_mask & RADEON_RESET_VMC)
5001 		srbm_soft_reset |= SOFT_RESET_VMC;
5002 
5003 	if (!(rdev->flags & RADEON_IS_IGP)) {
5004 		if (reset_mask & RADEON_RESET_MC)
5005 			srbm_soft_reset |= SOFT_RESET_MC;
5006 	}
5007 
5008 	if (grbm_soft_reset) {
5009 		tmp = RREG32(GRBM_SOFT_RESET);
5010 		tmp |= grbm_soft_reset;
5011 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5012 		WREG32(GRBM_SOFT_RESET, tmp);
5013 		tmp = RREG32(GRBM_SOFT_RESET);
5014 
5015 		udelay(50);
5016 
5017 		tmp &= ~grbm_soft_reset;
5018 		WREG32(GRBM_SOFT_RESET, tmp);
5019 		tmp = RREG32(GRBM_SOFT_RESET);
5020 	}
5021 
5022 	if (srbm_soft_reset) {
5023 		tmp = RREG32(SRBM_SOFT_RESET);
5024 		tmp |= srbm_soft_reset;
5025 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5026 		WREG32(SRBM_SOFT_RESET, tmp);
5027 		tmp = RREG32(SRBM_SOFT_RESET);
5028 
5029 		udelay(50);
5030 
5031 		tmp &= ~srbm_soft_reset;
5032 		WREG32(SRBM_SOFT_RESET, tmp);
5033 		tmp = RREG32(SRBM_SOFT_RESET);
5034 	}
5035 
5036 	/* Wait a little for things to settle down */
5037 	udelay(50);
5038 
5039 	evergreen_mc_resume(rdev, &save);
5040 	udelay(50);
5041 
5042 	cik_print_gpu_status_regs(rdev);
5043 }
5044 
5045 struct kv_reset_save_regs {
5046 	u32 gmcon_reng_execute;
5047 	u32 gmcon_misc;
5048 	u32 gmcon_misc3;
5049 };
5050 
5051 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5052 				   struct kv_reset_save_regs *save)
5053 {
5054 	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5055 	save->gmcon_misc = RREG32(GMCON_MISC);
5056 	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5057 
5058 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5059 	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5060 						STCTRL_STUTTER_EN));
5061 }
5062 
5063 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5064 				      struct kv_reset_save_regs *save)
5065 {
5066 	int i;
5067 
5068 	WREG32(GMCON_PGFSM_WRITE, 0);
5069 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5070 
5071 	for (i = 0; i < 5; i++)
5072 		WREG32(GMCON_PGFSM_WRITE, 0);
5073 
5074 	WREG32(GMCON_PGFSM_WRITE, 0);
5075 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5076 
5077 	for (i = 0; i < 5; i++)
5078 		WREG32(GMCON_PGFSM_WRITE, 0);
5079 
5080 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5081 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5082 
5083 	for (i = 0; i < 5; i++)
5084 		WREG32(GMCON_PGFSM_WRITE, 0);
5085 
5086 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5087 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5088 
5089 	for (i = 0; i < 5; i++)
5090 		WREG32(GMCON_PGFSM_WRITE, 0);
5091 
5092 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5093 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5094 
5095 	for (i = 0; i < 5; i++)
5096 		WREG32(GMCON_PGFSM_WRITE, 0);
5097 
5098 	WREG32(GMCON_PGFSM_WRITE, 0);
5099 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5100 
5101 	for (i = 0; i < 5; i++)
5102 		WREG32(GMCON_PGFSM_WRITE, 0);
5103 
5104 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5105 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5106 
5107 	for (i = 0; i < 5; i++)
5108 		WREG32(GMCON_PGFSM_WRITE, 0);
5109 
5110 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5111 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5112 
5113 	for (i = 0; i < 5; i++)
5114 		WREG32(GMCON_PGFSM_WRITE, 0);
5115 
5116 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5117 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5118 
5119 	for (i = 0; i < 5; i++)
5120 		WREG32(GMCON_PGFSM_WRITE, 0);
5121 
5122 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5123 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5124 
5125 	for (i = 0; i < 5; i++)
5126 		WREG32(GMCON_PGFSM_WRITE, 0);
5127 
5128 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5129 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5130 
5131 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5132 	WREG32(GMCON_MISC, save->gmcon_misc);
5133 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5134 }
5135 
5136 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5137 {
5138 	struct evergreen_mc_save save;
5139 	struct kv_reset_save_regs kv_save = { 0 };
5140 	u32 tmp, i;
5141 
5142 	dev_info(rdev->dev, "GPU pci config reset\n");
5143 
5144 	/* disable dpm? */
5145 
5146 	/* disable cg/pg */
5147 	cik_fini_pg(rdev);
5148 	cik_fini_cg(rdev);
5149 
5150 	/* Disable GFX parsing/prefetching */
5151 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5152 
5153 	/* Disable MEC parsing/prefetching */
5154 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5155 
5156 	/* sdma0 */
5157 	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5158 	tmp |= SDMA_HALT;
5159 	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5160 	/* sdma1 */
5161 	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5162 	tmp |= SDMA_HALT;
5163 	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5164 	/* XXX other engines? */
5165 
5166 	/* halt the rlc, disable cp internal ints */
5167 	cik_rlc_stop(rdev);
5168 
5169 	udelay(50);
5170 
5171 	/* disable mem access */
5172 	evergreen_mc_stop(rdev, &save);
5173 	if (evergreen_mc_wait_for_idle(rdev)) {
5174 		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
5175 	}
5176 
5177 	if (rdev->flags & RADEON_IS_IGP)
5178 		kv_save_regs_for_reset(rdev, &kv_save);
5179 
5180 	/* disable BM */
5181 	pci_clear_master(rdev->pdev);
5182 	/* reset */
5183 	radeon_pci_config_reset(rdev);
5184 
5185 	udelay(100);
5186 
5187 	/* wait for asic to come out of reset */
5188 	for (i = 0; i < rdev->usec_timeout; i++) {
5189 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5190 			break;
5191 		udelay(1);
5192 	}
5193 
5194 	/* does asic init need to be run first??? */
5195 	if (rdev->flags & RADEON_IS_IGP)
5196 		kv_restore_regs_for_reset(rdev, &kv_save);
5197 }
5198 
5199 /**
5200  * cik_asic_reset - soft reset GPU
5201  *
5202  * @rdev: radeon_device pointer
5203  * @hard: force hard reset
5204  *
5205  * Look up which blocks are hung and attempt
5206  * to reset them.
5207  * Returns 0 for success.
5208  */
5209 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5210 {
5211 	u32 reset_mask;
5212 
5213 	if (hard) {
5214 		cik_gpu_pci_config_reset(rdev);
5215 		return 0;
5216 	}
5217 
5218 	reset_mask = cik_gpu_check_soft_reset(rdev);
5219 
5220 	if (reset_mask)
5221 		r600_set_bios_scratch_engine_hung(rdev, true);
5222 
5223 	/* try soft reset */
5224 	cik_gpu_soft_reset(rdev, reset_mask);
5225 
5226 	reset_mask = cik_gpu_check_soft_reset(rdev);
5227 
5228 	/* try pci config reset */
5229 	if (reset_mask && radeon_hard_reset)
5230 		cik_gpu_pci_config_reset(rdev);
5231 
5232 	reset_mask = cik_gpu_check_soft_reset(rdev);
5233 
5234 	if (!reset_mask)
5235 		r600_set_bios_scratch_engine_hung(rdev, false);
5236 
5237 	return 0;
5238 }
5239 
5240 /**
5241  * cik_gfx_is_lockup - check if the 3D engine is locked up
5242  *
5243  * @rdev: radeon_device pointer
5244  * @ring: radeon_ring structure holding ring information
5245  *
5246  * Check if the 3D engine is locked up (CIK).
5247  * Returns true if the engine is locked, false if not.
5248  */
5249 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5250 {
5251 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5252 
5253 	if (!(reset_mask & (RADEON_RESET_GFX |
5254 			    RADEON_RESET_COMPUTE |
5255 			    RADEON_RESET_CP))) {
5256 		radeon_ring_lockup_update(rdev, ring);
5257 		return false;
5258 	}
5259 	return radeon_ring_test_lockup(rdev, ring);
5260 }
5261 
5262 /* MC */
5263 /**
5264  * cik_mc_program - program the GPU memory controller
5265  *
5266  * @rdev: radeon_device pointer
5267  *
5268  * Set the location of vram, gart, and AGP in the GPU's
5269  * physical address space (CIK).
5270  */
5271 static void cik_mc_program(struct radeon_device *rdev)
5272 {
5273 	struct evergreen_mc_save save;
5274 	u32 tmp;
5275 	int i, j;
5276 
5277 	/* Initialize HDP */
5278 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5279 		WREG32((0x2c14 + j), 0x00000000);
5280 		WREG32((0x2c18 + j), 0x00000000);
5281 		WREG32((0x2c1c + j), 0x00000000);
5282 		WREG32((0x2c20 + j), 0x00000000);
5283 		WREG32((0x2c24 + j), 0x00000000);
5284 	}
5285 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5286 
5287 	evergreen_mc_stop(rdev, &save);
5288 	if (radeon_mc_wait_for_idle(rdev)) {
5289 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5290 	}
5291 	/* Lockout access through VGA aperture*/
5292 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5293 	/* Update configuration */
5294 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5295 	       rdev->mc.vram_start >> 12);
5296 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5297 	       rdev->mc.vram_end >> 12);
5298 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5299 	       rdev->vram_scratch.gpu_addr >> 12);
5300 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5301 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5302 	WREG32(MC_VM_FB_LOCATION, tmp);
5303 	/* XXX double check these! */
5304 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5305 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5306 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5307 	WREG32(MC_VM_AGP_BASE, 0);
5308 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5309 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5310 	if (radeon_mc_wait_for_idle(rdev)) {
5311 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5312 	}
5313 	evergreen_mc_resume(rdev, &save);
5314 	/* we need to own VRAM, so turn off the VGA renderer here
5315 	 * to stop it overwriting our objects */
5316 	rv515_vga_render_disable(rdev);
5317 }
5318 
5319 /**
5320  * cik_mc_init - initialize the memory controller driver params
5321  *
5322  * @rdev: radeon_device pointer
5323  *
5324  * Look up the amount of vram, vram width, and decide how to place
5325  * vram and gart within the GPU's physical address space (CIK).
5326  * Returns 0 for success.
5327  */
5328 static int cik_mc_init(struct radeon_device *rdev)
5329 {
5330 	u32 tmp;
5331 	int chansize, numchan;
5332 
5333 	/* Get VRAM informations */
5334 	rdev->mc.vram_is_ddr = true;
5335 	tmp = RREG32(MC_ARB_RAMCFG);
5336 	if (tmp & CHANSIZE_MASK) {
5337 		chansize = 64;
5338 	} else {
5339 		chansize = 32;
5340 	}
5341 	tmp = RREG32(MC_SHARED_CHMAP);
5342 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5343 	case 0:
5344 	default:
5345 		numchan = 1;
5346 		break;
5347 	case 1:
5348 		numchan = 2;
5349 		break;
5350 	case 2:
5351 		numchan = 4;
5352 		break;
5353 	case 3:
5354 		numchan = 8;
5355 		break;
5356 	case 4:
5357 		numchan = 3;
5358 		break;
5359 	case 5:
5360 		numchan = 6;
5361 		break;
5362 	case 6:
5363 		numchan = 10;
5364 		break;
5365 	case 7:
5366 		numchan = 12;
5367 		break;
5368 	case 8:
5369 		numchan = 16;
5370 		break;
5371 	}
5372 	rdev->mc.vram_width = numchan * chansize;
5373 	/* Could aper size report 0 ? */
5374 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5375 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5376 	/* size in MB on si */
5377 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5378 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5379 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5380 	si_vram_gtt_location(rdev, &rdev->mc);
5381 	radeon_update_bandwidth_info(rdev);
5382 
5383 	return 0;
5384 }
5385 
5386 /*
5387  * GART
5388  * VMID 0 is the physical GPU addresses as used by the kernel.
5389  * VMIDs 1-15 are used for userspace clients and are handled
5390  * by the radeon vm/hsa code.
5391  */
5392 /**
5393  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5394  *
5395  * @rdev: radeon_device pointer
5396  *
5397  * Flush the TLB for the VMID 0 page table (CIK).
5398  */
5399 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5400 {
5401 	/* flush hdp cache */
5402 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5403 
5404 	/* bits 0-15 are the VM contexts0-15 */
5405 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5406 }
5407 
5408 /**
5409  * cik_pcie_gart_enable - gart enable
5410  *
5411  * @rdev: radeon_device pointer
5412  *
5413  * This sets up the TLBs, programs the page tables for VMID0,
5414  * sets up the hw for VMIDs 1-15 which are allocated on
5415  * demand, and sets up the global locations for the LDS, GDS,
5416  * and GPUVM for FSA64 clients (CIK).
5417  * Returns 0 for success, errors for failure.
5418  */
5419 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5420 {
5421 	int r, i;
5422 
5423 	if (rdev->gart.robj == NULL) {
5424 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5425 		return -EINVAL;
5426 	}
5427 	r = radeon_gart_table_vram_pin(rdev);
5428 	if (r)
5429 		return r;
5430 	/* Setup TLB control */
5431 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5432 	       (0xA << 7) |
5433 	       ENABLE_L1_TLB |
5434 	       ENABLE_L1_FRAGMENT_PROCESSING |
5435 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5436 	       ENABLE_ADVANCED_DRIVER_MODEL |
5437 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5438 	/* Setup L2 cache */
5439 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5440 	       ENABLE_L2_FRAGMENT_PROCESSING |
5441 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5442 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5443 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5444 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5445 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5446 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5447 	       BANK_SELECT(4) |
5448 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5449 	/* setup context0 */
5450 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5451 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5452 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5453 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5454 			(u32)(rdev->dummy_page.addr >> 12));
5455 	WREG32(VM_CONTEXT0_CNTL2, 0);
5456 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5457 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5458 
5459 	WREG32(0x15D4, 0);
5460 	WREG32(0x15D8, 0);
5461 	WREG32(0x15DC, 0);
5462 
5463 	/* restore context1-15 */
5464 	/* set vm size, must be a multiple of 4 */
5465 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5466 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5467 	for (i = 1; i < 16; i++) {
5468 		if (i < 8)
5469 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5470 			       rdev->vm_manager.saved_table_addr[i]);
5471 		else
5472 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5473 			       rdev->vm_manager.saved_table_addr[i]);
5474 	}
5475 
5476 	/* enable context1-15 */
5477 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5478 	       (u32)(rdev->dummy_page.addr >> 12));
5479 	WREG32(VM_CONTEXT1_CNTL2, 4);
5480 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5481 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5482 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5483 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5484 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5485 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5486 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5487 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5488 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5489 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5490 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5491 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5492 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5493 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5494 
5495 	if (rdev->family == CHIP_KAVERI) {
5496 		u32 tmp = RREG32(CHUB_CONTROL);
5497 		tmp &= ~BYPASS_VM;
5498 		WREG32(CHUB_CONTROL, tmp);
5499 	}
5500 
5501 	/* XXX SH_MEM regs */
5502 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5503 	mutex_lock(&rdev->srbm_mutex);
5504 	for (i = 0; i < 16; i++) {
5505 		cik_srbm_select(rdev, 0, 0, 0, i);
5506 		/* CP and shaders */
5507 		WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
5508 		WREG32(SH_MEM_APE1_BASE, 1);
5509 		WREG32(SH_MEM_APE1_LIMIT, 0);
5510 		WREG32(SH_MEM_BASES, 0);
5511 		/* SDMA GFX */
5512 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5513 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5514 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5515 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5516 		/* XXX SDMA RLC - todo */
5517 	}
5518 	cik_srbm_select(rdev, 0, 0, 0, 0);
5519 	mutex_unlock(&rdev->srbm_mutex);
5520 
5521 	cik_pcie_gart_tlb_flush(rdev);
5522 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5523 		 (unsigned)(rdev->mc.gtt_size >> 20),
5524 		 (unsigned long long)rdev->gart.table_addr);
5525 	rdev->gart.ready = true;
5526 	return 0;
5527 }
5528 
5529 /**
5530  * cik_pcie_gart_disable - gart disable
5531  *
5532  * @rdev: radeon_device pointer
5533  *
5534  * This disables all VM page table (CIK).
5535  */
5536 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5537 {
5538 	unsigned i;
5539 
5540 	for (i = 1; i < 16; ++i) {
5541 		uint32_t reg;
5542 		if (i < 8)
5543 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5544 		else
5545 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5546 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5547 	}
5548 
5549 	/* Disable all tables */
5550 	WREG32(VM_CONTEXT0_CNTL, 0);
5551 	WREG32(VM_CONTEXT1_CNTL, 0);
5552 	/* Setup TLB control */
5553 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5554 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5555 	/* Setup L2 cache */
5556 	WREG32(VM_L2_CNTL,
5557 	       ENABLE_L2_FRAGMENT_PROCESSING |
5558 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5559 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5560 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5561 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5562 	WREG32(VM_L2_CNTL2, 0);
5563 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5564 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5565 	radeon_gart_table_vram_unpin(rdev);
5566 }
5567 
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART, then frees the GART table in VRAM and tears
 * down the common GART state (CIK).
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* hardware first, then the table, then the generic bookkeeping */
	cik_pcie_gart_disable(rdev);

	radeon_gart_table_vram_free(rdev);

	radeon_gart_fini(rdev);
}
5581 
5582 /* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer (unused)
 * @ib: indirect buffer pointer (unused)
 *
 * CIK uses hw IB checking so there is nothing to validate here (CIK).
 * Always returns 0.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	(void)rdev;
	(void)ib;

	return 0;
}
5595 
5596 /*
5597  * vm
5598  * VMID 0 is the physical GPU addresses as used by the kernel.
5599  * VMIDs 1-15 are used for userspace clients and are handled
5600  * by the radeon vm/hsa code.
5601  */
5602 /**
5603  * cik_vm_init - cik vm init callback
5604  *
5605  * @rdev: radeon_device pointer
5606  *
5607  * Inits cik specific vm parameters (number of VMs, base of vram for
5608  * VMIDs 1-15) (CIK).
5609  * Returns 0 for success.
5610  */
5611 int cik_vm_init(struct radeon_device *rdev)
5612 {
5613 	/*
5614 	 * number of VMs
5615 	 * VMID 0 is reserved for System
5616 	 * radeon graphics/compute will use VMIDs 1-15
5617 	 */
5618 	rdev->vm_manager.nvm = 16;
5619 	/* base offset of vram pages */
5620 	if (rdev->flags & RADEON_IS_IGP) {
5621 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5622 		tmp <<= 22;
5623 		rdev->vm_manager.vram_base_offset = tmp;
5624 	} else
5625 		rdev->vm_manager.vram_base_offset = 0;
5626 
5627 	return 0;
5628 }
5629 
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Counterpart of cik_vm_init(); there is currently no asic specific
 * VM state to tear down (CIK).
 */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* nothing to do */
}
5640 
5641 /**
5642  * cik_vm_decode_fault - print human readable fault info
5643  *
5644  * @rdev: radeon_device pointer
5645  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5646  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5647  * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5648  *
5649  * Print human readable fault information (CIK).
5650  */
5651 static void cik_vm_decode_fault(struct radeon_device *rdev,
5652 				u32 status, u32 addr, u32 mc_client)
5653 {
5654 	u32 mc_id;
5655 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5656 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5657 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5658 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5659 
5660 	if (rdev->family == CHIP_HAWAII)
5661 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5662 	else
5663 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5664 
5665 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5666 	       protections, vmid, addr,
5667 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5668 	       block, mc_client, mc_id);
5669 }
5670 
5671 /*
5672  * cik_vm_flush - cik vm flush using the CP
5673  *
5674  * Update the page table base and flush the VM TLB
5675  * using the CP (CIK).
5676  */
5677 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5678 		  unsigned vm_id, uint64_t pd_addr)
5679 {
5680 	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5681 
5682 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5683 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5684 				 WRITE_DATA_DST_SEL(0)));
5685 	if (vm_id < 8) {
5686 		radeon_ring_write(ring,
5687 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5688 	} else {
5689 		radeon_ring_write(ring,
5690 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5691 	}
5692 	radeon_ring_write(ring, 0);
5693 	radeon_ring_write(ring, pd_addr >> 12);
5694 
5695 	/* update SH_MEM_* regs */
5696 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5697 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5698 				 WRITE_DATA_DST_SEL(0)));
5699 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5700 	radeon_ring_write(ring, 0);
5701 	radeon_ring_write(ring, VMID(vm_id));
5702 
5703 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5704 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5705 				 WRITE_DATA_DST_SEL(0)));
5706 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5707 	radeon_ring_write(ring, 0);
5708 
5709 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5710 	radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
5711 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5712 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5713 
5714 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5715 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5716 				 WRITE_DATA_DST_SEL(0)));
5717 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5718 	radeon_ring_write(ring, 0);
5719 	radeon_ring_write(ring, VMID(0));
5720 
5721 	/* HDP flush */
5722 	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5723 
5724 	/* bits 0-15 are the VM contexts0-15 */
5725 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5726 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5727 				 WRITE_DATA_DST_SEL(0)));
5728 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5729 	radeon_ring_write(ring, 0);
5730 	radeon_ring_write(ring, 1 << vm_id);
5731 
5732 	/* wait for the invalidate to complete */
5733 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5734 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5735 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
5736 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5737 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5738 	radeon_ring_write(ring, 0);
5739 	radeon_ring_write(ring, 0); /* ref */
5740 	radeon_ring_write(ring, 0); /* mask */
5741 	radeon_ring_write(ring, 0x20); /* poll interval */
5742 
5743 	/* compute doesn't have PFP */
5744 	if (usepfp) {
5745 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5746 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5747 		radeon_ring_write(ring, 0x0);
5748 	}
5749 }
5750 
5751 /*
5752  * RLC
5753  * The RLC is a multi-purpose microengine that handles a
5754  * variety of functions, the most important of which is
5755  * the interrupt controller.
5756  */
5757 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5758 					  bool enable)
5759 {
5760 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5761 
5762 	if (enable)
5763 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5764 	else
5765 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5766 	WREG32(CP_INT_CNTL_RING0, tmp);
5767 }
5768 
5769 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5770 {
5771 	u32 tmp;
5772 
5773 	tmp = RREG32(RLC_LB_CNTL);
5774 	if (enable)
5775 		tmp |= LOAD_BALANCE_ENABLE;
5776 	else
5777 		tmp &= ~LOAD_BALANCE_ENABLE;
5778 	WREG32(RLC_LB_CNTL, tmp);
5779 }
5780 
5781 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5782 {
5783 	u32 i, j, k;
5784 	u32 mask;
5785 
5786 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5787 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5788 			cik_select_se_sh(rdev, i, j);
5789 			for (k = 0; k < rdev->usec_timeout; k++) {
5790 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5791 					break;
5792 				udelay(1);
5793 			}
5794 		}
5795 	}
5796 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5797 
5798 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5799 	for (k = 0; k < rdev->usec_timeout; k++) {
5800 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5801 			break;
5802 		udelay(1);
5803 	}
5804 }
5805 
5806 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5807 {
5808 	u32 tmp;
5809 
5810 	tmp = RREG32(RLC_CNTL);
5811 	if (tmp != rlc)
5812 		WREG32(RLC_CNTL, rlc);
5813 }
5814 
5815 static u32 cik_halt_rlc(struct radeon_device *rdev)
5816 {
5817 	u32 data, orig;
5818 
5819 	orig = data = RREG32(RLC_CNTL);
5820 
5821 	if (data & RLC_ENABLE) {
5822 		u32 i;
5823 
5824 		data &= ~RLC_ENABLE;
5825 		WREG32(RLC_CNTL, data);
5826 
5827 		for (i = 0; i < rdev->usec_timeout; i++) {
5828 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5829 				break;
5830 			udelay(1);
5831 		}
5832 
5833 		cik_wait_for_rlc_serdes(rdev);
5834 	}
5835 
5836 	return orig;
5837 }
5838 
5839 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5840 {
5841 	u32 tmp, i, mask;
5842 
5843 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5844 	WREG32(RLC_GPR_REG2, tmp);
5845 
5846 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5847 	for (i = 0; i < rdev->usec_timeout; i++) {
5848 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5849 			break;
5850 		udelay(1);
5851 	}
5852 
5853 	for (i = 0; i < rdev->usec_timeout; i++) {
5854 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5855 			break;
5856 		udelay(1);
5857 	}
5858 }
5859 
5860 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5861 {
5862 	u32 tmp;
5863 
5864 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5865 	WREG32(RLC_GPR_REG2, tmp);
5866 }
5867 
5868 /**
5869  * cik_rlc_stop - stop the RLC ME
5870  *
5871  * @rdev: radeon_device pointer
5872  *
5873  * Halt the RLC ME (MicroEngine) (CIK).
5874  */
5875 static void cik_rlc_stop(struct radeon_device *rdev)
5876 {
5877 	WREG32(RLC_CNTL, 0);
5878 
5879 	cik_enable_gui_idle_interrupt(rdev, false);
5880 
5881 	cik_wait_for_rlc_serdes(rdev);
5882 }
5883 
5884 /**
5885  * cik_rlc_start - start the RLC ME
5886  *
5887  * @rdev: radeon_device pointer
5888  *
5889  * Unhalt the RLC ME (MicroEngine) (CIK).
5890  */
5891 static void cik_rlc_start(struct radeon_device *rdev)
5892 {
5893 	WREG32(RLC_CNTL, RLC_ENABLE);
5894 
5895 	cik_enable_gui_idle_interrupt(rdev, true);
5896 
5897 	udelay(50);
5898 }
5899 
5900 /**
5901  * cik_rlc_resume - setup the RLC hw
5902  *
5903  * @rdev: radeon_device pointer
5904  *
5905  * Initialize the RLC registers, load the ucode,
5906  * and start the RLC (CIK).
5907  * Returns 0 for success, -EINVAL if the ucode is not available.
5908  */
5909 static int cik_rlc_resume(struct radeon_device *rdev)
5910 {
5911 	u32 i, size, tmp;
5912 
5913 	if (!rdev->rlc_fw)
5914 		return -EINVAL;
5915 
5916 	cik_rlc_stop(rdev);
5917 
5918 	/* disable CG */
5919 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5920 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5921 
5922 	si_rlc_reset(rdev);
5923 
5924 	cik_init_pg(rdev);
5925 
5926 	cik_init_cg(rdev);
5927 
5928 	WREG32(RLC_LB_CNTR_INIT, 0);
5929 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5930 
5931 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5932 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5933 	WREG32(RLC_LB_PARAMS, 0x00600408);
5934 	WREG32(RLC_LB_CNTL, 0x80000004);
5935 
5936 	WREG32(RLC_MC_CNTL, 0);
5937 	WREG32(RLC_UCODE_CNTL, 0);
5938 
5939 	if (rdev->new_fw) {
5940 		const struct rlc_firmware_header_v1_0 *hdr =
5941 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5942 		const __le32 *fw_data = (const __le32 *)
5943 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5944 
5945 		radeon_ucode_print_rlc_hdr(&hdr->header);
5946 
5947 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5948 		WREG32(RLC_GPM_UCODE_ADDR, 0);
5949 		for (i = 0; i < size; i++)
5950 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
5951 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
5952 	} else {
5953 		const __be32 *fw_data;
5954 
5955 		switch (rdev->family) {
5956 		case CHIP_BONAIRE:
5957 		case CHIP_HAWAII:
5958 		default:
5959 			size = BONAIRE_RLC_UCODE_SIZE;
5960 			break;
5961 		case CHIP_KAVERI:
5962 			size = KV_RLC_UCODE_SIZE;
5963 			break;
5964 		case CHIP_KABINI:
5965 			size = KB_RLC_UCODE_SIZE;
5966 			break;
5967 		case CHIP_MULLINS:
5968 			size = ML_RLC_UCODE_SIZE;
5969 			break;
5970 		}
5971 
5972 		fw_data = (const __be32 *)rdev->rlc_fw->data;
5973 		WREG32(RLC_GPM_UCODE_ADDR, 0);
5974 		for (i = 0; i < size; i++)
5975 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5976 		WREG32(RLC_GPM_UCODE_ADDR, 0);
5977 	}
5978 
5979 	/* XXX - find out what chips support lbpw */
5980 	cik_enable_lbpw(rdev, false);
5981 
5982 	if (rdev->family == CHIP_BONAIRE)
5983 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
5984 
5985 	cik_rlc_start(rdev);
5986 
5987 	return 0;
5988 }
5989 
5990 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5991 {
5992 	u32 data, orig, tmp, tmp2;
5993 
5994 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5995 
5996 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5997 		cik_enable_gui_idle_interrupt(rdev, true);
5998 
5999 		tmp = cik_halt_rlc(rdev);
6000 
6001 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6002 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6003 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6004 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6005 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6006 
6007 		cik_update_rlc(rdev, tmp);
6008 
6009 		data |= CGCG_EN | CGLS_EN;
6010 	} else {
6011 		cik_enable_gui_idle_interrupt(rdev, false);
6012 
6013 		RREG32(CB_CGTT_SCLK_CTRL);
6014 		RREG32(CB_CGTT_SCLK_CTRL);
6015 		RREG32(CB_CGTT_SCLK_CTRL);
6016 		RREG32(CB_CGTT_SCLK_CTRL);
6017 
6018 		data &= ~(CGCG_EN | CGLS_EN);
6019 	}
6020 
6021 	if (orig != data)
6022 		WREG32(RLC_CGCG_CGLS_CTRL, data);
6023 
6024 }
6025 
6026 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6027 {
6028 	u32 data, orig, tmp = 0;
6029 
6030 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6031 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6032 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6033 				orig = data = RREG32(CP_MEM_SLP_CNTL);
6034 				data |= CP_MEM_LS_EN;
6035 				if (orig != data)
6036 					WREG32(CP_MEM_SLP_CNTL, data);
6037 			}
6038 		}
6039 
6040 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6041 		data |= 0x00000001;
6042 		data &= 0xfffffffd;
6043 		if (orig != data)
6044 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6045 
6046 		tmp = cik_halt_rlc(rdev);
6047 
6048 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6049 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6050 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6051 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6052 		WREG32(RLC_SERDES_WR_CTRL, data);
6053 
6054 		cik_update_rlc(rdev, tmp);
6055 
6056 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6057 			orig = data = RREG32(CGTS_SM_CTRL_REG);
6058 			data &= ~SM_MODE_MASK;
6059 			data |= SM_MODE(0x2);
6060 			data |= SM_MODE_ENABLE;
6061 			data &= ~CGTS_OVERRIDE;
6062 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6063 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6064 				data &= ~CGTS_LS_OVERRIDE;
6065 			data &= ~ON_MONITOR_ADD_MASK;
6066 			data |= ON_MONITOR_ADD_EN;
6067 			data |= ON_MONITOR_ADD(0x96);
6068 			if (orig != data)
6069 				WREG32(CGTS_SM_CTRL_REG, data);
6070 		}
6071 	} else {
6072 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6073 		data |= 0x00000003;
6074 		if (orig != data)
6075 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6076 
6077 		data = RREG32(RLC_MEM_SLP_CNTL);
6078 		if (data & RLC_MEM_LS_EN) {
6079 			data &= ~RLC_MEM_LS_EN;
6080 			WREG32(RLC_MEM_SLP_CNTL, data);
6081 		}
6082 
6083 		data = RREG32(CP_MEM_SLP_CNTL);
6084 		if (data & CP_MEM_LS_EN) {
6085 			data &= ~CP_MEM_LS_EN;
6086 			WREG32(CP_MEM_SLP_CNTL, data);
6087 		}
6088 
6089 		orig = data = RREG32(CGTS_SM_CTRL_REG);
6090 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6091 		if (orig != data)
6092 			WREG32(CGTS_SM_CTRL_REG, data);
6093 
6094 		tmp = cik_halt_rlc(rdev);
6095 
6096 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6097 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6098 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6099 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6100 		WREG32(RLC_SERDES_WR_CTRL, data);
6101 
6102 		cik_update_rlc(rdev, tmp);
6103 	}
6104 }
6105 
6106 static const u32 mc_cg_registers[] =
6107 {
6108 	MC_HUB_MISC_HUB_CG,
6109 	MC_HUB_MISC_SIP_CG,
6110 	MC_HUB_MISC_VM_CG,
6111 	MC_XPB_CLK_GAT,
6112 	ATC_MISC_CG,
6113 	MC_CITF_MISC_WR_CG,
6114 	MC_CITF_MISC_RD_CG,
6115 	MC_CITF_MISC_VM_CG,
6116 	VM_L2_CG,
6117 };
6118 
6119 static void cik_enable_mc_ls(struct radeon_device *rdev,
6120 			     bool enable)
6121 {
6122 	int i;
6123 	u32 orig, data;
6124 
6125 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6126 		orig = data = RREG32(mc_cg_registers[i]);
6127 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6128 			data |= MC_LS_ENABLE;
6129 		else
6130 			data &= ~MC_LS_ENABLE;
6131 		if (data != orig)
6132 			WREG32(mc_cg_registers[i], data);
6133 	}
6134 }
6135 
6136 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6137 			       bool enable)
6138 {
6139 	int i;
6140 	u32 orig, data;
6141 
6142 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6143 		orig = data = RREG32(mc_cg_registers[i]);
6144 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6145 			data |= MC_CG_ENABLE;
6146 		else
6147 			data &= ~MC_CG_ENABLE;
6148 		if (data != orig)
6149 			WREG32(mc_cg_registers[i], data);
6150 	}
6151 }
6152 
6153 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6154 				 bool enable)
6155 {
6156 	u32 orig, data;
6157 
6158 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6159 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6160 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6161 	} else {
6162 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6163 		data |= 0xff000000;
6164 		if (data != orig)
6165 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6166 
6167 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6168 		data |= 0xff000000;
6169 		if (data != orig)
6170 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6171 	}
6172 }
6173 
6174 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6175 				 bool enable)
6176 {
6177 	u32 orig, data;
6178 
6179 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6180 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6181 		data |= 0x100;
6182 		if (orig != data)
6183 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6184 
6185 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6186 		data |= 0x100;
6187 		if (orig != data)
6188 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6189 	} else {
6190 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6191 		data &= ~0x100;
6192 		if (orig != data)
6193 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6194 
6195 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6196 		data &= ~0x100;
6197 		if (orig != data)
6198 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6199 	}
6200 }
6201 
6202 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6203 				bool enable)
6204 {
6205 	u32 orig, data;
6206 
6207 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6208 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6209 		data = 0xfff;
6210 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6211 
6212 		orig = data = RREG32(UVD_CGC_CTRL);
6213 		data |= DCM;
6214 		if (orig != data)
6215 			WREG32(UVD_CGC_CTRL, data);
6216 	} else {
6217 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6218 		data &= ~0xfff;
6219 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6220 
6221 		orig = data = RREG32(UVD_CGC_CTRL);
6222 		data &= ~DCM;
6223 		if (orig != data)
6224 			WREG32(UVD_CGC_CTRL, data);
6225 	}
6226 }
6227 
6228 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6229 			       bool enable)
6230 {
6231 	u32 orig, data;
6232 
6233 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6234 
6235 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6236 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6237 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6238 	else
6239 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6240 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6241 
6242 	if (orig != data)
6243 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6244 }
6245 
6246 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6247 				bool enable)
6248 {
6249 	u32 orig, data;
6250 
6251 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6252 
6253 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6254 		data &= ~CLOCK_GATING_DIS;
6255 	else
6256 		data |= CLOCK_GATING_DIS;
6257 
6258 	if (orig != data)
6259 		WREG32(HDP_HOST_PATH_CNTL, data);
6260 }
6261 
6262 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6263 			      bool enable)
6264 {
6265 	u32 orig, data;
6266 
6267 	orig = data = RREG32(HDP_MEM_POWER_LS);
6268 
6269 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6270 		data |= HDP_LS_ENABLE;
6271 	else
6272 		data &= ~HDP_LS_ENABLE;
6273 
6274 	if (orig != data)
6275 		WREG32(HDP_MEM_POWER_LS, data);
6276 }
6277 
6278 void cik_update_cg(struct radeon_device *rdev,
6279 		   u32 block, bool enable)
6280 {
6281 
6282 	if (block & RADEON_CG_BLOCK_GFX) {
6283 		cik_enable_gui_idle_interrupt(rdev, false);
6284 		/* order matters! */
6285 		if (enable) {
6286 			cik_enable_mgcg(rdev, true);
6287 			cik_enable_cgcg(rdev, true);
6288 		} else {
6289 			cik_enable_cgcg(rdev, false);
6290 			cik_enable_mgcg(rdev, false);
6291 		}
6292 		cik_enable_gui_idle_interrupt(rdev, true);
6293 	}
6294 
6295 	if (block & RADEON_CG_BLOCK_MC) {
6296 		if (!(rdev->flags & RADEON_IS_IGP)) {
6297 			cik_enable_mc_mgcg(rdev, enable);
6298 			cik_enable_mc_ls(rdev, enable);
6299 		}
6300 	}
6301 
6302 	if (block & RADEON_CG_BLOCK_SDMA) {
6303 		cik_enable_sdma_mgcg(rdev, enable);
6304 		cik_enable_sdma_mgls(rdev, enable);
6305 	}
6306 
6307 	if (block & RADEON_CG_BLOCK_BIF) {
6308 		cik_enable_bif_mgls(rdev, enable);
6309 	}
6310 
6311 	if (block & RADEON_CG_BLOCK_UVD) {
6312 		if (rdev->has_uvd)
6313 			cik_enable_uvd_mgcg(rdev, enable);
6314 	}
6315 
6316 	if (block & RADEON_CG_BLOCK_HDP) {
6317 		cik_enable_hdp_mgcg(rdev, enable);
6318 		cik_enable_hdp_ls(rdev, enable);
6319 	}
6320 
6321 	if (block & RADEON_CG_BLOCK_VCE) {
6322 		vce_v2_0_enable_mgcg(rdev, enable);
6323 	}
6324 }
6325 
6326 static void cik_init_cg(struct radeon_device *rdev)
6327 {
6328 
6329 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6330 
6331 	if (rdev->has_uvd)
6332 		si_init_uvd_internal_cg(rdev);
6333 
6334 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6335 			     RADEON_CG_BLOCK_SDMA |
6336 			     RADEON_CG_BLOCK_BIF |
6337 			     RADEON_CG_BLOCK_UVD |
6338 			     RADEON_CG_BLOCK_HDP), true);
6339 }
6340 
6341 static void cik_fini_cg(struct radeon_device *rdev)
6342 {
6343 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6344 			     RADEON_CG_BLOCK_SDMA |
6345 			     RADEON_CG_BLOCK_BIF |
6346 			     RADEON_CG_BLOCK_UVD |
6347 			     RADEON_CG_BLOCK_HDP), false);
6348 
6349 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6350 }
6351 
6352 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6353 					  bool enable)
6354 {
6355 	u32 data, orig;
6356 
6357 	orig = data = RREG32(RLC_PG_CNTL);
6358 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6359 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6360 	else
6361 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6362 	if (orig != data)
6363 		WREG32(RLC_PG_CNTL, data);
6364 }
6365 
6366 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6367 					  bool enable)
6368 {
6369 	u32 data, orig;
6370 
6371 	orig = data = RREG32(RLC_PG_CNTL);
6372 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6373 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6374 	else
6375 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6376 	if (orig != data)
6377 		WREG32(RLC_PG_CNTL, data);
6378 }
6379 
6380 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6381 {
6382 	u32 data, orig;
6383 
6384 	orig = data = RREG32(RLC_PG_CNTL);
6385 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6386 		data &= ~DISABLE_CP_PG;
6387 	else
6388 		data |= DISABLE_CP_PG;
6389 	if (orig != data)
6390 		WREG32(RLC_PG_CNTL, data);
6391 }
6392 
6393 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6394 {
6395 	u32 data, orig;
6396 
6397 	orig = data = RREG32(RLC_PG_CNTL);
6398 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6399 		data &= ~DISABLE_GDS_PG;
6400 	else
6401 		data |= DISABLE_GDS_PG;
6402 	if (orig != data)
6403 		WREG32(RLC_PG_CNTL, data);
6404 }
6405 
6406 #define CP_ME_TABLE_SIZE    96
6407 #define CP_ME_TABLE_OFFSET  2048
6408 #define CP_MEC_TABLE_OFFSET 4096
6409 
6410 void cik_init_cp_pg_table(struct radeon_device *rdev)
6411 {
6412 	volatile u32 *dst_ptr;
6413 	int me, i, max_me = 4;
6414 	u32 bo_offset = 0;
6415 	u32 table_offset, table_size;
6416 
6417 	if (rdev->family == CHIP_KAVERI)
6418 		max_me = 5;
6419 
6420 	if (rdev->rlc.cp_table_ptr == NULL)
6421 		return;
6422 
6423 	/* write the cp table buffer */
6424 	dst_ptr = rdev->rlc.cp_table_ptr;
6425 	for (me = 0; me < max_me; me++) {
6426 		if (rdev->new_fw) {
6427 			const __le32 *fw_data;
6428 			const struct gfx_firmware_header_v1_0 *hdr;
6429 
6430 			if (me == 0) {
6431 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6432 				fw_data = (const __le32 *)
6433 					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6434 				table_offset = le32_to_cpu(hdr->jt_offset);
6435 				table_size = le32_to_cpu(hdr->jt_size);
6436 			} else if (me == 1) {
6437 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6438 				fw_data = (const __le32 *)
6439 					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6440 				table_offset = le32_to_cpu(hdr->jt_offset);
6441 				table_size = le32_to_cpu(hdr->jt_size);
6442 			} else if (me == 2) {
6443 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6444 				fw_data = (const __le32 *)
6445 					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6446 				table_offset = le32_to_cpu(hdr->jt_offset);
6447 				table_size = le32_to_cpu(hdr->jt_size);
6448 			} else if (me == 3) {
6449 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6450 				fw_data = (const __le32 *)
6451 					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6452 				table_offset = le32_to_cpu(hdr->jt_offset);
6453 				table_size = le32_to_cpu(hdr->jt_size);
6454 			} else {
6455 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6456 				fw_data = (const __le32 *)
6457 					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6458 				table_offset = le32_to_cpu(hdr->jt_offset);
6459 				table_size = le32_to_cpu(hdr->jt_size);
6460 			}
6461 
6462 			for (i = 0; i < table_size; i ++) {
6463 				dst_ptr[bo_offset + i] =
6464 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6465 			}
6466 			bo_offset += table_size;
6467 		} else {
6468 			const __be32 *fw_data;
6469 			table_size = CP_ME_TABLE_SIZE;
6470 
6471 			if (me == 0) {
6472 				fw_data = (const __be32 *)rdev->ce_fw->data;
6473 				table_offset = CP_ME_TABLE_OFFSET;
6474 			} else if (me == 1) {
6475 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6476 				table_offset = CP_ME_TABLE_OFFSET;
6477 			} else if (me == 2) {
6478 				fw_data = (const __be32 *)rdev->me_fw->data;
6479 				table_offset = CP_ME_TABLE_OFFSET;
6480 			} else {
6481 				fw_data = (const __be32 *)rdev->mec_fw->data;
6482 				table_offset = CP_MEC_TABLE_OFFSET;
6483 			}
6484 
6485 			for (i = 0; i < table_size; i ++) {
6486 				dst_ptr[bo_offset + i] =
6487 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6488 			}
6489 			bo_offset += table_size;
6490 		}
6491 	}
6492 }
6493 
6494 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6495 				bool enable)
6496 {
6497 	u32 data, orig;
6498 
6499 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6500 		orig = data = RREG32(RLC_PG_CNTL);
6501 		data |= GFX_PG_ENABLE;
6502 		if (orig != data)
6503 			WREG32(RLC_PG_CNTL, data);
6504 
6505 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6506 		data |= AUTO_PG_EN;
6507 		if (orig != data)
6508 			WREG32(RLC_AUTO_PG_CTRL, data);
6509 	} else {
6510 		orig = data = RREG32(RLC_PG_CNTL);
6511 		data &= ~GFX_PG_ENABLE;
6512 		if (orig != data)
6513 			WREG32(RLC_PG_CNTL, data);
6514 
6515 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6516 		data &= ~AUTO_PG_EN;
6517 		if (orig != data)
6518 			WREG32(RLC_AUTO_PG_CTRL, data);
6519 
6520 		data = RREG32(DB_RENDER_CONTROL);
6521 	}
6522 }
6523 
6524 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6525 {
6526 	u32 mask = 0, tmp, tmp1;
6527 	int i;
6528 
6529 	cik_select_se_sh(rdev, se, sh);
6530 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6531 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6532 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6533 
6534 	tmp &= 0xffff0000;
6535 
6536 	tmp |= tmp1;
6537 	tmp >>= 16;
6538 
6539 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6540 		mask <<= 1;
6541 		mask |= 1;
6542 	}
6543 
6544 	return (~tmp) & mask;
6545 }
6546 
6547 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6548 {
6549 	u32 i, j, k, active_cu_number = 0;
6550 	u32 mask, counter, cu_bitmap;
6551 	u32 tmp = 0;
6552 
6553 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6554 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6555 			mask = 1;
6556 			cu_bitmap = 0;
6557 			counter = 0;
6558 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6559 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6560 					if (counter < 2)
6561 						cu_bitmap |= mask;
6562 					counter ++;
6563 				}
6564 				mask <<= 1;
6565 			}
6566 
6567 			active_cu_number += counter;
6568 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6569 		}
6570 	}
6571 
6572 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6573 
6574 	tmp = RREG32(RLC_MAX_PG_CU);
6575 	tmp &= ~MAX_PU_CU_MASK;
6576 	tmp |= MAX_PU_CU(active_cu_number);
6577 	WREG32(RLC_MAX_PG_CU, tmp);
6578 }
6579 
6580 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6581 				       bool enable)
6582 {
6583 	u32 data, orig;
6584 
6585 	orig = data = RREG32(RLC_PG_CNTL);
6586 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6587 		data |= STATIC_PER_CU_PG_ENABLE;
6588 	else
6589 		data &= ~STATIC_PER_CU_PG_ENABLE;
6590 	if (orig != data)
6591 		WREG32(RLC_PG_CNTL, data);
6592 }
6593 
6594 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6595 					bool enable)
6596 {
6597 	u32 data, orig;
6598 
6599 	orig = data = RREG32(RLC_PG_CNTL);
6600 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6601 		data |= DYN_PER_CU_PG_ENABLE;
6602 	else
6603 		data &= ~DYN_PER_CU_PG_ENABLE;
6604 	if (orig != data)
6605 		WREG32(RLC_PG_CNTL, data);
6606 }
6607 
6608 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6609 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6610 
6611 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6612 {
6613 	u32 data, orig;
6614 	u32 i;
6615 
6616 	if (rdev->rlc.cs_data) {
6617 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6618 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6619 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6620 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6621 	} else {
6622 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6623 		for (i = 0; i < 3; i++)
6624 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6625 	}
6626 	if (rdev->rlc.reg_list) {
6627 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6628 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6629 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6630 	}
6631 
6632 	orig = data = RREG32(RLC_PG_CNTL);
6633 	data |= GFX_PG_SRC;
6634 	if (orig != data)
6635 		WREG32(RLC_PG_CNTL, data);
6636 
6637 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6638 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6639 
6640 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6641 	data &= ~IDLE_POLL_COUNT_MASK;
6642 	data |= IDLE_POLL_COUNT(0x60);
6643 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6644 
6645 	data = 0x10101010;
6646 	WREG32(RLC_PG_DELAY, data);
6647 
6648 	data = RREG32(RLC_PG_DELAY_2);
6649 	data &= ~0xff;
6650 	data |= 0x3;
6651 	WREG32(RLC_PG_DELAY_2, data);
6652 
6653 	data = RREG32(RLC_AUTO_PG_CTRL);
6654 	data &= ~GRBM_REG_SGIT_MASK;
6655 	data |= GRBM_REG_SGIT(0x700);
6656 	WREG32(RLC_AUTO_PG_CTRL, data);
6657 
6658 }
6659 
6660 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6661 {
6662 	cik_enable_gfx_cgpg(rdev, enable);
6663 	cik_enable_gfx_static_mgpg(rdev, enable);
6664 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
6665 }
6666 
6667 u32 cik_get_csb_size(struct radeon_device *rdev)
6668 {
6669 	u32 count = 0;
6670 	const struct cs_section_def *sect = NULL;
6671 	const struct cs_extent_def *ext = NULL;
6672 
6673 	if (rdev->rlc.cs_data == NULL)
6674 		return 0;
6675 
6676 	/* begin clear state */
6677 	count += 2;
6678 	/* context control state */
6679 	count += 3;
6680 
6681 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6682 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6683 			if (sect->id == SECT_CONTEXT)
6684 				count += 2 + ext->reg_count;
6685 			else
6686 				return 0;
6687 		}
6688 	}
6689 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6690 	count += 4;
6691 	/* end clear state */
6692 	count += 2;
6693 	/* clear state */
6694 	count += 2;
6695 
6696 	return count;
6697 }
6698 
6699 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6700 {
6701 	u32 count = 0, i;
6702 	const struct cs_section_def *sect = NULL;
6703 	const struct cs_extent_def *ext = NULL;
6704 
6705 	if (rdev->rlc.cs_data == NULL)
6706 		return;
6707 	if (buffer == NULL)
6708 		return;
6709 
6710 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6711 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6712 
6713 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6714 	buffer[count++] = cpu_to_le32(0x80000000);
6715 	buffer[count++] = cpu_to_le32(0x80000000);
6716 
6717 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6718 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6719 			if (sect->id == SECT_CONTEXT) {
6720 				buffer[count++] =
6721 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6722 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6723 				for (i = 0; i < ext->reg_count; i++)
6724 					buffer[count++] = cpu_to_le32(ext->extent[i]);
6725 			} else {
6726 				return;
6727 			}
6728 		}
6729 	}
6730 
6731 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6732 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6733 	switch (rdev->family) {
6734 	case CHIP_BONAIRE:
6735 		buffer[count++] = cpu_to_le32(0x16000012);
6736 		buffer[count++] = cpu_to_le32(0x00000000);
6737 		break;
6738 	case CHIP_KAVERI:
6739 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6740 		buffer[count++] = cpu_to_le32(0x00000000);
6741 		break;
6742 	case CHIP_KABINI:
6743 	case CHIP_MULLINS:
6744 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6745 		buffer[count++] = cpu_to_le32(0x00000000);
6746 		break;
6747 	case CHIP_HAWAII:
6748 		buffer[count++] = cpu_to_le32(0x3a00161a);
6749 		buffer[count++] = cpu_to_le32(0x0000002e);
6750 		break;
6751 	default:
6752 		buffer[count++] = cpu_to_le32(0x00000000);
6753 		buffer[count++] = cpu_to_le32(0x00000000);
6754 		break;
6755 	}
6756 
6757 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6758 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6759 
6760 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6761 	buffer[count++] = cpu_to_le32(0);
6762 }
6763 
6764 static void cik_init_pg(struct radeon_device *rdev)
6765 {
6766 	if (rdev->pg_flags) {
6767 		cik_enable_sck_slowdown_on_pu(rdev, true);
6768 		cik_enable_sck_slowdown_on_pd(rdev, true);
6769 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6770 			cik_init_gfx_cgpg(rdev);
6771 			cik_enable_cp_pg(rdev, true);
6772 			cik_enable_gds_pg(rdev, true);
6773 		}
6774 		cik_init_ao_cu_mask(rdev);
6775 		cik_update_gfx_pg(rdev, true);
6776 	}
6777 }
6778 
6779 static void cik_fini_pg(struct radeon_device *rdev)
6780 {
6781 	if (rdev->pg_flags) {
6782 		cik_update_gfx_pg(rdev, false);
6783 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6784 			cik_enable_cp_pg(rdev, false);
6785 			cik_enable_gds_pg(rdev, false);
6786 		}
6787 	}
6788 }
6789 
6790 /*
6791  * Interrupts
6792  * Starting with r6xx, interrupts are handled via a ring buffer.
6793  * Ring buffers are areas of GPU accessible memory that the GPU
6794  * writes interrupt vectors into and the host reads vectors out of.
6795  * There is a rptr (read pointer) that determines where the
6796  * host is currently reading, and a wptr (write pointer)
6797  * which determines where the GPU has written.  When the
6798  * pointers are equal, the ring is idle.  When the GPU
6799  * writes vectors to the ring buffer, it increments the
6800  * wptr.  When there is an interrupt, the host then starts
6801  * fetching commands and processing them until the pointers are
6802  * equal again at which point it updates the rptr.
6803  */
6804 
6805 /**
6806  * cik_enable_interrupts - Enable the interrupt ring buffer
6807  *
6808  * @rdev: radeon_device pointer
6809  *
6810  * Enable the interrupt ring buffer (CIK).
6811  */
6812 static void cik_enable_interrupts(struct radeon_device *rdev)
6813 {
6814 	u32 ih_cntl = RREG32(IH_CNTL);
6815 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6816 
6817 	ih_cntl |= ENABLE_INTR;
6818 	ih_rb_cntl |= IH_RB_ENABLE;
6819 	WREG32(IH_CNTL, ih_cntl);
6820 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6821 	rdev->ih.enabled = true;
6822 }
6823 
6824 /**
6825  * cik_disable_interrupts - Disable the interrupt ring buffer
6826  *
6827  * @rdev: radeon_device pointer
6828  *
6829  * Disable the interrupt ring buffer (CIK).
6830  */
6831 static void cik_disable_interrupts(struct radeon_device *rdev)
6832 {
6833 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6834 	u32 ih_cntl = RREG32(IH_CNTL);
6835 
6836 	ih_rb_cntl &= ~IH_RB_ENABLE;
6837 	ih_cntl &= ~ENABLE_INTR;
6838 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6839 	WREG32(IH_CNTL, ih_cntl);
6840 	/* set rptr, wptr to 0 */
6841 	WREG32(IH_RB_RPTR, 0);
6842 	WREG32(IH_RB_WPTR, 0);
6843 	rdev->ih.enabled = false;
6844 	rdev->ih.rptr = 0;
6845 }
6846 
6847 /**
6848  * cik_disable_interrupt_state - Disable all interrupt sources
6849  *
6850  * @rdev: radeon_device pointer
6851  *
6852  * Clear all interrupt enable bits used by the driver (CIK).
6853  */
6854 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6855 {
6856 	u32 tmp;
6857 
6858 	/* gfx ring */
6859 	tmp = RREG32(CP_INT_CNTL_RING0) &
6860 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6861 	WREG32(CP_INT_CNTL_RING0, tmp);
6862 	/* sdma */
6863 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6864 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6865 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6866 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6867 	/* compute queues */
6868 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6869 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6870 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6871 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6872 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6873 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6874 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6875 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6876 	/* grbm */
6877 	WREG32(GRBM_INT_CNTL, 0);
6878 	/* SRBM */
6879 	WREG32(SRBM_INT_CNTL, 0);
6880 	/* vline/vblank, etc. */
6881 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6882 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6883 	if (rdev->num_crtc >= 4) {
6884 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6885 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6886 	}
6887 	if (rdev->num_crtc >= 6) {
6888 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6889 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6890 	}
6891 	/* pflip */
6892 	if (rdev->num_crtc >= 2) {
6893 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6894 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6895 	}
6896 	if (rdev->num_crtc >= 4) {
6897 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6898 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6899 	}
6900 	if (rdev->num_crtc >= 6) {
6901 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6902 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6903 	}
6904 
6905 	/* dac hotplug */
6906 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6907 
6908 	/* digital hotplug */
6909 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6910 	WREG32(DC_HPD1_INT_CONTROL, tmp);
6911 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6912 	WREG32(DC_HPD2_INT_CONTROL, tmp);
6913 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6914 	WREG32(DC_HPD3_INT_CONTROL, tmp);
6915 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6916 	WREG32(DC_HPD4_INT_CONTROL, tmp);
6917 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6918 	WREG32(DC_HPD5_INT_CONTROL, tmp);
6919 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6920 	WREG32(DC_HPD6_INT_CONTROL, tmp);
6921 
6922 }
6923 
6924 /**
6925  * cik_irq_init - init and enable the interrupt ring
6926  *
6927  * @rdev: radeon_device pointer
6928  *
6929  * Allocate a ring buffer for the interrupt controller,
6930  * enable the RLC, disable interrupts, enable the IH
6931  * ring buffer and enable it (CIK).
6932  * Called at device load and reume.
6933  * Returns 0 for success, errors for failure.
6934  */
6935 static int cik_irq_init(struct radeon_device *rdev)
6936 {
6937 	int ret = 0;
6938 	int rb_bufsz;
6939 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6940 
6941 	/* allocate ring */
6942 	ret = r600_ih_ring_alloc(rdev);
6943 	if (ret)
6944 		return ret;
6945 
6946 	/* disable irqs */
6947 	cik_disable_interrupts(rdev);
6948 
6949 	/* init rlc */
6950 	ret = cik_rlc_resume(rdev);
6951 	if (ret) {
6952 		r600_ih_ring_fini(rdev);
6953 		return ret;
6954 	}
6955 
6956 	/* setup interrupt control */
6957 	/* set dummy read address to dummy page address */
6958 	WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
6959 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6960 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6961 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6962 	 */
6963 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6964 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6965 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6966 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6967 
6968 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6969 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6970 
6971 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6972 		      IH_WPTR_OVERFLOW_CLEAR |
6973 		      (rb_bufsz << 1));
6974 
6975 	if (rdev->wb.enabled)
6976 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6977 
6978 	/* set the writeback address whether it's enabled or not */
6979 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6980 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6981 
6982 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6983 
6984 	/* set rptr, wptr to 0 */
6985 	WREG32(IH_RB_RPTR, 0);
6986 	WREG32(IH_RB_WPTR, 0);
6987 
6988 	/* Default settings for IH_CNTL (disabled at first) */
6989 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6990 	/* RPTR_REARM only works if msi's are enabled */
6991 	if (rdev->msi_enabled)
6992 		ih_cntl |= RPTR_REARM;
6993 	WREG32(IH_CNTL, ih_cntl);
6994 
6995 	/* force the active interrupt state to all disabled */
6996 	cik_disable_interrupt_state(rdev);
6997 
6998 	pci_set_master(rdev->pdev);
6999 
7000 	/* enable irqs */
7001 	cik_enable_interrupts(rdev);
7002 
7003 	return ret;
7004 }
7005 
7006 /**
7007  * cik_irq_set - enable/disable interrupt sources
7008  *
7009  * @rdev: radeon_device pointer
7010  *
7011  * Enable interrupt sources on the GPU (vblanks, hpd,
7012  * etc.) (CIK).
7013  * Returns 0 for success, errors for failure.
7014  */
7015 int cik_irq_set(struct radeon_device *rdev)
7016 {
7017 	u32 cp_int_cntl;
7018 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
7019 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
7020 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7021 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7022 	u32 grbm_int_cntl = 0;
7023 	u32 dma_cntl, dma_cntl1;
7024 
7025 	if (!rdev->irq.installed) {
7026 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7027 		return -EINVAL;
7028 	}
7029 	/* don't enable anything if the ih is disabled */
7030 	if (!rdev->ih.enabled) {
7031 		cik_disable_interrupts(rdev);
7032 		/* force the active interrupt state to all disabled */
7033 		cik_disable_interrupt_state(rdev);
7034 		return 0;
7035 	}
7036 
7037 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7038 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7039 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7040 
7041 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7042 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7043 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7044 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7045 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7046 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7047 
7048 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7049 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7050 
7051 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7052 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7053 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7054 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7055 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7056 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7057 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7058 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7059 
7060 	/* enable CP interrupts on all rings */
7061 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7062 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7063 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7064 	}
7065 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7066 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7067 		DRM_DEBUG("si_irq_set: sw int cp1\n");
7068 		if (ring->me == 1) {
7069 			switch (ring->pipe) {
7070 			case 0:
7071 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7072 				break;
7073 			case 1:
7074 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7075 				break;
7076 			case 2:
7077 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7078 				break;
7079 			case 3:
7080 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7081 				break;
7082 			default:
7083 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7084 				break;
7085 			}
7086 		} else if (ring->me == 2) {
7087 			switch (ring->pipe) {
7088 			case 0:
7089 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7090 				break;
7091 			case 1:
7092 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7093 				break;
7094 			case 2:
7095 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7096 				break;
7097 			case 3:
7098 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7099 				break;
7100 			default:
7101 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7102 				break;
7103 			}
7104 		} else {
7105 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7106 		}
7107 	}
7108 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7109 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7110 		DRM_DEBUG("si_irq_set: sw int cp2\n");
7111 		if (ring->me == 1) {
7112 			switch (ring->pipe) {
7113 			case 0:
7114 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7115 				break;
7116 			case 1:
7117 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7118 				break;
7119 			case 2:
7120 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7121 				break;
7122 			case 3:
7123 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7124 				break;
7125 			default:
7126 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7127 				break;
7128 			}
7129 		} else if (ring->me == 2) {
7130 			switch (ring->pipe) {
7131 			case 0:
7132 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7133 				break;
7134 			case 1:
7135 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7136 				break;
7137 			case 2:
7138 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7139 				break;
7140 			case 3:
7141 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7142 				break;
7143 			default:
7144 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7145 				break;
7146 			}
7147 		} else {
7148 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7149 		}
7150 	}
7151 
7152 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7153 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7154 		dma_cntl |= TRAP_ENABLE;
7155 	}
7156 
7157 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7158 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7159 		dma_cntl1 |= TRAP_ENABLE;
7160 	}
7161 
7162 	if (rdev->irq.crtc_vblank_int[0] ||
7163 	    atomic_read(&rdev->irq.pflip[0])) {
7164 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7165 		crtc1 |= VBLANK_INTERRUPT_MASK;
7166 	}
7167 	if (rdev->irq.crtc_vblank_int[1] ||
7168 	    atomic_read(&rdev->irq.pflip[1])) {
7169 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7170 		crtc2 |= VBLANK_INTERRUPT_MASK;
7171 	}
7172 	if (rdev->irq.crtc_vblank_int[2] ||
7173 	    atomic_read(&rdev->irq.pflip[2])) {
7174 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7175 		crtc3 |= VBLANK_INTERRUPT_MASK;
7176 	}
7177 	if (rdev->irq.crtc_vblank_int[3] ||
7178 	    atomic_read(&rdev->irq.pflip[3])) {
7179 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7180 		crtc4 |= VBLANK_INTERRUPT_MASK;
7181 	}
7182 	if (rdev->irq.crtc_vblank_int[4] ||
7183 	    atomic_read(&rdev->irq.pflip[4])) {
7184 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7185 		crtc5 |= VBLANK_INTERRUPT_MASK;
7186 	}
7187 	if (rdev->irq.crtc_vblank_int[5] ||
7188 	    atomic_read(&rdev->irq.pflip[5])) {
7189 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7190 		crtc6 |= VBLANK_INTERRUPT_MASK;
7191 	}
7192 	if (rdev->irq.hpd[0]) {
7193 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7194 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7195 	}
7196 	if (rdev->irq.hpd[1]) {
7197 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7198 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7199 	}
7200 	if (rdev->irq.hpd[2]) {
7201 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7202 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7203 	}
7204 	if (rdev->irq.hpd[3]) {
7205 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7206 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7207 	}
7208 	if (rdev->irq.hpd[4]) {
7209 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7210 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7211 	}
7212 	if (rdev->irq.hpd[5]) {
7213 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7214 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7215 	}
7216 
7217 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7218 
7219 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7220 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7221 
7222 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7223 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7224 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7225 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7226 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7227 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7228 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7229 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7230 
7231 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7232 
7233 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7234 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7235 	if (rdev->num_crtc >= 4) {
7236 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7237 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7238 	}
7239 	if (rdev->num_crtc >= 6) {
7240 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7241 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7242 	}
7243 
7244 	if (rdev->num_crtc >= 2) {
7245 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7246 		       GRPH_PFLIP_INT_MASK);
7247 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7248 		       GRPH_PFLIP_INT_MASK);
7249 	}
7250 	if (rdev->num_crtc >= 4) {
7251 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7252 		       GRPH_PFLIP_INT_MASK);
7253 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7254 		       GRPH_PFLIP_INT_MASK);
7255 	}
7256 	if (rdev->num_crtc >= 6) {
7257 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7258 		       GRPH_PFLIP_INT_MASK);
7259 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7260 		       GRPH_PFLIP_INT_MASK);
7261 	}
7262 
7263 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7264 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7265 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7266 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7267 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7268 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7269 
7270 	/* posting read */
7271 	RREG32(SRBM_STATUS);
7272 
7273 	return 0;
7274 }
7275 
7276 /**
7277  * cik_irq_ack - ack interrupt sources
7278  *
7279  * @rdev: radeon_device pointer
7280  *
7281  * Ack interrupt sources on the GPU (vblanks, hpd,
7282  * etc.) (CIK).  Certain interrupts sources are sw
7283  * generated and do not require an explicit ack.
7284  */
7285 static inline void cik_irq_ack(struct radeon_device *rdev)
7286 {
7287 	u32 tmp;
7288 
7289 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7290 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7291 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7292 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7293 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7294 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7295 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7296 
7297 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7298 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7299 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7300 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7301 	if (rdev->num_crtc >= 4) {
7302 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7303 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7304 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7305 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7306 	}
7307 	if (rdev->num_crtc >= 6) {
7308 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7309 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7310 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7311 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7312 	}
7313 
7314 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7315 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7316 		       GRPH_PFLIP_INT_CLEAR);
7317 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7318 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7319 		       GRPH_PFLIP_INT_CLEAR);
7320 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7321 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7322 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7323 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7324 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7325 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7326 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7327 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7328 
7329 	if (rdev->num_crtc >= 4) {
7330 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7331 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7332 			       GRPH_PFLIP_INT_CLEAR);
7333 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7334 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7335 			       GRPH_PFLIP_INT_CLEAR);
7336 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7337 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7338 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7339 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7340 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7341 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7342 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7343 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7344 	}
7345 
7346 	if (rdev->num_crtc >= 6) {
7347 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7348 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7349 			       GRPH_PFLIP_INT_CLEAR);
7350 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7351 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7352 			       GRPH_PFLIP_INT_CLEAR);
7353 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7354 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7355 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7356 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7357 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7358 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7359 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7360 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7361 	}
7362 
7363 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7364 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7365 		tmp |= DC_HPDx_INT_ACK;
7366 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7367 	}
7368 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7369 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7370 		tmp |= DC_HPDx_INT_ACK;
7371 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7372 	}
7373 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7374 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7375 		tmp |= DC_HPDx_INT_ACK;
7376 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7377 	}
7378 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7379 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7380 		tmp |= DC_HPDx_INT_ACK;
7381 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7382 	}
7383 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7384 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7385 		tmp |= DC_HPDx_INT_ACK;
7386 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7387 	}
7388 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7389 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7390 		tmp |= DC_HPDx_INT_ACK;
7391 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7392 	}
7393 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7394 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7395 		tmp |= DC_HPDx_RX_INT_ACK;
7396 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7397 	}
7398 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7399 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7400 		tmp |= DC_HPDx_RX_INT_ACK;
7401 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7402 	}
7403 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7404 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7405 		tmp |= DC_HPDx_RX_INT_ACK;
7406 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7407 	}
7408 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7409 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7410 		tmp |= DC_HPDx_RX_INT_ACK;
7411 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7412 	}
7413 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7414 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7415 		tmp |= DC_HPDx_RX_INT_ACK;
7416 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7417 	}
7418 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7419 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7420 		tmp |= DC_HPDx_RX_INT_ACK;
7421 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7422 	}
7423 }
7424 
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable the IH ring, wait 1 ms, ack whatever is still pending and then
 * clear all interrupt source enables on the hw (CIK).
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);

	/* wait before acknowledging what is still pending */
	mdelay(1);
	cik_irq_ack(rdev);

	cik_disable_interrupt_state(rdev);
}
7440 
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts via cik_irq_disable() and then stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* interrupts first, then the RLC */
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7454 
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Driver-unload path (CIK): run the suspend sequence to disable
 * interrupts on the hw, then release the IH ring buffer.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* same quiesce sequence as suspend ... */
	cik_irq_suspend(rdev);

	/* ... followed by freeing the IH ring */
	r600_ih_ring_fini(rdev);
}
7469 
7470 /**
7471  * cik_get_ih_wptr - get the IH ring buffer wptr
7472  *
7473  * @rdev: radeon_device pointer
7474  *
7475  * Get the IH ring buffer wptr from either the register
7476  * or the writeback memory buffer (CIK).  Also check for
7477  * ring buffer overflow and deal with it.
7478  * Used by cik_irq_process().
7479  * Returns the value of the wptr.
7480  */
7481 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7482 {
7483 	u32 wptr, tmp;
7484 
7485 	if (rdev->wb.enabled)
7486 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7487 	else
7488 		wptr = RREG32(IH_RB_WPTR);
7489 
7490 	if (wptr & RB_OVERFLOW) {
7491 		wptr &= ~RB_OVERFLOW;
7492 		/* When a ring buffer overflow happen start parsing interrupt
7493 		 * from the last not overwritten vector (wptr + 16). Hopefully
7494 		 * this should allow us to catchup.
7495 		 */
7496 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7497 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7498 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7499 		tmp = RREG32(IH_RB_CNTL);
7500 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7501 		WREG32(IH_RB_CNTL, tmp);
7502 	}
7503 	return (wptr & rdev->ih.ptr_mask);
7504 }
7505 
7506 /*        CIK IV Ring
7507  * Each IV ring entry is 128 bits:
7508  * [7:0]    - interrupt source id
7509  * [31:8]   - reserved
7510  * [59:32]  - interrupt source data
7511  * [63:60]  - reserved
7512  * [71:64]  - RINGID
7513  *            CP:
7514  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7515  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7516  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7517  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7518  *            PIPE_ID - ME0 0=3D
7519  *                    - ME1&2 compute dispatcher (4 pipes each)
7520  *            SDMA:
7521  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7522  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7523  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7524  * [79:72]  - VMID
7525  * [95:80]  - PASID
7526  * [127:96] - reserved
7527  */
7528 /**
7529  * cik_irq_process - interrupt handler
7530  *
7531  * @rdev: radeon_device pointer
7532  *
7533  * Interrupt hander (CIK).  Walk the IH ring,
7534  * ack interrupts and schedule work to handle
7535  * interrupt events.
7536  * Returns irq process return code.
7537  */
7538 int cik_irq_process(struct radeon_device *rdev)
7539 {
7540 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7541 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7542 	u32 wptr;
7543 	u32 rptr;
7544 	u32 src_id, src_data, ring_id;
7545 	u8 me_id, pipe_id, queue_id;
7546 	u32 ring_index;
7547 	bool queue_hotplug = false;
7548 	bool queue_dp = false;
7549 	bool queue_reset = false;
7550 	u32 addr, status, mc_client;
7551 	bool queue_thermal = false;
7552 
7553 	if (!rdev->ih.enabled || rdev->shutdown)
7554 		return IRQ_NONE;
7555 
7556 	wptr = cik_get_ih_wptr(rdev);
7557 
7558 restart_ih:
7559 	/* is somebody else already processing irqs? */
7560 	if (atomic_xchg(&rdev->ih.lock, 1))
7561 		return IRQ_NONE;
7562 
7563 	rptr = rdev->ih.rptr;
7564 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7565 
7566 	/* Order reading of wptr vs. reading of IH ring data */
7567 	rmb();
7568 
7569 	/* display interrupts */
7570 	cik_irq_ack(rdev);
7571 
7572 	while (rptr != wptr) {
7573 		/* wptr/rptr are in bytes! */
7574 		ring_index = rptr / 4;
7575 
7576 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7577 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7578 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7579 
7580 		switch (src_id) {
7581 		case 1: /* D1 vblank/vline */
7582 			switch (src_data) {
7583 			case 0: /* D1 vblank */
7584 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7585 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7586 
7587 				if (rdev->irq.crtc_vblank_int[0]) {
7588 					drm_handle_vblank(rdev->ddev, 0);
7589 					rdev->pm.vblank_sync = true;
7590 					wake_up(&rdev->irq.vblank_queue);
7591 				}
7592 				if (atomic_read(&rdev->irq.pflip[0]))
7593 					radeon_crtc_handle_vblank(rdev, 0);
7594 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7595 				DRM_DEBUG("IH: D1 vblank\n");
7596 
7597 				break;
7598 			case 1: /* D1 vline */
7599 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7600 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7601 
7602 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7603 				DRM_DEBUG("IH: D1 vline\n");
7604 
7605 				break;
7606 			default:
7607 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7608 				break;
7609 			}
7610 			break;
7611 		case 2: /* D2 vblank/vline */
7612 			switch (src_data) {
7613 			case 0: /* D2 vblank */
7614 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7615 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7616 
7617 				if (rdev->irq.crtc_vblank_int[1]) {
7618 					drm_handle_vblank(rdev->ddev, 1);
7619 					rdev->pm.vblank_sync = true;
7620 					wake_up(&rdev->irq.vblank_queue);
7621 				}
7622 				if (atomic_read(&rdev->irq.pflip[1]))
7623 					radeon_crtc_handle_vblank(rdev, 1);
7624 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7625 				DRM_DEBUG("IH: D2 vblank\n");
7626 
7627 				break;
7628 			case 1: /* D2 vline */
7629 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7630 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7631 
7632 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7633 				DRM_DEBUG("IH: D2 vline\n");
7634 
7635 				break;
7636 			default:
7637 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7638 				break;
7639 			}
7640 			break;
7641 		case 3: /* D3 vblank/vline */
7642 			switch (src_data) {
7643 			case 0: /* D3 vblank */
7644 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7645 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7646 
7647 				if (rdev->irq.crtc_vblank_int[2]) {
7648 					drm_handle_vblank(rdev->ddev, 2);
7649 					rdev->pm.vblank_sync = true;
7650 					wake_up(&rdev->irq.vblank_queue);
7651 				}
7652 				if (atomic_read(&rdev->irq.pflip[2]))
7653 					radeon_crtc_handle_vblank(rdev, 2);
7654 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7655 				DRM_DEBUG("IH: D3 vblank\n");
7656 
7657 				break;
7658 			case 1: /* D3 vline */
7659 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7660 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7661 
7662 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7663 				DRM_DEBUG("IH: D3 vline\n");
7664 
7665 				break;
7666 			default:
7667 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7668 				break;
7669 			}
7670 			break;
7671 		case 4: /* D4 vblank/vline */
7672 			switch (src_data) {
7673 			case 0: /* D4 vblank */
7674 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7675 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7676 
7677 				if (rdev->irq.crtc_vblank_int[3]) {
7678 					drm_handle_vblank(rdev->ddev, 3);
7679 					rdev->pm.vblank_sync = true;
7680 					wake_up(&rdev->irq.vblank_queue);
7681 				}
7682 				if (atomic_read(&rdev->irq.pflip[3]))
7683 					radeon_crtc_handle_vblank(rdev, 3);
7684 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7685 				DRM_DEBUG("IH: D4 vblank\n");
7686 
7687 				break;
7688 			case 1: /* D4 vline */
7689 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7690 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7691 
7692 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7693 				DRM_DEBUG("IH: D4 vline\n");
7694 
7695 				break;
7696 			default:
7697 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7698 				break;
7699 			}
7700 			break;
7701 		case 5: /* D5 vblank/vline */
7702 			switch (src_data) {
7703 			case 0: /* D5 vblank */
7704 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7705 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7706 
7707 				if (rdev->irq.crtc_vblank_int[4]) {
7708 					drm_handle_vblank(rdev->ddev, 4);
7709 					rdev->pm.vblank_sync = true;
7710 					wake_up(&rdev->irq.vblank_queue);
7711 				}
7712 				if (atomic_read(&rdev->irq.pflip[4]))
7713 					radeon_crtc_handle_vblank(rdev, 4);
7714 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7715 				DRM_DEBUG("IH: D5 vblank\n");
7716 
7717 				break;
7718 			case 1: /* D5 vline */
7719 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7720 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7721 
7722 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7723 				DRM_DEBUG("IH: D5 vline\n");
7724 
7725 				break;
7726 			default:
7727 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7728 				break;
7729 			}
7730 			break;
7731 		case 6: /* D6 vblank/vline */
7732 			switch (src_data) {
7733 			case 0: /* D6 vblank */
7734 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7735 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7736 
7737 				if (rdev->irq.crtc_vblank_int[5]) {
7738 					drm_handle_vblank(rdev->ddev, 5);
7739 					rdev->pm.vblank_sync = true;
7740 					wake_up(&rdev->irq.vblank_queue);
7741 				}
7742 				if (atomic_read(&rdev->irq.pflip[5]))
7743 					radeon_crtc_handle_vblank(rdev, 5);
7744 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7745 				DRM_DEBUG("IH: D6 vblank\n");
7746 
7747 				break;
7748 			case 1: /* D6 vline */
7749 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7750 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7751 
7752 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7753 				DRM_DEBUG("IH: D6 vline\n");
7754 
7755 				break;
7756 			default:
7757 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7758 				break;
7759 			}
7760 			break;
7761 		case 8: /* D1 page flip */
7762 		case 10: /* D2 page flip */
7763 		case 12: /* D3 page flip */
7764 		case 14: /* D4 page flip */
7765 		case 16: /* D5 page flip */
7766 		case 18: /* D6 page flip */
7767 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7768 			if (radeon_use_pflipirq > 0)
7769 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7770 			break;
7771 		case 42: /* HPD hotplug */
7772 			switch (src_data) {
7773 			case 0:
7774 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7775 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7776 
7777 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7778 				queue_hotplug = true;
7779 				DRM_DEBUG("IH: HPD1\n");
7780 
7781 				break;
7782 			case 1:
7783 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7784 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7785 
7786 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7787 				queue_hotplug = true;
7788 				DRM_DEBUG("IH: HPD2\n");
7789 
7790 				break;
7791 			case 2:
7792 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7793 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7794 
7795 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7796 				queue_hotplug = true;
7797 				DRM_DEBUG("IH: HPD3\n");
7798 
7799 				break;
7800 			case 3:
7801 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7802 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7803 
7804 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7805 				queue_hotplug = true;
7806 				DRM_DEBUG("IH: HPD4\n");
7807 
7808 				break;
7809 			case 4:
7810 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7811 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7812 
7813 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7814 				queue_hotplug = true;
7815 				DRM_DEBUG("IH: HPD5\n");
7816 
7817 				break;
7818 			case 5:
7819 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7820 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7821 
7822 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7823 				queue_hotplug = true;
7824 				DRM_DEBUG("IH: HPD6\n");
7825 
7826 				break;
7827 			case 6:
7828 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7829 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7830 
7831 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7832 				queue_dp = true;
7833 				DRM_DEBUG("IH: HPD_RX 1\n");
7834 
7835 				break;
7836 			case 7:
7837 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7838 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7839 
7840 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7841 				queue_dp = true;
7842 				DRM_DEBUG("IH: HPD_RX 2\n");
7843 
7844 				break;
7845 			case 8:
7846 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7847 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7848 
7849 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7850 				queue_dp = true;
7851 				DRM_DEBUG("IH: HPD_RX 3\n");
7852 
7853 				break;
7854 			case 9:
7855 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7856 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7857 
7858 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7859 				queue_dp = true;
7860 				DRM_DEBUG("IH: HPD_RX 4\n");
7861 
7862 				break;
7863 			case 10:
7864 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7865 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7866 
7867 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7868 				queue_dp = true;
7869 				DRM_DEBUG("IH: HPD_RX 5\n");
7870 
7871 				break;
7872 			case 11:
7873 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7874 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7875 
7876 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7877 				queue_dp = true;
7878 				DRM_DEBUG("IH: HPD_RX 6\n");
7879 
7880 				break;
7881 			default:
7882 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7883 				break;
7884 			}
7885 			break;
7886 		case 96:
7887 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7888 			WREG32(SRBM_INT_ACK, 0x1);
7889 			break;
7890 		case 124: /* UVD */
7891 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7892 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7893 			break;
7894 		case 146:
7895 		case 147:
7896 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7897 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7898 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7899 			/* reset addr and status */
7900 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7901 			if (addr == 0x0 && status == 0x0)
7902 				break;
7903 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7904 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7905 				addr);
7906 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7907 				status);
7908 			cik_vm_decode_fault(rdev, status, addr, mc_client);
7909 			break;
7910 		case 167: /* VCE */
7911 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7912 			switch (src_data) {
7913 			case 0:
7914 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7915 				break;
7916 			case 1:
7917 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7918 				break;
7919 			default:
7920 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7921 				break;
7922 			}
7923 			break;
7924 		case 176: /* GFX RB CP_INT */
7925 		case 177: /* GFX IB CP_INT */
7926 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7927 			break;
7928 		case 181: /* CP EOP event */
7929 			DRM_DEBUG("IH: CP EOP\n");
7930 			/* XXX check the bitfield order! */
7931 			me_id = (ring_id & 0x60) >> 5;
7932 			pipe_id = (ring_id & 0x18) >> 3;
7933 			queue_id = (ring_id & 0x7) >> 0;
7934 			switch (me_id) {
7935 			case 0:
7936 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7937 				break;
7938 			case 1:
7939 			case 2:
7940 				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7941 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7942 				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7943 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7944 				break;
7945 			}
7946 			break;
7947 		case 184: /* CP Privileged reg access */
7948 			DRM_ERROR("Illegal register access in command stream\n");
7949 			/* XXX check the bitfield order! */
7950 			me_id = (ring_id & 0x60) >> 5;
7951 			switch (me_id) {
7952 			case 0:
7953 				/* This results in a full GPU reset, but all we need to do is soft
7954 				 * reset the CP for gfx
7955 				 */
7956 				queue_reset = true;
7957 				break;
7958 			case 1:
7959 				/* XXX compute */
7960 				queue_reset = true;
7961 				break;
7962 			case 2:
7963 				/* XXX compute */
7964 				queue_reset = true;
7965 				break;
7966 			}
7967 			break;
7968 		case 185: /* CP Privileged inst */
7969 			DRM_ERROR("Illegal instruction in command stream\n");
7970 			/* XXX check the bitfield order! */
7971 			me_id = (ring_id & 0x60) >> 5;
7972 			switch (me_id) {
7973 			case 0:
7974 				/* This results in a full GPU reset, but all we need to do is soft
7975 				 * reset the CP for gfx
7976 				 */
7977 				queue_reset = true;
7978 				break;
7979 			case 1:
7980 				/* XXX compute */
7981 				queue_reset = true;
7982 				break;
7983 			case 2:
7984 				/* XXX compute */
7985 				queue_reset = true;
7986 				break;
7987 			}
7988 			break;
7989 		case 224: /* SDMA trap event */
7990 			/* XXX check the bitfield order! */
7991 			me_id = (ring_id & 0x3) >> 0;
7992 			queue_id = (ring_id & 0xc) >> 2;
7993 			DRM_DEBUG("IH: SDMA trap\n");
7994 			switch (me_id) {
7995 			case 0:
7996 				switch (queue_id) {
7997 				case 0:
7998 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7999 					break;
8000 				case 1:
8001 					/* XXX compute */
8002 					break;
8003 				case 2:
8004 					/* XXX compute */
8005 					break;
8006 				}
8007 				break;
8008 			case 1:
8009 				switch (queue_id) {
8010 				case 0:
8011 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8012 					break;
8013 				case 1:
8014 					/* XXX compute */
8015 					break;
8016 				case 2:
8017 					/* XXX compute */
8018 					break;
8019 				}
8020 				break;
8021 			}
8022 			break;
8023 		case 230: /* thermal low to high */
8024 			DRM_DEBUG("IH: thermal low to high\n");
8025 			rdev->pm.dpm.thermal.high_to_low = false;
8026 			queue_thermal = true;
8027 			break;
8028 		case 231: /* thermal high to low */
8029 			DRM_DEBUG("IH: thermal high to low\n");
8030 			rdev->pm.dpm.thermal.high_to_low = true;
8031 			queue_thermal = true;
8032 			break;
8033 		case 233: /* GUI IDLE */
8034 			DRM_DEBUG("IH: GUI idle\n");
8035 			break;
8036 		case 241: /* SDMA Privileged inst */
8037 		case 247: /* SDMA Privileged inst */
8038 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8039 			/* XXX check the bitfield order! */
8040 			me_id = (ring_id & 0x3) >> 0;
8041 			queue_id = (ring_id & 0xc) >> 2;
8042 			switch (me_id) {
8043 			case 0:
8044 				switch (queue_id) {
8045 				case 0:
8046 					queue_reset = true;
8047 					break;
8048 				case 1:
8049 					/* XXX compute */
8050 					queue_reset = true;
8051 					break;
8052 				case 2:
8053 					/* XXX compute */
8054 					queue_reset = true;
8055 					break;
8056 				}
8057 				break;
8058 			case 1:
8059 				switch (queue_id) {
8060 				case 0:
8061 					queue_reset = true;
8062 					break;
8063 				case 1:
8064 					/* XXX compute */
8065 					queue_reset = true;
8066 					break;
8067 				case 2:
8068 					/* XXX compute */
8069 					queue_reset = true;
8070 					break;
8071 				}
8072 				break;
8073 			}
8074 			break;
8075 		default:
8076 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8077 			break;
8078 		}
8079 
8080 		/* wptr/rptr are in bytes! */
8081 		rptr += 16;
8082 		rptr &= rdev->ih.ptr_mask;
8083 		WREG32(IH_RB_RPTR, rptr);
8084 	}
8085 	if (queue_dp)
8086 		schedule_work(&rdev->dp_work);
8087 	if (queue_hotplug)
8088 		schedule_delayed_work(&rdev->hotplug_work, 0);
8089 	if (queue_reset) {
8090 		rdev->needs_reset = true;
8091 		wake_up_all(&rdev->fence_queue);
8092 	}
8093 	if (queue_thermal)
8094 		schedule_work(&rdev->pm.dpm.thermal.work);
8095 	rdev->ih.rptr = rptr;
8096 	atomic_set(&rdev->ih.lock, 0);
8097 
8098 	/* make sure wptr hasn't changed while processing */
8099 	wptr = cik_get_ih_wptr(rdev);
8100 	if (wptr != rptr)
8101 		goto restart_ih;
8102 
8103 	return IRQ_HANDLED;
8104 }
8105 
8106 /*
8107  * startup/shutdown callbacks
8108  */
8109 static void cik_uvd_init(struct radeon_device *rdev)
8110 {
8111 	int r;
8112 
8113 	if (!rdev->has_uvd)
8114 		return;
8115 
8116 	r = radeon_uvd_init(rdev);
8117 	if (r) {
8118 		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8119 		/*
8120 		 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8121 		 * to early fails cik_uvd_start() and thus nothing happens
8122 		 * there. So it is pointless to try to go through that code
8123 		 * hence why we disable uvd here.
8124 		 */
8125 		rdev->has_uvd = false;
8126 		return;
8127 	}
8128 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8129 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8130 }
8131 
8132 static void cik_uvd_start(struct radeon_device *rdev)
8133 {
8134 	int r;
8135 
8136 	if (!rdev->has_uvd)
8137 		return;
8138 
8139 	r = radeon_uvd_resume(rdev);
8140 	if (r) {
8141 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8142 		goto error;
8143 	}
8144 	r = uvd_v4_2_resume(rdev);
8145 	if (r) {
8146 		dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8147 		goto error;
8148 	}
8149 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8150 	if (r) {
8151 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8152 		goto error;
8153 	}
8154 	return;
8155 
8156 error:
8157 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8158 }
8159 
8160 static void cik_uvd_resume(struct radeon_device *rdev)
8161 {
8162 	struct radeon_ring *ring;
8163 	int r;
8164 
8165 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8166 		return;
8167 
8168 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8169 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8170 	if (r) {
8171 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8172 		return;
8173 	}
8174 	r = uvd_v1_0_init(rdev);
8175 	if (r) {
8176 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8177 		return;
8178 	}
8179 }
8180 
8181 static void cik_vce_init(struct radeon_device *rdev)
8182 {
8183 	int r;
8184 
8185 	if (!rdev->has_vce)
8186 		return;
8187 
8188 	r = radeon_vce_init(rdev);
8189 	if (r) {
8190 		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8191 		/*
8192 		 * At this point rdev->vce.vcpu_bo is NULL which trickles down
8193 		 * to early fails cik_vce_start() and thus nothing happens
8194 		 * there. So it is pointless to try to go through that code
8195 		 * hence why we disable vce here.
8196 		 */
8197 		rdev->has_vce = false;
8198 		return;
8199 	}
8200 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8201 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8202 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8203 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8204 }
8205 
8206 static void cik_vce_start(struct radeon_device *rdev)
8207 {
8208 	int r;
8209 
8210 	if (!rdev->has_vce)
8211 		return;
8212 
8213 	r = radeon_vce_resume(rdev);
8214 	if (r) {
8215 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8216 		goto error;
8217 	}
8218 	r = vce_v2_0_resume(rdev);
8219 	if (r) {
8220 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8221 		goto error;
8222 	}
8223 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8224 	if (r) {
8225 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8226 		goto error;
8227 	}
8228 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8229 	if (r) {
8230 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8231 		goto error;
8232 	}
8233 	return;
8234 
8235 error:
8236 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8237 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8238 }
8239 
8240 static void cik_vce_resume(struct radeon_device *rdev)
8241 {
8242 	struct radeon_ring *ring;
8243 	int r;
8244 
8245 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8246 		return;
8247 
8248 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8249 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8250 	if (r) {
8251 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8252 		return;
8253 	}
8254 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8255 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8256 	if (r) {
8257 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8258 		return;
8259 	}
8260 	r = vce_v1_0_init(rdev);
8261 	if (r) {
8262 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8263 		return;
8264 	}
8265 }
8266 
8267 /**
8268  * cik_startup - program the asic to a functional state
8269  *
8270  * @rdev: radeon_device pointer
8271  *
8272  * Programs the asic to a functional state (CIK).
8273  * Called by cik_init() and cik_resume().
8274  * Returns 0 for success, error for failure.
8275  */
8276 static int cik_startup(struct radeon_device *rdev)
8277 {
8278 	struct radeon_ring *ring;
8279 	u32 nop;
8280 	int r;
8281 
8282 	/* enable pcie gen2/3 link */
8283 	cik_pcie_gen3_enable(rdev);
8284 	/* enable aspm */
8285 	cik_program_aspm(rdev);
8286 
8287 	/* scratch needs to be initialized before MC */
8288 	r = r600_vram_scratch_init(rdev);
8289 	if (r)
8290 		return r;
8291 
8292 	cik_mc_program(rdev);
8293 
8294 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8295 		r = ci_mc_load_microcode(rdev);
8296 		if (r) {
8297 			DRM_ERROR("Failed to load MC firmware!\n");
8298 			return r;
8299 		}
8300 	}
8301 
8302 	r = cik_pcie_gart_enable(rdev);
8303 	if (r)
8304 		return r;
8305 	cik_gpu_init(rdev);
8306 
8307 	/* allocate rlc buffers */
8308 	if (rdev->flags & RADEON_IS_IGP) {
8309 		if (rdev->family == CHIP_KAVERI) {
8310 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8311 			rdev->rlc.reg_list_size =
8312 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8313 		} else {
8314 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8315 			rdev->rlc.reg_list_size =
8316 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8317 		}
8318 	}
8319 	rdev->rlc.cs_data = ci_cs_data;
8320 	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
8321 	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
8322 	r = sumo_rlc_init(rdev);
8323 	if (r) {
8324 		DRM_ERROR("Failed to init rlc BOs!\n");
8325 		return r;
8326 	}
8327 
8328 	/* allocate wb buffer */
8329 	r = radeon_wb_init(rdev);
8330 	if (r)
8331 		return r;
8332 
8333 	/* allocate mec buffers */
8334 	r = cik_mec_init(rdev);
8335 	if (r) {
8336 		DRM_ERROR("Failed to init MEC BOs!\n");
8337 		return r;
8338 	}
8339 
8340 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8341 	if (r) {
8342 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8343 		return r;
8344 	}
8345 
8346 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8347 	if (r) {
8348 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8349 		return r;
8350 	}
8351 
8352 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8353 	if (r) {
8354 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8355 		return r;
8356 	}
8357 
8358 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8359 	if (r) {
8360 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8361 		return r;
8362 	}
8363 
8364 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8365 	if (r) {
8366 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8367 		return r;
8368 	}
8369 
8370 	cik_uvd_start(rdev);
8371 	cik_vce_start(rdev);
8372 
8373 	/* Enable IRQ */
8374 	if (!rdev->irq.installed) {
8375 		r = radeon_irq_kms_init(rdev);
8376 		if (r)
8377 			return r;
8378 	}
8379 
8380 	r = cik_irq_init(rdev);
8381 	if (r) {
8382 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8383 		radeon_irq_kms_fini(rdev);
8384 		return r;
8385 	}
8386 	cik_irq_set(rdev);
8387 
8388 	if (rdev->family == CHIP_HAWAII) {
8389 		if (rdev->new_fw)
8390 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8391 		else
8392 			nop = RADEON_CP_PACKET2;
8393 	} else {
8394 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8395 	}
8396 
8397 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8398 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8399 			     nop);
8400 	if (r)
8401 		return r;
8402 
8403 	/* set up the compute queues */
8404 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8405 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8406 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8407 			     nop);
8408 	if (r)
8409 		return r;
8410 	ring->me = 1; /* first MEC */
8411 	ring->pipe = 0; /* first pipe */
8412 	ring->queue = 0; /* first queue */
8413 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8414 
8415 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8416 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8417 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8418 			     nop);
8419 	if (r)
8420 		return r;
8421 	/* dGPU only have 1 MEC */
8422 	ring->me = 1; /* first MEC */
8423 	ring->pipe = 0; /* first pipe */
8424 	ring->queue = 1; /* second queue */
8425 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8426 
8427 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8428 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8429 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8430 	if (r)
8431 		return r;
8432 
8433 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8434 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8435 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8436 	if (r)
8437 		return r;
8438 
8439 	r = cik_cp_resume(rdev);
8440 	if (r)
8441 		return r;
8442 
8443 	r = cik_sdma_resume(rdev);
8444 	if (r)
8445 		return r;
8446 
8447 	cik_uvd_resume(rdev);
8448 	cik_vce_resume(rdev);
8449 
8450 	r = radeon_ib_pool_init(rdev);
8451 	if (r) {
8452 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8453 		return r;
8454 	}
8455 
8456 	r = radeon_vm_manager_init(rdev);
8457 	if (r) {
8458 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8459 		return r;
8460 	}
8461 
8462 	r = radeon_audio_init(rdev);
8463 	if (r)
8464 		return r;
8465 
8466 	return 0;
8467 }
8468 
8469 /**
8470  * cik_resume - resume the asic to a functional state
8471  *
8472  * @rdev: radeon_device pointer
8473  *
8474  * Programs the asic to a functional state (CIK).
8475  * Called at resume.
8476  * Returns 0 for success, error for failure.
8477  */
8478 int cik_resume(struct radeon_device *rdev)
8479 {
8480 	int r;
8481 
8482 	/* post card */
8483 	atom_asic_init(rdev->mode_info.atom_context);
8484 
8485 	/* init golden registers */
8486 	cik_init_golden_registers(rdev);
8487 
8488 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8489 		radeon_pm_resume(rdev);
8490 
8491 	rdev->accel_working = true;
8492 	r = cik_startup(rdev);
8493 	if (r) {
8494 		DRM_ERROR("cik startup failed on resume\n");
8495 		rdev->accel_working = false;
8496 		return r;
8497 	}
8498 
8499 	return r;
8500 
8501 }
8502 
8503 /**
8504  * cik_suspend - suspend the asic
8505  *
8506  * @rdev: radeon_device pointer
8507  *
8508  * Bring the chip into a state suitable for suspend (CIK).
8509  * Called at suspend.
8510  * Returns 0 for success.
8511  */
8512 int cik_suspend(struct radeon_device *rdev)
8513 {
8514 	radeon_pm_suspend(rdev);
8515 	radeon_audio_fini(rdev);
8516 	radeon_vm_manager_fini(rdev);
8517 	cik_cp_enable(rdev, false);
8518 	cik_sdma_enable(rdev, false);
8519 	if (rdev->has_uvd) {
8520 		uvd_v1_0_fini(rdev);
8521 		radeon_uvd_suspend(rdev);
8522 	}
8523 	if (rdev->has_vce)
8524 		radeon_vce_suspend(rdev);
8525 	cik_fini_pg(rdev);
8526 	cik_fini_cg(rdev);
8527 	cik_irq_suspend(rdev);
8528 	radeon_wb_disable(rdev);
8529 	cik_pcie_gart_disable(rdev);
8530 	return 0;
8531 }
8532 
/* The plan is to move initialization into this function and use
 * helper functions, so that radeon_device_init does little more
 * than call the asic specific function. This should also allow
 * removing a bunch of callback functions such as vram_info.
 */
8539 /**
8540  * cik_init - asic specific driver and hw init
8541  *
8542  * @rdev: radeon_device pointer
8543  *
8544  * Setup asic specific driver variables and program the hw
8545  * to a functional state (CIK).
8546  * Called at driver startup.
8547  * Returns 0 for success, errors for failure.
8548  */
8549 int cik_init(struct radeon_device *rdev)
8550 {
8551 	struct radeon_ring *ring;
8552 	int r;
8553 
8554 	/* Read BIOS */
8555 	if (!radeon_get_bios(rdev)) {
8556 		if (ASIC_IS_AVIVO(rdev))
8557 			return -EINVAL;
8558 	}
8559 	/* Must be an ATOMBIOS */
8560 	if (!rdev->is_atom_bios) {
8561 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8562 		return -EINVAL;
8563 	}
8564 	r = radeon_atombios_init(rdev);
8565 	if (r)
8566 		return r;
8567 
8568 	/* Post card if necessary */
8569 	if (!radeon_card_posted(rdev)) {
8570 		if (!rdev->bios) {
8571 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8572 			return -EINVAL;
8573 		}
8574 		DRM_INFO("GPU not posted. posting now...\n");
8575 		atom_asic_init(rdev->mode_info.atom_context);
8576 	}
8577 	/* init golden registers */
8578 	cik_init_golden_registers(rdev);
8579 	/* Initialize scratch registers */
8580 	cik_scratch_init(rdev);
8581 	/* Initialize surface registers */
8582 	radeon_surface_init(rdev);
8583 	/* Initialize clocks */
8584 	radeon_get_clock_info(rdev->ddev);
8585 
8586 	/* Fence driver */
8587 	radeon_fence_driver_init(rdev);
8588 
8589 	/* initialize memory controller */
8590 	r = cik_mc_init(rdev);
8591 	if (r)
8592 		return r;
8593 	/* Memory manager */
8594 	r = radeon_bo_init(rdev);
8595 	if (r)
8596 		return r;
8597 
8598 	if (rdev->flags & RADEON_IS_IGP) {
8599 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8600 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8601 			r = cik_init_microcode(rdev);
8602 			if (r) {
8603 				DRM_ERROR("Failed to load firmware!\n");
8604 				return r;
8605 			}
8606 		}
8607 	} else {
8608 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8609 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8610 		    !rdev->mc_fw) {
8611 			r = cik_init_microcode(rdev);
8612 			if (r) {
8613 				DRM_ERROR("Failed to load firmware!\n");
8614 				return r;
8615 			}
8616 		}
8617 	}
8618 
8619 	/* Initialize power management */
8620 	radeon_pm_init(rdev);
8621 
8622 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8623 	ring->ring_obj = NULL;
8624 	r600_ring_init(rdev, ring, 1024 * 1024);
8625 
8626 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8627 	ring->ring_obj = NULL;
8628 	r600_ring_init(rdev, ring, 1024 * 1024);
8629 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8630 	if (r)
8631 		return r;
8632 
8633 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8634 	ring->ring_obj = NULL;
8635 	r600_ring_init(rdev, ring, 1024 * 1024);
8636 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8637 	if (r)
8638 		return r;
8639 
8640 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8641 	ring->ring_obj = NULL;
8642 	r600_ring_init(rdev, ring, 256 * 1024);
8643 
8644 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8645 	ring->ring_obj = NULL;
8646 	r600_ring_init(rdev, ring, 256 * 1024);
8647 
8648 	cik_uvd_init(rdev);
8649 	cik_vce_init(rdev);
8650 
8651 	rdev->ih.ring_obj = NULL;
8652 	r600_ih_ring_init(rdev, 64 * 1024);
8653 
8654 	r = r600_pcie_gart_init(rdev);
8655 	if (r)
8656 		return r;
8657 
8658 	rdev->accel_working = true;
8659 	r = cik_startup(rdev);
8660 	if (r) {
8661 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8662 		cik_cp_fini(rdev);
8663 		cik_sdma_fini(rdev);
8664 		cik_irq_fini(rdev);
8665 		sumo_rlc_fini(rdev);
8666 		cik_mec_fini(rdev);
8667 		radeon_wb_fini(rdev);
8668 		radeon_ib_pool_fini(rdev);
8669 		radeon_vm_manager_fini(rdev);
8670 		radeon_irq_kms_fini(rdev);
8671 		cik_pcie_gart_fini(rdev);
8672 		rdev->accel_working = false;
8673 	}
8674 
8675 	/* Don't start up if the MC ucode is missing.
8676 	 * The default clocks and voltages before the MC ucode
8677 	 * is loaded are not suffient for advanced operations.
8678 	 */
8679 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8680 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8681 		return -EINVAL;
8682 	}
8683 
8684 	return 0;
8685 }
8686 
8687 /**
8688  * cik_fini - asic specific driver and hw fini
8689  *
8690  * @rdev: radeon_device pointer
8691  *
8692  * Tear down the asic specific driver variables and program the hw
8693  * to an idle state (CIK).
8694  * Called at driver unload.
8695  */
8696 void cik_fini(struct radeon_device *rdev)
8697 {
8698 	radeon_pm_fini(rdev);
8699 	cik_cp_fini(rdev);
8700 	cik_sdma_fini(rdev);
8701 	cik_fini_pg(rdev);
8702 	cik_fini_cg(rdev);
8703 	cik_irq_fini(rdev);
8704 	sumo_rlc_fini(rdev);
8705 	cik_mec_fini(rdev);
8706 	radeon_wb_fini(rdev);
8707 	radeon_vm_manager_fini(rdev);
8708 	radeon_ib_pool_fini(rdev);
8709 	radeon_irq_kms_fini(rdev);
8710 	uvd_v1_0_fini(rdev);
8711 	radeon_uvd_fini(rdev);
8712 	radeon_vce_fini(rdev);
8713 	cik_pcie_gart_fini(rdev);
8714 	r600_vram_scratch_fini(rdev);
8715 	radeon_gem_fini(rdev);
8716 	radeon_fence_driver_fini(rdev);
8717 	radeon_bo_fini(rdev);
8718 	radeon_atombios_fini(rdev);
8719 	kfree(rdev->bios);
8720 	rdev->bios = NULL;
8721 }
8722 
8723 void dce8_program_fmt(struct drm_encoder *encoder)
8724 {
8725 	struct drm_device *dev = encoder->dev;
8726 	struct radeon_device *rdev = dev->dev_private;
8727 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8728 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8729 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8730 	int bpc = 0;
8731 	u32 tmp = 0;
8732 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8733 
8734 	if (connector) {
8735 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8736 		bpc = radeon_get_monitor_bpc(connector);
8737 		dither = radeon_connector->dither;
8738 	}
8739 
8740 	/* LVDS/eDP FMT is set up by atom */
8741 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8742 		return;
8743 
8744 	/* not needed for analog */
8745 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8746 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8747 		return;
8748 
8749 	if (bpc == 0)
8750 		return;
8751 
8752 	switch (bpc) {
8753 	case 6:
8754 		if (dither == RADEON_FMT_DITHER_ENABLE)
8755 			/* XXX sort out optimal dither settings */
8756 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8757 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8758 		else
8759 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8760 		break;
8761 	case 8:
8762 		if (dither == RADEON_FMT_DITHER_ENABLE)
8763 			/* XXX sort out optimal dither settings */
8764 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8765 				FMT_RGB_RANDOM_ENABLE |
8766 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8767 		else
8768 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8769 		break;
8770 	case 10:
8771 		if (dither == RADEON_FMT_DITHER_ENABLE)
8772 			/* XXX sort out optimal dither settings */
8773 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8774 				FMT_RGB_RANDOM_ENABLE |
8775 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8776 		else
8777 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8778 		break;
8779 	default:
8780 		/* not needed */
8781 		break;
8782 	}
8783 
8784 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8785 }
8786 
8787 /* display watermark setup */
8788 /**
8789  * dce8_line_buffer_adjust - Set up the line buffer
8790  *
8791  * @rdev: radeon_device pointer
8792  * @radeon_crtc: the selected display controller
8793  * @mode: the current display mode on the selected display
8794  * controller
8795  *
8796  * Setup up the line buffer allocation for
8797  * the selected display controller (CIK).
8798  * Returns the line buffer size in pixels.
8799  */
8800 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8801 				   struct radeon_crtc *radeon_crtc,
8802 				   struct drm_display_mode *mode)
8803 {
8804 	u32 tmp, buffer_alloc, i;
8805 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8806 	/*
8807 	 * Line Buffer Setup
8808 	 * There are 6 line buffers, one for each display controllers.
8809 	 * There are 3 partitions per LB. Select the number of partitions
8810 	 * to enable based on the display width.  For display widths larger
8811 	 * than 4096, you need use to use 2 display controllers and combine
8812 	 * them using the stereo blender.
8813 	 */
8814 	if (radeon_crtc->base.enabled && mode) {
8815 		if (mode->crtc_hdisplay < 1920) {
8816 			tmp = 1;
8817 			buffer_alloc = 2;
8818 		} else if (mode->crtc_hdisplay < 2560) {
8819 			tmp = 2;
8820 			buffer_alloc = 2;
8821 		} else if (mode->crtc_hdisplay < 4096) {
8822 			tmp = 0;
8823 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8824 		} else {
8825 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8826 			tmp = 0;
8827 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8828 		}
8829 	} else {
8830 		tmp = 1;
8831 		buffer_alloc = 0;
8832 	}
8833 
8834 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8835 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8836 
8837 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8838 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8839 	for (i = 0; i < rdev->usec_timeout; i++) {
8840 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8841 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8842 			break;
8843 		udelay(1);
8844 	}
8845 
8846 	if (radeon_crtc->base.enabled && mode) {
8847 		switch (tmp) {
8848 		case 0:
8849 		default:
8850 			return 4096 * 2;
8851 		case 1:
8852 			return 1920 * 2;
8853 		case 2:
8854 			return 2560 * 2;
8855 		}
8856 	}
8857 
8858 	/* controller not enabled, so no lb used */
8859 	return 0;
8860 }
8861 
8862 /**
8863  * cik_get_number_of_dram_channels - get the number of dram channels
8864  *
8865  * @rdev: radeon_device pointer
8866  *
8867  * Look up the number of video ram channels (CIK).
8868  * Used for display watermark bandwidth calculations
8869  * Returns the number of dram channels
8870  */
8871 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8872 {
8873 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8874 
8875 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8876 	case 0:
8877 	default:
8878 		return 1;
8879 	case 1:
8880 		return 2;
8881 	case 2:
8882 		return 4;
8883 	case 3:
8884 		return 8;
8885 	case 4:
8886 		return 3;
8887 	case 5:
8888 		return 6;
8889 	case 6:
8890 		return 10;
8891 	case 7:
8892 		return 12;
8893 	case 8:
8894 		return 16;
8895 	}
8896 }
8897 
8898 struct dce8_wm_params {
8899 	u32 dram_channels; /* number of dram channels */
8900 	u32 yclk;          /* bandwidth per dram data pin in kHz */
8901 	u32 sclk;          /* engine clock in kHz */
8902 	u32 disp_clk;      /* display clock in kHz */
8903 	u32 src_width;     /* viewport width */
8904 	u32 active_time;   /* active display time in ns */
8905 	u32 blank_time;    /* blank time in ns */
8906 	bool interlaced;    /* mode is interlaced */
8907 	fixed20_12 vsc;    /* vertical scale ratio */
8908 	u32 num_heads;     /* number of active crtcs */
8909 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8910 	u32 lb_size;       /* line buffer allocated to pipe */
8911 	u32 vtaps;         /* vertical scaler taps */
8912 };
8913 
8914 /**
8915  * dce8_dram_bandwidth - get the dram bandwidth
8916  *
8917  * @wm: watermark calculation data
8918  *
8919  * Calculate the raw dram bandwidth (CIK).
8920  * Used for display watermark bandwidth calculations
8921  * Returns the dram bandwidth in MBytes/s
8922  */
8923 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8924 {
8925 	/* Calculate raw DRAM Bandwidth */
8926 	fixed20_12 dram_efficiency; /* 0.7 */
8927 	fixed20_12 yclk, dram_channels, bandwidth;
8928 	fixed20_12 a;
8929 
8930 	a.full = dfixed_const(1000);
8931 	yclk.full = dfixed_const(wm->yclk);
8932 	yclk.full = dfixed_div(yclk, a);
8933 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8934 	a.full = dfixed_const(10);
8935 	dram_efficiency.full = dfixed_const(7);
8936 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8937 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8938 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8939 
8940 	return dfixed_trunc(bandwidth);
8941 }
8942 
8943 /**
8944  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8945  *
8946  * @wm: watermark calculation data
8947  *
8948  * Calculate the dram bandwidth used for display (CIK).
8949  * Used for display watermark bandwidth calculations
8950  * Returns the dram bandwidth for display in MBytes/s
8951  */
8952 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8953 {
8954 	/* Calculate DRAM Bandwidth and the part allocated to display. */
8955 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8956 	fixed20_12 yclk, dram_channels, bandwidth;
8957 	fixed20_12 a;
8958 
8959 	a.full = dfixed_const(1000);
8960 	yclk.full = dfixed_const(wm->yclk);
8961 	yclk.full = dfixed_div(yclk, a);
8962 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8963 	a.full = dfixed_const(10);
8964 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8965 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8966 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8967 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8968 
8969 	return dfixed_trunc(bandwidth);
8970 }
8971 
8972 /**
8973  * dce8_data_return_bandwidth - get the data return bandwidth
8974  *
8975  * @wm: watermark calculation data
8976  *
8977  * Calculate the data return bandwidth used for display (CIK).
8978  * Used for display watermark bandwidth calculations
8979  * Returns the data return bandwidth in MBytes/s
8980  */
8981 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8982 {
8983 	/* Calculate the display Data return Bandwidth */
8984 	fixed20_12 return_efficiency; /* 0.8 */
8985 	fixed20_12 sclk, bandwidth;
8986 	fixed20_12 a;
8987 
8988 	a.full = dfixed_const(1000);
8989 	sclk.full = dfixed_const(wm->sclk);
8990 	sclk.full = dfixed_div(sclk, a);
8991 	a.full = dfixed_const(10);
8992 	return_efficiency.full = dfixed_const(8);
8993 	return_efficiency.full = dfixed_div(return_efficiency, a);
8994 	a.full = dfixed_const(32);
8995 	bandwidth.full = dfixed_mul(a, sclk);
8996 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8997 
8998 	return dfixed_trunc(bandwidth);
8999 }
9000 
9001 /**
9002  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9003  *
9004  * @wm: watermark calculation data
9005  *
9006  * Calculate the dmif bandwidth used for display (CIK).
9007  * Used for display watermark bandwidth calculations
9008  * Returns the dmif bandwidth in MBytes/s
9009  */
9010 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9011 {
9012 	/* Calculate the DMIF Request Bandwidth */
9013 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9014 	fixed20_12 disp_clk, bandwidth;
9015 	fixed20_12 a, b;
9016 
9017 	a.full = dfixed_const(1000);
9018 	disp_clk.full = dfixed_const(wm->disp_clk);
9019 	disp_clk.full = dfixed_div(disp_clk, a);
9020 	a.full = dfixed_const(32);
9021 	b.full = dfixed_mul(a, disp_clk);
9022 
9023 	a.full = dfixed_const(10);
9024 	disp_clk_request_efficiency.full = dfixed_const(8);
9025 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9026 
9027 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9028 
9029 	return dfixed_trunc(bandwidth);
9030 }
9031 
9032 /**
9033  * dce8_available_bandwidth - get the min available bandwidth
9034  *
9035  * @wm: watermark calculation data
9036  *
9037  * Calculate the min available bandwidth used for display (CIK).
9038  * Used for display watermark bandwidth calculations
9039  * Returns the min available bandwidth in MBytes/s
9040  */
9041 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9042 {
9043 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9044 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9045 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9046 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9047 
9048 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9049 }
9050 
9051 /**
9052  * dce8_average_bandwidth - get the average available bandwidth
9053  *
9054  * @wm: watermark calculation data
9055  *
9056  * Calculate the average available bandwidth used for display (CIK).
9057  * Used for display watermark bandwidth calculations
9058  * Returns the average available bandwidth in MBytes/s
9059  */
9060 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9061 {
9062 	/* Calculate the display mode Average Bandwidth
9063 	 * DisplayMode should contain the source and destination dimensions,
9064 	 * timing, etc.
9065 	 */
9066 	fixed20_12 bpp;
9067 	fixed20_12 line_time;
9068 	fixed20_12 src_width;
9069 	fixed20_12 bandwidth;
9070 	fixed20_12 a;
9071 
9072 	a.full = dfixed_const(1000);
9073 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9074 	line_time.full = dfixed_div(line_time, a);
9075 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9076 	src_width.full = dfixed_const(wm->src_width);
9077 	bandwidth.full = dfixed_mul(src_width, bpp);
9078 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9079 	bandwidth.full = dfixed_div(bandwidth, line_time);
9080 
9081 	return dfixed_trunc(bandwidth);
9082 }
9083 
9084 /**
9085  * dce8_latency_watermark - get the latency watermark
9086  *
9087  * @wm: watermark calculation data
9088  *
9089  * Calculate the latency watermark (CIK).
9090  * Used for display watermark bandwidth calculations
9091  * Returns the latency watermark in ns
9092  */
9093 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9094 {
9095 	/* First calculate the latency in ns */
9096 	u32 mc_latency = 2000; /* 2000 ns. */
9097 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9098 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9099 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9100 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9101 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9102 		(wm->num_heads * cursor_line_pair_return_time);
9103 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9104 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9105 	u32 tmp, dmif_size = 12288;
9106 	fixed20_12 a, b, c;
9107 
9108 	if (wm->num_heads == 0)
9109 		return 0;
9110 
9111 	a.full = dfixed_const(2);
9112 	b.full = dfixed_const(1);
9113 	if ((wm->vsc.full > a.full) ||
9114 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9115 	    (wm->vtaps >= 5) ||
9116 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9117 		max_src_lines_per_dst_line = 4;
9118 	else
9119 		max_src_lines_per_dst_line = 2;
9120 
9121 	a.full = dfixed_const(available_bandwidth);
9122 	b.full = dfixed_const(wm->num_heads);
9123 	a.full = dfixed_div(a, b);
9124 	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
9125 	tmp = min(dfixed_trunc(a), tmp);
9126 
9127 	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
9128 
9129 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9130 	b.full = dfixed_const(1000);
9131 	c.full = dfixed_const(lb_fill_bw);
9132 	b.full = dfixed_div(c, b);
9133 	a.full = dfixed_div(a, b);
9134 	line_fill_time = dfixed_trunc(a);
9135 
9136 	if (line_fill_time < wm->active_time)
9137 		return latency;
9138 	else
9139 		return latency + (line_fill_time - wm->active_time);
9140 
9141 }
9142 
9143 /**
9144  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9145  * average and available dram bandwidth
9146  *
9147  * @wm: watermark calculation data
9148  *
9149  * Check if the display average bandwidth fits in the display
9150  * dram bandwidth (CIK).
9151  * Used for display watermark bandwidth calculations
9152  * Returns true if the display fits, false if not.
9153  */
9154 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9155 {
9156 	if (dce8_average_bandwidth(wm) <=
9157 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9158 		return true;
9159 	else
9160 		return false;
9161 }
9162 
9163 /**
9164  * dce8_average_bandwidth_vs_available_bandwidth - check
9165  * average and available bandwidth
9166  *
9167  * @wm: watermark calculation data
9168  *
9169  * Check if the display average bandwidth fits in the display
9170  * available bandwidth (CIK).
9171  * Used for display watermark bandwidth calculations
9172  * Returns true if the display fits, false if not.
9173  */
9174 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9175 {
9176 	if (dce8_average_bandwidth(wm) <=
9177 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9178 		return true;
9179 	else
9180 		return false;
9181 }
9182 
9183 /**
9184  * dce8_check_latency_hiding - check latency hiding
9185  *
9186  * @wm: watermark calculation data
9187  *
9188  * Check latency hiding (CIK).
9189  * Used for display watermark bandwidth calculations
9190  * Returns true if the display fits, false if not.
9191  */
9192 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9193 {
9194 	u32 lb_partitions = wm->lb_size / wm->src_width;
9195 	u32 line_time = wm->active_time + wm->blank_time;
9196 	u32 latency_tolerant_lines;
9197 	u32 latency_hiding;
9198 	fixed20_12 a;
9199 
9200 	a.full = dfixed_const(1);
9201 	if (wm->vsc.full > a.full)
9202 		latency_tolerant_lines = 1;
9203 	else {
9204 		if (lb_partitions <= (wm->vtaps + 1))
9205 			latency_tolerant_lines = 1;
9206 		else
9207 			latency_tolerant_lines = 2;
9208 	}
9209 
9210 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9211 
9212 	if (dce8_latency_watermark(wm) <= latency_hiding)
9213 		return true;
9214 	else
9215 		return false;
9216 }
9217 
9218 /**
9219  * dce8_program_watermarks - program display watermarks
9220  *
9221  * @rdev: radeon_device pointer
9222  * @radeon_crtc: the selected display controller
9223  * @lb_size: line buffer size
9224  * @num_heads: number of display controllers in use
9225  *
9226  * Calculate and program the display watermarks for the
9227  * selected display controller (CIK).
9228  */
9229 static void dce8_program_watermarks(struct radeon_device *rdev,
9230 				    struct radeon_crtc *radeon_crtc,
9231 				    u32 lb_size, u32 num_heads)
9232 {
9233 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9234 	struct dce8_wm_params wm_low, wm_high;
9235 	u32 active_time;
9236 	u32 line_time = 0;
9237 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9238 	u32 tmp, wm_mask;
9239 
9240 	if (radeon_crtc->base.enabled && num_heads && mode) {
9241 		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
9242 					    (u32)mode->clock);
9243 		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
9244 					  (u32)mode->clock);
9245 		line_time = min(line_time, (u32)65535);
9246 
9247 		/* watermark for high clocks */
9248 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9249 		    rdev->pm.dpm_enabled) {
9250 			wm_high.yclk =
9251 				radeon_dpm_get_mclk(rdev, false) * 10;
9252 			wm_high.sclk =
9253 				radeon_dpm_get_sclk(rdev, false) * 10;
9254 		} else {
9255 			wm_high.yclk = rdev->pm.current_mclk * 10;
9256 			wm_high.sclk = rdev->pm.current_sclk * 10;
9257 		}
9258 
9259 		wm_high.disp_clk = mode->clock;
9260 		wm_high.src_width = mode->crtc_hdisplay;
9261 		wm_high.active_time = active_time;
9262 		wm_high.blank_time = line_time - wm_high.active_time;
9263 		wm_high.interlaced = false;
9264 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9265 			wm_high.interlaced = true;
9266 		wm_high.vsc = radeon_crtc->vsc;
9267 		wm_high.vtaps = 1;
9268 		if (radeon_crtc->rmx_type != RMX_OFF)
9269 			wm_high.vtaps = 2;
9270 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9271 		wm_high.lb_size = lb_size;
9272 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9273 		wm_high.num_heads = num_heads;
9274 
9275 		/* set for high clocks */
9276 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9277 
9278 		/* possibly force display priority to high */
9279 		/* should really do this at mode validation time... */
9280 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9281 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9282 		    !dce8_check_latency_hiding(&wm_high) ||
9283 		    (rdev->disp_priority == 2)) {
9284 			DRM_DEBUG_KMS("force priority to high\n");
9285 		}
9286 
9287 		/* watermark for low clocks */
9288 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9289 		    rdev->pm.dpm_enabled) {
9290 			wm_low.yclk =
9291 				radeon_dpm_get_mclk(rdev, true) * 10;
9292 			wm_low.sclk =
9293 				radeon_dpm_get_sclk(rdev, true) * 10;
9294 		} else {
9295 			wm_low.yclk = rdev->pm.current_mclk * 10;
9296 			wm_low.sclk = rdev->pm.current_sclk * 10;
9297 		}
9298 
9299 		wm_low.disp_clk = mode->clock;
9300 		wm_low.src_width = mode->crtc_hdisplay;
9301 		wm_low.active_time = active_time;
9302 		wm_low.blank_time = line_time - wm_low.active_time;
9303 		wm_low.interlaced = false;
9304 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9305 			wm_low.interlaced = true;
9306 		wm_low.vsc = radeon_crtc->vsc;
9307 		wm_low.vtaps = 1;
9308 		if (radeon_crtc->rmx_type != RMX_OFF)
9309 			wm_low.vtaps = 2;
9310 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9311 		wm_low.lb_size = lb_size;
9312 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9313 		wm_low.num_heads = num_heads;
9314 
9315 		/* set for low clocks */
9316 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9317 
9318 		/* possibly force display priority to high */
9319 		/* should really do this at mode validation time... */
9320 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9321 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9322 		    !dce8_check_latency_hiding(&wm_low) ||
9323 		    (rdev->disp_priority == 2)) {
9324 			DRM_DEBUG_KMS("force priority to high\n");
9325 		}
9326 
9327 		/* Save number of lines the linebuffer leads before the scanout */
9328 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9329 	}
9330 
9331 	/* select wm A */
9332 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9333 	tmp = wm_mask;
9334 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9335 	tmp |= LATENCY_WATERMARK_MASK(1);
9336 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9337 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9338 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9339 		LATENCY_HIGH_WATERMARK(line_time)));
9340 	/* select wm B */
9341 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9342 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9343 	tmp |= LATENCY_WATERMARK_MASK(2);
9344 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9345 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9346 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9347 		LATENCY_HIGH_WATERMARK(line_time)));
9348 	/* restore original selection */
9349 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9350 
9351 	/* save values for DPM */
9352 	radeon_crtc->line_time = line_time;
9353 	radeon_crtc->wm_high = latency_watermark_a;
9354 	radeon_crtc->wm_low = latency_watermark_b;
9355 }
9356 
9357 /**
9358  * dce8_bandwidth_update - program display watermarks
9359  *
9360  * @rdev: radeon_device pointer
9361  *
9362  * Calculate and program the display watermarks and line
9363  * buffer allocation (CIK).
9364  */
9365 void dce8_bandwidth_update(struct radeon_device *rdev)
9366 {
9367 	struct drm_display_mode *mode = NULL;
9368 	u32 num_heads = 0, lb_size;
9369 	int i;
9370 
9371 	if (!rdev->mode_info.mode_config_initialized)
9372 		return;
9373 
9374 	radeon_update_display_priority(rdev);
9375 
9376 	for (i = 0; i < rdev->num_crtc; i++) {
9377 		if (rdev->mode_info.crtcs[i]->base.enabled)
9378 			num_heads++;
9379 	}
9380 	for (i = 0; i < rdev->num_crtc; i++) {
9381 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9382 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9383 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9384 	}
9385 }
9386 
9387 /**
9388  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9389  *
9390  * @rdev: radeon_device pointer
9391  *
9392  * Fetches a GPU clock counter snapshot (SI).
9393  * Returns the 64 bit clock counter snapshot.
9394  */
9395 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9396 {
9397 	uint64_t clock;
9398 
9399 	mutex_lock(&rdev->gpu_clock_mutex);
9400 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9401 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9402 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9403 	mutex_unlock(&rdev->gpu_clock_mutex);
9404 	return clock;
9405 }
9406 
9407 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9408 			     u32 cntl_reg, u32 status_reg)
9409 {
9410 	int r, i;
9411 	struct atom_clock_dividers dividers;
9412 	uint32_t tmp;
9413 
9414 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9415 					   clock, false, &dividers);
9416 	if (r)
9417 		return r;
9418 
9419 	tmp = RREG32_SMC(cntl_reg);
9420 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9421 	tmp |= dividers.post_divider;
9422 	WREG32_SMC(cntl_reg, tmp);
9423 
9424 	for (i = 0; i < 100; i++) {
9425 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9426 			break;
9427 		mdelay(10);
9428 	}
9429 	if (i == 100)
9430 		return -ETIMEDOUT;
9431 
9432 	return 0;
9433 }
9434 
9435 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9436 {
9437 	int r = 0;
9438 
9439 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9440 	if (r)
9441 		return r;
9442 
9443 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9444 	return r;
9445 }
9446 
9447 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9448 {
9449 	int r, i;
9450 	struct atom_clock_dividers dividers;
9451 	u32 tmp;
9452 
9453 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9454 					   ecclk, false, &dividers);
9455 	if (r)
9456 		return r;
9457 
9458 	for (i = 0; i < 100; i++) {
9459 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9460 			break;
9461 		mdelay(10);
9462 	}
9463 	if (i == 100)
9464 		return -ETIMEDOUT;
9465 
9466 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9467 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9468 	tmp |= dividers.post_divider;
9469 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9470 
9471 	for (i = 0; i < 100; i++) {
9472 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9473 			break;
9474 		mdelay(10);
9475 	}
9476 	if (i == 100)
9477 		return -ETIMEDOUT;
9478 
9479 	return 0;
9480 }
9481 
9482 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9483 {
9484 	struct pci_dev *root = rdev->pdev->bus->self;
9485 	enum pci_bus_speed speed_cap;
9486 	u32 speed_cntl, current_data_rate;
9487 	int i;
9488 	u16 tmp16;
9489 
9490 	if (pci_is_root_bus(rdev->pdev->bus))
9491 		return;
9492 
9493 	if (radeon_pcie_gen2 == 0)
9494 		return;
9495 
9496 	if (rdev->flags & RADEON_IS_IGP)
9497 		return;
9498 
9499 	if (!(rdev->flags & RADEON_IS_PCIE))
9500 		return;
9501 
9502 	speed_cap = pcie_get_speed_cap(root);
9503 	if (speed_cap == PCI_SPEED_UNKNOWN)
9504 		return;
9505 
9506 	if ((speed_cap != PCIE_SPEED_8_0GT) &&
9507 	    (speed_cap != PCIE_SPEED_5_0GT))
9508 		return;
9509 
9510 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9511 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9512 		LC_CURRENT_DATA_RATE_SHIFT;
9513 	if (speed_cap == PCIE_SPEED_8_0GT) {
9514 		if (current_data_rate == 2) {
9515 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9516 			return;
9517 		}
9518 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9519 	} else if (speed_cap == PCIE_SPEED_5_0GT) {
9520 		if (current_data_rate == 1) {
9521 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9522 			return;
9523 		}
9524 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9525 	}
9526 
9527 	if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
9528 		return;
9529 
9530 	if (speed_cap == PCIE_SPEED_8_0GT) {
9531 		/* re-try equalization if gen3 is not already enabled */
9532 		if (current_data_rate != 2) {
9533 			u16 bridge_cfg, gpu_cfg;
9534 			u16 bridge_cfg2, gpu_cfg2;
9535 			u32 max_lw, current_lw, tmp;
9536 
9537 			pcie_capability_read_word(root, PCI_EXP_LNKCTL,
9538 						  &bridge_cfg);
9539 			pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL,
9540 						  &gpu_cfg);
9541 
9542 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9543 			pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
9544 
9545 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9546 			pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL,
9547 						   tmp16);
9548 
9549 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9550 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9551 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9552 
9553 			if (current_lw < max_lw) {
9554 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9555 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9556 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9557 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9558 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9559 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9560 				}
9561 			}
9562 
9563 			for (i = 0; i < 10; i++) {
9564 				/* check status */
9565 				pcie_capability_read_word(rdev->pdev,
9566 							  PCI_EXP_DEVSTA,
9567 							  &tmp16);
9568 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9569 					break;
9570 
9571 				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
9572 							  &bridge_cfg);
9573 				pcie_capability_read_word(rdev->pdev,
9574 							  PCI_EXP_LNKCTL,
9575 							  &gpu_cfg);
9576 
9577 				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
9578 							  &bridge_cfg2);
9579 				pcie_capability_read_word(rdev->pdev,
9580 							  PCI_EXP_LNKCTL2,
9581 							  &gpu_cfg2);
9582 
9583 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9584 				tmp |= LC_SET_QUIESCE;
9585 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9586 
9587 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9588 				tmp |= LC_REDO_EQ;
9589 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9590 
9591 				msleep(100);
9592 
9593 				/* linkctl */
9594 				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
9595 							  &tmp16);
9596 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9597 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9598 				pcie_capability_write_word(root, PCI_EXP_LNKCTL,
9599 							   tmp16);
9600 
9601 				pcie_capability_read_word(rdev->pdev,
9602 							  PCI_EXP_LNKCTL,
9603 							  &tmp16);
9604 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9605 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9606 				pcie_capability_write_word(rdev->pdev,
9607 							   PCI_EXP_LNKCTL,
9608 							   tmp16);
9609 
9610 				/* linkctl2 */
9611 				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
9612 							  &tmp16);
9613 				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
9614 					   PCI_EXP_LNKCTL2_TX_MARGIN);
9615 				tmp16 |= (bridge_cfg2 &
9616 					  (PCI_EXP_LNKCTL2_ENTER_COMP |
9617 					   PCI_EXP_LNKCTL2_TX_MARGIN));
9618 				pcie_capability_write_word(root,
9619 							   PCI_EXP_LNKCTL2,
9620 							   tmp16);
9621 
9622 				pcie_capability_read_word(rdev->pdev,
9623 							  PCI_EXP_LNKCTL2,
9624 							  &tmp16);
9625 				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
9626 					   PCI_EXP_LNKCTL2_TX_MARGIN);
9627 				tmp16 |= (gpu_cfg2 &
9628 					  (PCI_EXP_LNKCTL2_ENTER_COMP |
9629 					   PCI_EXP_LNKCTL2_TX_MARGIN));
9630 				pcie_capability_write_word(rdev->pdev,
9631 							   PCI_EXP_LNKCTL2,
9632 							   tmp16);
9633 
9634 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9635 				tmp &= ~LC_SET_QUIESCE;
9636 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9637 			}
9638 		}
9639 	}
9640 
9641 	/* set the link speed */
9642 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9643 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9644 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9645 
9646 	pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL2, &tmp16);
9647 	tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
9648 	if (speed_cap == PCIE_SPEED_8_0GT)
9649 		tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
9650 	else if (speed_cap == PCIE_SPEED_5_0GT)
9651 		tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
9652 	else
9653 		tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
9654 	pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL2, tmp16);
9655 
9656 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9657 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9658 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9659 
9660 	for (i = 0; i < rdev->usec_timeout; i++) {
9661 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9662 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9663 			break;
9664 		udelay(1);
9665 	}
9666 }
9667 
9668 static void cik_program_aspm(struct radeon_device *rdev)
9669 {
9670 	u32 data, orig;
9671 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9672 	bool disable_clkreq = false;
9673 
9674 	if (radeon_aspm == 0)
9675 		return;
9676 
9677 	/* XXX double check IGPs */
9678 	if (rdev->flags & RADEON_IS_IGP)
9679 		return;
9680 
9681 	if (!(rdev->flags & RADEON_IS_PCIE))
9682 		return;
9683 
9684 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9685 	data &= ~LC_XMIT_N_FTS_MASK;
9686 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9687 	if (orig != data)
9688 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9689 
9690 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9691 	data |= LC_GO_TO_RECOVERY;
9692 	if (orig != data)
9693 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9694 
9695 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9696 	data |= P_IGNORE_EDB_ERR;
9697 	if (orig != data)
9698 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9699 
9700 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9701 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9702 	data |= LC_PMI_TO_L1_DIS;
9703 	if (!disable_l0s)
9704 		data |= LC_L0S_INACTIVITY(7);
9705 
9706 	if (!disable_l1) {
9707 		data |= LC_L1_INACTIVITY(7);
9708 		data &= ~LC_PMI_TO_L1_DIS;
9709 		if (orig != data)
9710 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9711 
9712 		if (!disable_plloff_in_l1) {
9713 			bool clk_req_support;
9714 
9715 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9716 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9717 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9718 			if (orig != data)
9719 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9720 
9721 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9722 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9723 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9724 			if (orig != data)
9725 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9726 
9727 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9728 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9729 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9730 			if (orig != data)
9731 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9732 
9733 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9734 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9735 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9736 			if (orig != data)
9737 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9738 
9739 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9740 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9741 			data |= LC_DYN_LANES_PWR_STATE(3);
9742 			if (orig != data)
9743 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9744 
9745 			if (!disable_clkreq &&
9746 			    !pci_is_root_bus(rdev->pdev->bus)) {
9747 				struct pci_dev *root = rdev->pdev->bus->self;
9748 				u32 lnkcap;
9749 
9750 				clk_req_support = false;
9751 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9752 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9753 					clk_req_support = true;
9754 			} else {
9755 				clk_req_support = false;
9756 			}
9757 
9758 			if (clk_req_support) {
9759 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9760 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9761 				if (orig != data)
9762 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9763 
9764 				orig = data = RREG32_SMC(THM_CLK_CNTL);
9765 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9766 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9767 				if (orig != data)
9768 					WREG32_SMC(THM_CLK_CNTL, data);
9769 
9770 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9771 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9772 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9773 				if (orig != data)
9774 					WREG32_SMC(MISC_CLK_CTRL, data);
9775 
9776 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9777 				data &= ~BCLK_AS_XCLK;
9778 				if (orig != data)
9779 					WREG32_SMC(CG_CLKPIN_CNTL, data);
9780 
9781 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9782 				data &= ~FORCE_BIF_REFCLK_EN;
9783 				if (orig != data)
9784 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9785 
9786 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9787 				data &= ~MPLL_CLKOUT_SEL_MASK;
9788 				data |= MPLL_CLKOUT_SEL(4);
9789 				if (orig != data)
9790 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9791 			}
9792 		}
9793 	} else {
9794 		if (orig != data)
9795 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9796 	}
9797 
9798 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9799 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9800 	if (orig != data)
9801 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9802 
9803 	if (!disable_l0s) {
9804 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9805 		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9806 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9807 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9808 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9809 				data &= ~LC_L0S_INACTIVITY_MASK;
9810 				if (orig != data)
9811 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9812 			}
9813 		}
9814 	}
9815 }
9816