/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
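
/*
 * Worked example (illustrative numbers, not measured data): a raw SMC
 * reading of 424 makes kv_get_temp() report (424 / 8 - 49) * 1000 =
 * 4000 millidegrees C.  In ci_get_temp(), bit 9 of the CTF field
 * appears to flag an out-of-range reading, which is clamped to 255
 * degrees C before the same x1000 scaling.
 */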

/*
 * Indirect register accessors
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
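
/*
 * Usage sketch (illustrative only): the two helpers above implement the
 * classic index/data access pattern -- write the register offset to
 * PCIE_INDEX, read PCIE_INDEX back to flush the posted write, then
 * access PCIE_DATA.  A read-modify-write through them would look like
 * this (SOME_PCIE_PORT_REG and SOME_FIELD_MASK are hypothetical names):
 *
 *	u32 tmp = cik_pciep_rreg(rdev, SOME_PCIE_PORT_REG);
 *	tmp &= ~SOME_FIELD_MASK;
 *	tmp |= new_value;
 *	cik_pciep_wreg(rdev, SOME_PCIE_PORT_REG, tmp);
 *
 * The pciep_idx_lock keeps the index and data accesses of concurrent
 * callers from interleaving.
 */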

static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
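
/*
 * Format note (inferred from the initialisers in this file, not from
 * hardware documentation): each entry in these save/restore lists
 * appears to pack a selector in the high 16 bits (the 0x*e00 values
 * look like broadcast/per-instance encodings) and a dword register
 * offset (byte offset >> 2) in the low bits, alternating with
 * 0x00000000 placeholder slots; the bare constants 0x3 and 0x5 appear
 * to change the layout of the entries that follow.  An illustrative
 * decode of one entry:
 *
 *	u32 entry = spectre_rlc_save_restore_register_list[i];
 *	u32 sel = entry >> 16;                    // selector field
 *	u32 byte_offset = (entry & 0xffff) << 2;  // MMIO byte offset
 */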

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
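
/*
 * The golden register tables below are consumed three u32s at a time as
 * {register byte offset, and-mask, or-mask} triples; see the sketch
 * after cik_init_golden_registers() for how
 * radeon_program_register_sequence() applies them.
 */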

static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28355, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
}
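
/*
 * For reference, a simplified sketch (paraphrased, not the verbatim
 * implementation) of how radeon_program_register_sequence() in
 * radeon_device.c consumes each {offset, and-mask, or-mask} triple:
 *
 *	for (i = 0; i < array_size; i += 3) {
 *		reg = registers[i + 0];
 *		and_mask = registers[i + 1];
 *		or_mask = registers[i + 2];
 *		if (and_mask == 0xffffffff) {
 *			tmp = or_mask;
 *		} else {
 *			tmp = RREG32(reg);
 *			tmp &= ~and_mask;
 *			tmp |= or_mask;
 *		}
 *		WREG32(reg, tmp);
 *	}
 *
 * i.e. an and-mask of 0xffffffff means "write the value outright";
 * anything else is a read-modify-write of only the masked bits.
 */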

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
{
	if (index < rdev->doorbell.num_doorbells) {
		return readl(rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
{
	if (index < rdev->doorbell.num_doorbells) {
		writel(v, rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}
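
/*
 * Usage sketch (illustrative; the exact field names depend on the ring
 * structure): ring code uses these accessors to kick the hardware after
 * updating a ring's write pointer, along the lines of:
 *
 *	ring->wptr = ...;
 *	cik_mm_wdoorbell(rdev, ring->doorbell_index, ring->wptr);
 *
 * A doorbell access is just a plain MMIO dword load/store into the
 * doorbell aperture, hence the readl()/writel() above.
 */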

#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
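
/*
 * Each row above (and in the hawaii table below) is an
 * {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pair;
 * ci_mc_load_microcode() walks the table and programs the two
 * registers in that order.
 */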

#define HAWAII_IO_MC_REGS_SIZE 22

static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances.  Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}
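
/*
 * Typical calling pattern (illustrative): select the register instance
 * for one compute queue, program the per-queue registers, then restore
 * broadcast mode so later accesses hit all instances again:
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, 0);
 *	... program per-queue/per-VMID registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */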

/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	case CHIP_HAWAII:
		io_mc_regs = (u32 *)&hawaii_io_mc_regs;
		ucode_size = HAWAII_MC_UCODE_SIZE;
		regs_size = HAWAII_IO_MC_REGS_SIZE;
		break;
	default:
		return -EINVAL;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	/* only load the ucode while the MC engine is halted */
	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}
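
/*
 * Note: the MC microcode load is a one-shot startup step; the body
 * above is skipped entirely when RUN_MASK indicates the MC sequencer
 * is already running (e.g. after the vbios has brought the card up).
 * The blackout save/restore that previous ASICs needed is unnecessary
 * here because the ucode is only ever loaded while the MC is stopped.
 */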

/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size = 0,
		sdma_req_size, smc_req_size = 0;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = CIK_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_HAWAII:
		chip_name = "HAWAII";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	default:
		BUG();
	}

1835 	DRM_INFO("Loading %s Microcode\n", chip_name);
1836 
1837 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1838 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1839 	if (err)
1840 		goto out;
1841 	if (rdev->pfp_fw->size != pfp_req_size) {
1842 		printk(KERN_ERR
1843 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1844 		       rdev->pfp_fw->size, fw_name);
1845 		err = -EINVAL;
1846 		goto out;
1847 	}
1848 
1849 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1850 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1851 	if (err)
1852 		goto out;
1853 	if (rdev->me_fw->size != me_req_size) {
1854 		printk(KERN_ERR
1855 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1856 		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
		goto out;
1858 	}
1859 
1860 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1861 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1862 	if (err)
1863 		goto out;
1864 	if (rdev->ce_fw->size != ce_req_size) {
1865 		printk(KERN_ERR
1866 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1867 		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
		goto out;
1869 	}
1870 
1871 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1872 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1873 	if (err)
1874 		goto out;
1875 	if (rdev->mec_fw->size != mec_req_size) {
1876 		printk(KERN_ERR
1877 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1878 		       rdev->mec_fw->size, fw_name);
		err = -EINVAL;
		goto out;
1880 	}
1881 
1882 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1883 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1884 	if (err)
1885 		goto out;
1886 	if (rdev->rlc_fw->size != rlc_req_size) {
1887 		printk(KERN_ERR
1888 		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1889 		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
		goto out;
1891 	}
1892 
1893 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1894 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1895 	if (err)
1896 		goto out;
1897 	if (rdev->sdma_fw->size != sdma_req_size) {
1898 		printk(KERN_ERR
1899 		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1900 		       rdev->sdma_fw->size, fw_name);
		err = -EINVAL;
		goto out;
1902 	}
1903 
1904 	/* No SMC, MC ucode on APUs */
1905 	if (!(rdev->flags & RADEON_IS_IGP)) {
1906 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1907 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1908 		if (err)
1909 			goto out;
1910 		if (rdev->mc_fw->size != mc_req_size) {
1911 			printk(KERN_ERR
1912 			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1913 			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
			goto out;
1915 		}
1916 
1917 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1918 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1919 		if (err) {
1920 			printk(KERN_ERR
1921 			       "smc: error loading firmware \"%s\"\n",
1922 			       fw_name);
1923 			release_firmware(rdev->smc_fw);
1924 			rdev->smc_fw = NULL;
1925 			err = 0;
1926 		} else if (rdev->smc_fw->size != smc_req_size) {
1927 			printk(KERN_ERR
1928 			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1929 			       rdev->smc_fw->size, fw_name);
1930 			err = -EINVAL;
1931 		}
1932 	}
1933 
1934 out:
1935 	if (err) {
1936 		if (err != -EINVAL)
1937 			printk(KERN_ERR
1938 			       "cik_cp: Failed to load firmware \"%s\"\n",
1939 			       fw_name);
1940 		release_firmware(rdev->pfp_fw);
1941 		rdev->pfp_fw = NULL;
1942 		release_firmware(rdev->me_fw);
1943 		rdev->me_fw = NULL;
1944 		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
1946 		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
1948 		release_firmware(rdev->mc_fw);
1949 		rdev->mc_fw = NULL;
1950 		release_firmware(rdev->smc_fw);
1951 		rdev->smc_fw = NULL;
1952 	}
1953 	return err;
1954 }
1955 
1956 /*
1957  * Core functions
1958  */
1959 /**
1960  * cik_tiling_mode_table_init - init the hw tiling table
1961  *
1962  * @rdev: radeon_device pointer
1963  *
1964  * Starting with SI, the tiling setup is done globally in a
1965  * set of 32 tiling modes.  Rather than selecting each set of
1966  * parameters per surface as on older asics, we just select
1967  * which index in the tiling table we want to use, and the
1968  * surface uses those parameters (CIK).
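 * For example, a surface that selects table index 10 gets the 2D
 * display tiling parameters programmed for GB_TILE_MODE10 below.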
1969  */
1970 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1971 {
1972 	const u32 num_tile_mode_states = 32;
1973 	const u32 num_secondary_tile_mode_states = 16;
1974 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1975 	u32 num_pipe_configs;
1976 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
1977 		rdev->config.cik.max_shader_engines;
1978 
1979 	switch (rdev->config.cik.mem_row_size_in_kb) {
1980 	case 1:
1981 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1982 		break;
1983 	case 2:
1984 	default:
1985 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1986 		break;
1987 	case 4:
1988 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1989 		break;
1990 	}
1991 
1992 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
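	/* anything above 8 pipes is Hawaii's 16-pipe configuration */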
1993 	if (num_pipe_configs > 8)
1994 		num_pipe_configs = 16;
1995 
1996 	if (num_pipe_configs == 16) {
1997 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1998 			switch (reg_offset) {
1999 			case 0:
2000 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2001 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2002 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2003 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2004 				break;
2005 			case 1:
2006 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2007 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2008 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2009 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2010 				break;
2011 			case 2:
2012 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2013 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2014 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2015 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2016 				break;
2017 			case 3:
2018 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2019 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2020 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2021 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2022 				break;
2023 			case 4:
2024 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2025 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2026 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2027 						 TILE_SPLIT(split_equal_to_row_size));
2028 				break;
2029 			case 5:
2030 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2031 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2032 				break;
2033 			case 6:
2034 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2035 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2036 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2037 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2038 				break;
2039 			case 7:
2040 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2041 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2042 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2043 						 TILE_SPLIT(split_equal_to_row_size));
2044 				break;
2045 			case 8:
2046 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2047 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2048 				break;
2049 			case 9:
2050 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2051 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2052 				break;
2053 			case 10:
2054 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2055 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2056 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2057 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2058 				break;
2059 			case 11:
2060 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2061 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2062 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2063 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2064 				break;
2065 			case 12:
2066 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2067 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2068 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2069 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2070 				break;
2071 			case 13:
2072 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2073 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2074 				break;
2075 			case 14:
2076 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2077 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2078 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2079 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2080 				break;
2081 			case 16:
2082 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2083 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2084 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2085 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2086 				break;
2087 			case 17:
2088 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2089 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2090 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2091 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2092 				break;
2093 			case 27:
2094 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2095 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2096 				break;
2097 			case 28:
2098 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2099 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2100 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2101 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2102 				break;
2103 			case 29:
2104 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2105 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2106 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2107 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2108 				break;
2109 			case 30:
2110 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2111 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2112 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2113 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2114 				break;
2115 			default:
2116 				gb_tile_moden = 0;
2117 				break;
2118 			}
2119 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2120 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2121 		}
2122 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2123 			switch (reg_offset) {
2124 			case 0:
2125 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2126 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2127 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2128 						 NUM_BANKS(ADDR_SURF_16_BANK));
2129 				break;
2130 			case 1:
2131 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2132 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2133 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2134 						 NUM_BANKS(ADDR_SURF_16_BANK));
2135 				break;
2136 			case 2:
2137 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2138 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2139 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2140 						 NUM_BANKS(ADDR_SURF_16_BANK));
2141 				break;
2142 			case 3:
2143 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2144 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2145 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2146 						 NUM_BANKS(ADDR_SURF_16_BANK));
2147 				break;
2148 			case 4:
2149 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2150 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2151 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2152 						 NUM_BANKS(ADDR_SURF_8_BANK));
2153 				break;
2154 			case 5:
2155 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2156 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2157 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2158 						 NUM_BANKS(ADDR_SURF_4_BANK));
2159 				break;
2160 			case 6:
2161 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2162 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2163 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2164 						 NUM_BANKS(ADDR_SURF_2_BANK));
2165 				break;
2166 			case 8:
2167 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2168 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2169 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2170 						 NUM_BANKS(ADDR_SURF_16_BANK));
2171 				break;
2172 			case 9:
2173 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2174 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2175 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2176 						 NUM_BANKS(ADDR_SURF_16_BANK));
2177 				break;
2178 			case 10:
2179 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2180 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2181 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2182 						 NUM_BANKS(ADDR_SURF_16_BANK));
2183 				break;
2184 			case 11:
2185 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2186 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2187 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2188 						 NUM_BANKS(ADDR_SURF_8_BANK));
2189 				break;
2190 			case 12:
2191 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2192 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2193 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2194 						 NUM_BANKS(ADDR_SURF_4_BANK));
2195 				break;
2196 			case 13:
2197 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2198 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2199 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2200 						 NUM_BANKS(ADDR_SURF_2_BANK));
2201 				break;
2202 			case 14:
2203 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2204 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2205 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2206 						 NUM_BANKS(ADDR_SURF_2_BANK));
2207 				break;
2208 			default:
2209 				gb_tile_moden = 0;
2210 				break;
2211 			}
			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2213 		}
2214 	} else if (num_pipe_configs == 8) {
2215 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2216 			switch (reg_offset) {
2217 			case 0:
2218 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2219 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2220 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2221 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2222 				break;
2223 			case 1:
2224 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2225 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2226 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2227 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2228 				break;
2229 			case 2:
2230 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2231 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2232 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2233 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2234 				break;
2235 			case 3:
2236 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2237 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2238 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2239 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2240 				break;
2241 			case 4:
2242 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2243 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2244 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2245 						 TILE_SPLIT(split_equal_to_row_size));
2246 				break;
2247 			case 5:
2248 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2249 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2250 				break;
2251 			case 6:
2252 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2253 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2254 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2255 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2256 				break;
2257 			case 7:
2258 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2259 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2260 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2261 						 TILE_SPLIT(split_equal_to_row_size));
2262 				break;
2263 			case 8:
2264 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2265 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2266 				break;
2267 			case 9:
2268 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2269 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2270 				break;
2271 			case 10:
2272 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2273 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2274 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2275 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2276 				break;
2277 			case 11:
2278 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2279 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2280 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2281 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2282 				break;
2283 			case 12:
2284 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2285 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2286 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2287 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2288 				break;
2289 			case 13:
2290 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2291 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2292 				break;
2293 			case 14:
2294 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2295 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2296 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2297 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2298 				break;
2299 			case 16:
2300 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2301 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2302 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2303 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2304 				break;
2305 			case 17:
2306 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2307 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2308 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2309 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2310 				break;
2311 			case 27:
2312 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2313 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2314 				break;
2315 			case 28:
2316 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2317 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2318 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2319 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2320 				break;
2321 			case 29:
2322 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2323 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2324 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2325 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2326 				break;
2327 			case 30:
2328 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2329 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2330 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2331 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2332 				break;
2333 			default:
2334 				gb_tile_moden = 0;
2335 				break;
2336 			}
2337 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2338 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2339 		}
2340 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2341 			switch (reg_offset) {
2342 			case 0:
2343 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2345 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2346 						 NUM_BANKS(ADDR_SURF_16_BANK));
2347 				break;
2348 			case 1:
2349 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2350 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2351 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2352 						 NUM_BANKS(ADDR_SURF_16_BANK));
2353 				break;
2354 			case 2:
2355 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2357 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358 						 NUM_BANKS(ADDR_SURF_16_BANK));
2359 				break;
2360 			case 3:
2361 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2362 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2363 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2364 						 NUM_BANKS(ADDR_SURF_16_BANK));
2365 				break;
2366 			case 4:
2367 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2369 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2370 						 NUM_BANKS(ADDR_SURF_8_BANK));
2371 				break;
2372 			case 5:
2373 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2374 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2375 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2376 						 NUM_BANKS(ADDR_SURF_4_BANK));
2377 				break;
2378 			case 6:
2379 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2382 						 NUM_BANKS(ADDR_SURF_2_BANK));
2383 				break;
2384 			case 8:
2385 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2386 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2387 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2388 						 NUM_BANKS(ADDR_SURF_16_BANK));
2389 				break;
2390 			case 9:
2391 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2392 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2393 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2394 						 NUM_BANKS(ADDR_SURF_16_BANK));
2395 				break;
2396 			case 10:
2397 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2398 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2399 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2400 						 NUM_BANKS(ADDR_SURF_16_BANK));
2401 				break;
2402 			case 11:
2403 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2404 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2405 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2406 						 NUM_BANKS(ADDR_SURF_16_BANK));
2407 				break;
2408 			case 12:
2409 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2410 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2411 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2412 						 NUM_BANKS(ADDR_SURF_8_BANK));
2413 				break;
2414 			case 13:
2415 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2416 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2417 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2418 						 NUM_BANKS(ADDR_SURF_4_BANK));
2419 				break;
2420 			case 14:
2421 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2422 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2423 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2424 						 NUM_BANKS(ADDR_SURF_2_BANK));
2425 				break;
2426 			default:
2427 				gb_tile_moden = 0;
2428 				break;
2429 			}
2430 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2431 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2432 		}
2433 	} else if (num_pipe_configs == 4) {
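		/* 4-pipe parts use a P4_16x16 pipe config when all 4 RBs are
		 * enabled and P4_8x16 when fewer are */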
2434 		if (num_rbs == 4) {
2435 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2436 				switch (reg_offset) {
2437 				case 0:
2438 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2439 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2440 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2441 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2442 					break;
2443 				case 1:
2444 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2445 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2446 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2447 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2448 					break;
2449 				case 2:
2450 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2451 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2452 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2453 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2454 					break;
2455 				case 3:
2456 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2457 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2458 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2459 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2460 					break;
2461 				case 4:
2462 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2463 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2464 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2465 							 TILE_SPLIT(split_equal_to_row_size));
2466 					break;
2467 				case 5:
2468 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2469 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2470 					break;
2471 				case 6:
2472 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2473 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2474 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2475 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2476 					break;
2477 				case 7:
2478 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2479 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2480 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2481 							 TILE_SPLIT(split_equal_to_row_size));
2482 					break;
2483 				case 8:
2484 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2485 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2486 					break;
2487 				case 9:
2488 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2489 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2490 					break;
2491 				case 10:
2492 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2493 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2494 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2495 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2496 					break;
2497 				case 11:
2498 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2499 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2500 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2501 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2502 					break;
2503 				case 12:
2504 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2505 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2506 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2507 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2508 					break;
2509 				case 13:
2510 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2511 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2512 					break;
2513 				case 14:
2514 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2515 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2516 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2517 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2518 					break;
2519 				case 16:
2520 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2521 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2522 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2523 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2524 					break;
2525 				case 17:
2526 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2527 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2528 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2529 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2530 					break;
2531 				case 27:
2532 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2533 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2534 					break;
2535 				case 28:
2536 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2537 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2538 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2539 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2540 					break;
2541 				case 29:
2542 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2543 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2544 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2545 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2546 					break;
2547 				case 30:
2548 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2549 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2550 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2551 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2552 					break;
2553 				default:
2554 					gb_tile_moden = 0;
2555 					break;
2556 				}
2557 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2558 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2559 			}
2560 		} else if (num_rbs < 4) {
2561 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2562 				switch (reg_offset) {
2563 				case 0:
2564 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2566 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2567 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2568 					break;
2569 				case 1:
2570 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2571 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2572 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2573 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2574 					break;
2575 				case 2:
2576 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2577 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2578 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2579 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2580 					break;
2581 				case 3:
2582 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2583 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2584 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2585 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2586 					break;
2587 				case 4:
2588 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2589 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2590 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2591 							 TILE_SPLIT(split_equal_to_row_size));
2592 					break;
2593 				case 5:
2594 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2595 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2596 					break;
2597 				case 6:
2598 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2599 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2600 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2601 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2602 					break;
2603 				case 7:
2604 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2605 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2606 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2607 							 TILE_SPLIT(split_equal_to_row_size));
2608 					break;
2609 				case 8:
2610 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2612 					break;
2613 				case 9:
2614 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2615 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2616 					break;
2617 				case 10:
2618 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2619 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2620 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2621 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2622 					break;
2623 				case 11:
2624 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2625 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2626 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2627 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2628 					break;
2629 				case 12:
2630 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2631 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2632 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2633 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2634 					break;
2635 				case 13:
2636 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2637 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2638 					break;
2639 				case 14:
2640 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2641 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2642 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2643 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2644 					break;
2645 				case 16:
2646 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2647 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2648 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2649 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2650 					break;
2651 				case 17:
2652 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2653 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2654 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2655 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2656 					break;
2657 				case 27:
2658 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2659 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2660 					break;
2661 				case 28:
2662 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2663 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2664 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2665 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2666 					break;
2667 				case 29:
2668 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2669 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2670 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2671 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2672 					break;
2673 				case 30:
2674 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2675 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2676 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2677 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2678 					break;
2679 				default:
2680 					gb_tile_moden = 0;
2681 					break;
2682 				}
2683 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2684 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2685 			}
2686 		}
2687 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2688 			switch (reg_offset) {
2689 			case 0:
2690 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2692 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2693 						 NUM_BANKS(ADDR_SURF_16_BANK));
2694 				break;
2695 			case 1:
2696 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2698 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2699 						 NUM_BANKS(ADDR_SURF_16_BANK));
2700 				break;
2701 			case 2:
2702 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2703 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2704 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2705 						 NUM_BANKS(ADDR_SURF_16_BANK));
2706 				break;
2707 			case 3:
2708 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2709 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2710 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2711 						 NUM_BANKS(ADDR_SURF_16_BANK));
2712 				break;
2713 			case 4:
2714 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2715 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2716 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2717 						 NUM_BANKS(ADDR_SURF_16_BANK));
2718 				break;
2719 			case 5:
2720 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2721 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2722 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2723 						 NUM_BANKS(ADDR_SURF_8_BANK));
2724 				break;
2725 			case 6:
2726 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2728 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2729 						 NUM_BANKS(ADDR_SURF_4_BANK));
2730 				break;
2731 			case 8:
2732 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2733 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2734 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2735 						 NUM_BANKS(ADDR_SURF_16_BANK));
2736 				break;
2737 			case 9:
2738 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2739 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2740 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2741 						 NUM_BANKS(ADDR_SURF_16_BANK));
2742 				break;
2743 			case 10:
2744 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2746 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2747 						 NUM_BANKS(ADDR_SURF_16_BANK));
2748 				break;
2749 			case 11:
2750 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2752 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2753 						 NUM_BANKS(ADDR_SURF_16_BANK));
2754 				break;
2755 			case 12:
2756 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2758 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2759 						 NUM_BANKS(ADDR_SURF_16_BANK));
2760 				break;
2761 			case 13:
2762 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2764 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2765 						 NUM_BANKS(ADDR_SURF_8_BANK));
2766 				break;
2767 			case 14:
2768 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2770 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2771 						 NUM_BANKS(ADDR_SURF_4_BANK));
2772 				break;
2773 			default:
2774 				gb_tile_moden = 0;
2775 				break;
2776 			}
2777 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2778 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2779 		}
2780 	} else if (num_pipe_configs == 2) {
2781 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2782 			switch (reg_offset) {
2783 			case 0:
2784 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2786 						 PIPE_CONFIG(ADDR_SURF_P2) |
2787 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2788 				break;
2789 			case 1:
2790 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2791 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2792 						 PIPE_CONFIG(ADDR_SURF_P2) |
2793 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2794 				break;
2795 			case 2:
2796 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2797 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2798 						 PIPE_CONFIG(ADDR_SURF_P2) |
2799 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2800 				break;
2801 			case 3:
2802 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2803 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2804 						 PIPE_CONFIG(ADDR_SURF_P2) |
2805 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2806 				break;
2807 			case 4:
2808 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2809 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2810 						 PIPE_CONFIG(ADDR_SURF_P2) |
2811 						 TILE_SPLIT(split_equal_to_row_size));
2812 				break;
2813 			case 5:
2814 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2815 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2816 				break;
2817 			case 6:
2818 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2819 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2820 						 PIPE_CONFIG(ADDR_SURF_P2) |
2821 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2822 				break;
2823 			case 7:
2824 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2825 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2826 						 PIPE_CONFIG(ADDR_SURF_P2) |
2827 						 TILE_SPLIT(split_equal_to_row_size));
2828 				break;
2829 			case 8:
2830 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2831 				break;
2832 			case 9:
2833 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2834 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2835 				break;
2836 			case 10:
2837 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2838 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2839 						 PIPE_CONFIG(ADDR_SURF_P2) |
2840 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2841 				break;
2842 			case 11:
2843 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2844 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2845 						 PIPE_CONFIG(ADDR_SURF_P2) |
2846 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2847 				break;
2848 			case 12:
2849 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2850 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2851 						 PIPE_CONFIG(ADDR_SURF_P2) |
2852 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2853 				break;
2854 			case 13:
2855 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2856 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2857 				break;
2858 			case 14:
2859 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2860 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2861 						 PIPE_CONFIG(ADDR_SURF_P2) |
2862 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2863 				break;
2864 			case 16:
2865 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2866 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2867 						 PIPE_CONFIG(ADDR_SURF_P2) |
2868 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2869 				break;
2870 			case 17:
2871 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2872 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2873 						 PIPE_CONFIG(ADDR_SURF_P2) |
2874 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2875 				break;
2876 			case 27:
2877 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2878 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2879 				break;
2880 			case 28:
2881 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2882 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2883 						 PIPE_CONFIG(ADDR_SURF_P2) |
2884 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2885 				break;
2886 			case 29:
2887 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2888 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2889 						 PIPE_CONFIG(ADDR_SURF_P2) |
2890 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2891 				break;
2892 			case 30:
2893 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2894 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2895 						 PIPE_CONFIG(ADDR_SURF_P2) |
2896 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2897 				break;
2898 			default:
2899 				gb_tile_moden = 0;
2900 				break;
2901 			}
2902 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2903 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2904 		}
2905 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2906 			switch (reg_offset) {
2907 			case 0:
2908 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2909 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2910 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2911 						 NUM_BANKS(ADDR_SURF_16_BANK));
2912 				break;
2913 			case 1:
2914 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2915 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2916 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2917 						 NUM_BANKS(ADDR_SURF_16_BANK));
2918 				break;
2919 			case 2:
2920 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2921 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2922 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2923 						 NUM_BANKS(ADDR_SURF_16_BANK));
2924 				break;
2925 			case 3:
2926 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2927 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2928 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2929 						 NUM_BANKS(ADDR_SURF_16_BANK));
2930 				break;
2931 			case 4:
2932 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2933 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2934 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2935 						 NUM_BANKS(ADDR_SURF_16_BANK));
2936 				break;
2937 			case 5:
2938 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2939 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2940 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2941 						 NUM_BANKS(ADDR_SURF_16_BANK));
2942 				break;
2943 			case 6:
2944 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2945 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2946 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2947 						 NUM_BANKS(ADDR_SURF_8_BANK));
2948 				break;
2949 			case 8:
2950 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2951 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2952 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2953 						 NUM_BANKS(ADDR_SURF_16_BANK));
2954 				break;
2955 			case 9:
2956 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2957 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2958 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2959 						 NUM_BANKS(ADDR_SURF_16_BANK));
2960 				break;
2961 			case 10:
2962 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2963 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2964 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2965 						 NUM_BANKS(ADDR_SURF_16_BANK));
2966 				break;
2967 			case 11:
2968 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2969 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2970 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971 						 NUM_BANKS(ADDR_SURF_16_BANK));
2972 				break;
2973 			case 12:
2974 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2975 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2976 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2977 						 NUM_BANKS(ADDR_SURF_16_BANK));
2978 				break;
2979 			case 13:
2980 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2982 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2983 						 NUM_BANKS(ADDR_SURF_16_BANK));
2984 				break;
2985 			case 14:
2986 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2987 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2988 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2989 						 NUM_BANKS(ADDR_SURF_8_BANK));
2990 				break;
2991 			default:
2992 				gb_tile_moden = 0;
2993 				break;
2994 			}
2995 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2996 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2997 		}
	} else {
		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
	}
3000 }
3001 
3002 /**
3003  * cik_select_se_sh - select which SE, SH to address
3004  *
3005  * @rdev: radeon_device pointer
3006  * @se_num: shader engine to address
3007  * @sh_num: sh block to address
3008  *
3009  * Select which SE, SH combinations to address. Certain
3010  * registers are instanced per SE or SH.  0xffffffff means
3011  * broadcast to all SEs or SHs (CIK).
3012  */
3013 static void cik_select_se_sh(struct radeon_device *rdev,
3014 			     u32 se_num, u32 sh_num)
3015 {
3016 	u32 data = INSTANCE_BROADCAST_WRITES;
3017 
3018 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3019 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3020 	else if (se_num == 0xffffffff)
3021 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3022 	else if (sh_num == 0xffffffff)
3023 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3024 	else
3025 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3026 	WREG32(GRBM_GFX_INDEX, data);
3027 }
3028 
3029 /**
3030  * cik_create_bitmask - create a bitmask
3031  *
3032  * @bit_width: length of the mask
3033  *
 * Create a variable length bit mask (CIK).
3035  * Returns the bitmask.
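 * e.g. cik_create_bitmask(4) = 0xf; the loop form is equivalent to
 * ((1U << bit_width) - 1) for bit_width < 32.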
3036  */
3037 static u32 cik_create_bitmask(u32 bit_width)
3038 {
3039 	u32 i, mask = 0;
3040 
3041 	for (i = 0; i < bit_width; i++) {
3042 		mask <<= 1;
3043 		mask |= 1;
3044 	}
3045 	return mask;
3046 }
3047 
3048 /**
 * cik_get_rb_disabled - compute the bitmask of disabled RBs
3050  *
3051  * @rdev: radeon_device pointer
3052  * @max_rb_num: max RBs (render backends) for the asic
3053  * @se_num: number of SEs (shader engines) for the asic
3054  * @sh_per_se: number of SH blocks per SE for the asic
3055  *
3056  * Calculates the bitmask of disabled RBs (CIK).
3057  * Returns the disabled RB bitmask.
3058  */
3059 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3060 			      u32 max_rb_num, u32 se_num,
3061 			      u32 sh_per_se)
3062 {
3063 	u32 data, mask;
3064 
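	/* CC_RB_BACKEND_DISABLE reflects the factory-disabled (harvested)
	 * backends, GC_USER_RB_BACKEND_DISABLE any additionally disabled
	 * ones; OR them to get every inactive RB
	 */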
3065 	data = RREG32(CC_RB_BACKEND_DISABLE);
3066 	if (data & 1)
3067 		data &= BACKEND_DISABLE_MASK;
3068 	else
3069 		data = 0;
3070 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3071 
3072 	data >>= BACKEND_DISABLE_SHIFT;
3073 
3074 	mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
3075 
3076 	return data & mask;
3077 }
3078 
3079 /**
3080  * cik_setup_rb - setup the RBs on the asic
3081  *
3082  * @rdev: radeon_device pointer
3083  * @se_num: number of SEs (shader engines) for the asic
3084  * @sh_per_se: number of SH blocks per SE for the asic
3085  * @max_rb_num: max RBs (render backends) for the asic
3086  *
3087  * Configures per-SE/SH RB registers (CIK).
3088  */
3089 static void cik_setup_rb(struct radeon_device *rdev,
3090 			 u32 se_num, u32 sh_per_se,
3091 			 u32 max_rb_num)
3092 {
3093 	int i, j;
3094 	u32 data, mask;
3095 	u32 disabled_rbs = 0;
3096 	u32 enabled_rbs = 0;
3097 
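	/* collect the per-SE/SH disabled-RB masks into one packed bitmask */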
3098 	for (i = 0; i < se_num; i++) {
3099 		for (j = 0; j < sh_per_se; j++) {
3100 			cik_select_se_sh(rdev, i, j);
3101 			data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
3102 			if (rdev->family == CHIP_HAWAII)
3103 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3104 			else
3105 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3106 		}
3107 	}
3108 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3109 
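	/* every RB not flagged disabled above is enabled */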
3110 	mask = 1;
3111 	for (i = 0; i < max_rb_num; i++) {
3112 		if (!(disabled_rbs & mask))
3113 			enabled_rbs |= mask;
3114 		mask <<= 1;
3115 	}
3116 
3117 	for (i = 0; i < se_num; i++) {
3118 		cik_select_se_sh(rdev, i, 0xffffffff);
3119 		data = 0;
3120 		for (j = 0; j < sh_per_se; j++) {
3121 			switch (enabled_rbs & 3) {
3122 			case 0:
3123 				if (j == 0)
3124 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3125 				else
3126 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3127 				break;
3128 			case 1:
3129 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3130 				break;
3131 			case 2:
3132 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3133 				break;
3134 			case 3:
3135 			default:
3136 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3137 				break;
3138 			}
3139 			enabled_rbs >>= 2;
3140 		}
3141 		WREG32(PA_SC_RASTER_CONFIG, data);
3142 	}
3143 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3144 }
3145 
3146 /**
3147  * cik_gpu_init - setup the 3D engine
3148  *
3149  * @rdev: radeon_device pointer
3150  *
3151  * Configures the 3D engine and tiling configuration
3152  * registers so that the 3D engine is usable.
3153  */
3154 static void cik_gpu_init(struct radeon_device *rdev)
3155 {
3156 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_arb_ramcfg;
3158 	u32 hdp_host_path_cntl;
3159 	u32 tmp;
3160 	int i, j;
3161 
3162 	switch (rdev->family) {
3163 	case CHIP_BONAIRE:
3164 		rdev->config.cik.max_shader_engines = 2;
3165 		rdev->config.cik.max_tile_pipes = 4;
3166 		rdev->config.cik.max_cu_per_sh = 7;
3167 		rdev->config.cik.max_sh_per_se = 1;
3168 		rdev->config.cik.max_backends_per_se = 2;
3169 		rdev->config.cik.max_texture_channel_caches = 4;
3170 		rdev->config.cik.max_gprs = 256;
3171 		rdev->config.cik.max_gs_threads = 32;
3172 		rdev->config.cik.max_hw_contexts = 8;
3173 
3174 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3175 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3176 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3177 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3178 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3179 		break;
3180 	case CHIP_HAWAII:
3181 		rdev->config.cik.max_shader_engines = 4;
3182 		rdev->config.cik.max_tile_pipes = 16;
3183 		rdev->config.cik.max_cu_per_sh = 11;
3184 		rdev->config.cik.max_sh_per_se = 1;
3185 		rdev->config.cik.max_backends_per_se = 4;
3186 		rdev->config.cik.max_texture_channel_caches = 16;
3187 		rdev->config.cik.max_gprs = 256;
3188 		rdev->config.cik.max_gs_threads = 32;
3189 		rdev->config.cik.max_hw_contexts = 8;
3190 
3191 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3192 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3193 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3194 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3195 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3196 		break;
3197 	case CHIP_KAVERI:
3198 		rdev->config.cik.max_shader_engines = 1;
3199 		rdev->config.cik.max_tile_pipes = 4;
3200 		if ((rdev->pdev->device == 0x1304) ||
3201 		    (rdev->pdev->device == 0x1305) ||
3202 		    (rdev->pdev->device == 0x130C) ||
3203 		    (rdev->pdev->device == 0x130F) ||
3204 		    (rdev->pdev->device == 0x1310) ||
3205 		    (rdev->pdev->device == 0x1311) ||
3206 		    (rdev->pdev->device == 0x131C)) {
3207 			rdev->config.cik.max_cu_per_sh = 8;
3208 			rdev->config.cik.max_backends_per_se = 2;
3209 		} else if ((rdev->pdev->device == 0x1309) ||
3210 			   (rdev->pdev->device == 0x130A) ||
3211 			   (rdev->pdev->device == 0x130D) ||
3212 			   (rdev->pdev->device == 0x1313) ||
3213 			   (rdev->pdev->device == 0x131D)) {
3214 			rdev->config.cik.max_cu_per_sh = 6;
3215 			rdev->config.cik.max_backends_per_se = 2;
3216 		} else if ((rdev->pdev->device == 0x1306) ||
3217 			   (rdev->pdev->device == 0x1307) ||
3218 			   (rdev->pdev->device == 0x130B) ||
3219 			   (rdev->pdev->device == 0x130E) ||
3220 			   (rdev->pdev->device == 0x1315) ||
3221 			   (rdev->pdev->device == 0x131B)) {
3222 			rdev->config.cik.max_cu_per_sh = 4;
3223 			rdev->config.cik.max_backends_per_se = 1;
3224 		} else {
3225 			rdev->config.cik.max_cu_per_sh = 3;
3226 			rdev->config.cik.max_backends_per_se = 1;
3227 		}
3228 		rdev->config.cik.max_sh_per_se = 1;
3229 		rdev->config.cik.max_texture_channel_caches = 4;
3230 		rdev->config.cik.max_gprs = 256;
3231 		rdev->config.cik.max_gs_threads = 16;
3232 		rdev->config.cik.max_hw_contexts = 8;
3233 
3234 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3235 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3236 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3237 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3238 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3239 		break;
3240 	case CHIP_KABINI:
3241 	default:
3242 		rdev->config.cik.max_shader_engines = 1;
3243 		rdev->config.cik.max_tile_pipes = 2;
3244 		rdev->config.cik.max_cu_per_sh = 2;
3245 		rdev->config.cik.max_sh_per_se = 1;
3246 		rdev->config.cik.max_backends_per_se = 1;
3247 		rdev->config.cik.max_texture_channel_caches = 2;
3248 		rdev->config.cik.max_gprs = 256;
3249 		rdev->config.cik.max_gs_threads = 16;
3250 		rdev->config.cik.max_hw_contexts = 8;
3251 
3252 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3253 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3254 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3255 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3256 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3257 		break;
3258 	}
3259 
3260 	/* Initialize HDP */
3261 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3262 		WREG32((0x2c14 + j), 0x00000000);
3263 		WREG32((0x2c18 + j), 0x00000000);
3264 		WREG32((0x2c1c + j), 0x00000000);
3265 		WREG32((0x2c20 + j), 0x00000000);
3266 		WREG32((0x2c24 + j), 0x00000000);
3267 	}
3268 
3269 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3270 
3271 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3272 
3273 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3274 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3275 
3276 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3277 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3278 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3279 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3280 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3281 		rdev->config.cik.mem_row_size_in_kb = 4;
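	/* Worked example: NOOFCOLS = 0 decodes to 4 * 2^8 / 1024 = 1 KB
	 * rows and NOOFCOLS = 2 to 4 KB; larger values are clamped to
	 * the 4 KB maximum by the check above. */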
3282 	/* XXX use MC settings? */
3283 	rdev->config.cik.shader_engine_tile_size = 32;
3284 	rdev->config.cik.num_gpus = 1;
3285 	rdev->config.cik.multi_gpu_tile_size = 64;
3286 
3287 	/* fix up row size */
3288 	gb_addr_config &= ~ROW_SIZE_MASK;
3289 	switch (rdev->config.cik.mem_row_size_in_kb) {
3290 	case 1:
3291 	default:
3292 		gb_addr_config |= ROW_SIZE(0);
3293 		break;
3294 	case 2:
3295 		gb_addr_config |= ROW_SIZE(1);
3296 		break;
3297 	case 4:
3298 		gb_addr_config |= ROW_SIZE(2);
3299 		break;
3300 	}
3301 
3302 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3303 	 * not have bank info, so create a custom tiling dword.
3304 	 * bits 3:0   num_pipes
3305 	 * bits 7:4   num_banks
3306 	 * bits 11:8  group_size
3307 	 * bits 15:12 row_size
3308 	 */
3309 	rdev->config.cik.tile_config = 0;
3310 	switch (rdev->config.cik.num_tile_pipes) {
3311 	case 1:
3312 		rdev->config.cik.tile_config |= (0 << 0);
3313 		break;
3314 	case 2:
3315 		rdev->config.cik.tile_config |= (1 << 0);
3316 		break;
3317 	case 4:
3318 		rdev->config.cik.tile_config |= (2 << 0);
3319 		break;
3320 	case 8:
3321 	default:
3322 		/* XXX what about 12? */
3323 		rdev->config.cik.tile_config |= (3 << 0);
3324 		break;
3325 	}
3326 	rdev->config.cik.tile_config |=
3327 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3328 	rdev->config.cik.tile_config |=
3329 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3330 	rdev->config.cik.tile_config |=
3331 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
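	/* Illustrative decode of the dword built above (a sketch only;
	 * the bank/group decodings are consumer conventions and should
	 * be treated as assumptions here):
	 *	num_pipes  = 1 << (tile_config & 0xf);
	 *	num_banks  = (tile_config >> 4) & 0xf;   raw NOOFBANK field
	 *	group_size = 256 << ((tile_config >> 8) & 0xf);
	 *	row_size   = 1024 << ((tile_config >> 12) & 0xf);
	 */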
3332 
3333 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3334 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3335 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3336 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3337 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3338 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3339 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3340 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3341 
3342 	cik_tiling_mode_table_init(rdev);
3343 
3344 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3345 		     rdev->config.cik.max_sh_per_se,
3346 		     rdev->config.cik.max_backends_per_se);
3347 
3348 	/* set HW defaults for 3D engine */
3349 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3350 
3351 	WREG32(SX_DEBUG_1, 0x20);
3352 
3353 	WREG32(TA_CNTL_AUX, 0x00010000);
3354 
3355 	tmp = RREG32(SPI_CONFIG_CNTL);
3356 	tmp |= 0x03000000;
3357 	WREG32(SPI_CONFIG_CNTL, tmp);
3358 
3359 	WREG32(SQ_CONFIG, 1);
3360 
3361 	WREG32(DB_DEBUG, 0);
3362 
3363 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3364 	tmp |= 0x00000400;
3365 	WREG32(DB_DEBUG2, tmp);
3366 
3367 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3368 	tmp |= 0x00020200;
3369 	WREG32(DB_DEBUG3, tmp);
3370 
3371 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3372 	tmp |= 0x00018208;
3373 	WREG32(CB_HW_CONTROL, tmp);
3374 
3375 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3376 
3377 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3378 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3379 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3380 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3381 
3382 	WREG32(VGT_NUM_INSTANCES, 1);
3383 
3384 	WREG32(CP_PERFMON_CNTL, 0);
3385 
3386 	WREG32(SQ_CONFIG, 0);
3387 
3388 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3389 					  FORCE_EOV_MAX_REZ_CNT(255)));
3390 
3391 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3392 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3393 
3394 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3395 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3396 
3397 	tmp = RREG32(HDP_MISC_CNTL);
3398 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3399 	WREG32(HDP_MISC_CNTL, tmp);
3400 
3401 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3402 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3403 
3404 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3405 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3406 
3407 	udelay(50);
3408 }
3409 
3410 /*
3411  * GPU scratch register helper functions.
3412  */
3413 /**
3414  * cik_scratch_init - setup driver info for CP scratch regs
3415  *
3416  * @rdev: radeon_device pointer
3417  *
3418  * Set up the number and offset of the CP scratch registers.
3419  * NOTE: use of CP scratch registers is a legacy interface and
3420  * is not used by default on newer asics (r6xx+).  On newer asics,
3421  * memory buffers are used for fences rather than scratch regs.
3422  */
3423 static void cik_scratch_init(struct radeon_device *rdev)
3424 {
3425 	int i;
3426 
3427 	rdev->scratch.num_reg = 7;
3428 	rdev->scratch.reg_base = SCRATCH_REG0;
3429 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3430 		rdev->scratch.free[i] = true;
3431 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3432 	}
3433 }
3434 
3435 /**
3436  * cik_ring_test - basic gfx ring test
3437  *
3438  * @rdev: radeon_device pointer
3439  * @ring: radeon_ring structure holding ring information
3440  *
3441  * Allocate a scratch register and write to it using the gfx ring (CIK).
3442  * Provides a basic gfx ring test to verify that the ring is working.
3443  * Used by cik_cp_gfx_resume().
3444  * Returns 0 on success, error on failure.
3445  */
3446 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3447 {
3448 	uint32_t scratch;
3449 	uint32_t tmp = 0;
3450 	unsigned i;
3451 	int r;
3452 
3453 	r = radeon_scratch_get(rdev, &scratch);
3454 	if (r) {
3455 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3456 		return r;
3457 	}
3458 	WREG32(scratch, 0xCAFEDEAD);
3459 	r = radeon_ring_lock(rdev, ring, 3);
3460 	if (r) {
3461 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3462 		radeon_scratch_free(rdev, scratch);
3463 		return r;
3464 	}
3465 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3466 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3467 	radeon_ring_write(ring, 0xDEADBEEF);
3468 	radeon_ring_unlock_commit(rdev, ring);
3469 
3470 	for (i = 0; i < rdev->usec_timeout; i++) {
3471 		tmp = RREG32(scratch);
3472 		if (tmp == 0xDEADBEEF)
3473 			break;
3474 		DRM_UDELAY(1);
3475 	}
3476 	if (i < rdev->usec_timeout) {
3477 		DRM_INFO("ring test on %d succeeded in %u usecs\n", ring->idx, i);
3478 	} else {
3479 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3480 			  ring->idx, scratch, tmp);
3481 		r = -EINVAL;
3482 	}
3483 	radeon_scratch_free(rdev, scratch);
3484 	return r;
3485 }
3486 
3487 /**
3488  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3489  *
3490  * @rdev: radeon_device pointer
3491  * @fence: radeon fence object
3492  *
3493  * Emits a fence sequence number on the gfx ring and flushes
3494  * GPU caches.
3495  */
3496 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3497 			     struct radeon_fence *fence)
3498 {
3499 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3500 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3501 
3502 	/* EVENT_WRITE_EOP - flush caches, send int */
3503 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3504 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3505 				 EOP_TC_ACTION_EN |
3506 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3507 				 EVENT_INDEX(5)));
3508 	radeon_ring_write(ring, addr & 0xfffffffc);
3509 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3510 	radeon_ring_write(ring, fence->seq);
3511 	radeon_ring_write(ring, 0);
3512 	/* HDP flush */
3513 	/* We should be using the new WAIT_REG_MEM special op packet here
3514 	 * but it causes the CP to hang
3515 	 */
3516 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3517 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3518 				 WRITE_DATA_DST_SEL(0)));
3519 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3520 	radeon_ring_write(ring, 0);
3521 	radeon_ring_write(ring, 0);
3522 }
3523 
3524 /**
3525  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3526  *
3527  * @rdev: radeon_device pointer
3528  * @fence: radeon fence object
3529  *
3530  * Emits a fence sequence number on the compute ring and flushes
3531  * GPU caches.
3532  */
3533 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3534 				 struct radeon_fence *fence)
3535 {
3536 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3537 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3538 
3539 	/* RELEASE_MEM - flush caches, send int */
3540 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3541 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3542 				 EOP_TC_ACTION_EN |
3543 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3544 				 EVENT_INDEX(5)));
3545 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3546 	radeon_ring_write(ring, addr & 0xfffffffc);
3547 	radeon_ring_write(ring, upper_32_bits(addr));
3548 	radeon_ring_write(ring, fence->seq);
3549 	radeon_ring_write(ring, 0);
3550 	/* HDP flush */
3551 	/* We should be using the new WAIT_REG_MEM special op packet here
3552 	 * but it causes the CP to hang
3553 	 */
3554 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3555 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3556 				 WRITE_DATA_DST_SEL(0)));
3557 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3558 	radeon_ring_write(ring, 0);
3559 	radeon_ring_write(ring, 0);
3560 }
3561 
3562 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3563 			     struct radeon_ring *ring,
3564 			     struct radeon_semaphore *semaphore,
3565 			     bool emit_wait)
3566 {
3567 /* TODO: figure out why semaphores cause lockups */
3568 #if 0
3569 	uint64_t addr = semaphore->gpu_addr;
3570 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3571 
3572 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3573 	radeon_ring_write(ring, addr & 0xffffffff);
3574 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3575 
3576 	return true;
3577 #else
3578 	return false;
3579 #endif
3580 }
3581 
3582 /**
3583  * cik_copy_cpdma - copy pages using the CP DMA engine
3584  *
3585  * @rdev: radeon_device pointer
3586  * @src_offset: src GPU address
3587  * @dst_offset: dst GPU address
3588  * @num_gpu_pages: number of GPU pages to xfer
3589  * @fence: radeon fence object
3590  *
3591  * Copy GPU pages using the CP DMA engine (CIK+).
3592  * Used by the radeon ttm implementation to move pages if
3593  * registered as the asic copy callback.
3594  */
3595 int cik_copy_cpdma(struct radeon_device *rdev,
3596 		   uint64_t src_offset, uint64_t dst_offset,
3597 		   unsigned num_gpu_pages,
3598 		   struct radeon_fence **fence)
3599 {
3600 	struct radeon_semaphore *sem = NULL;
3601 	int ring_index = rdev->asic->copy.blit_ring_index;
3602 	struct radeon_ring *ring = &rdev->ring[ring_index];
3603 	u32 size_in_bytes, cur_size_in_bytes, control;
3604 	int i, num_loops;
3605 	int r = 0;
3606 
3607 	r = radeon_semaphore_create(rdev, &sem);
3608 	if (r) {
3609 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3610 		return r;
3611 	}
3612 
3613 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3614 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
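	/* Each loop below emits one 7-dword DMA_DATA packet moving at
	 * most 0x1fffff bytes, hence the num_loops * 7 reservation; the
	 * extra 18 dwords cover the semaphore sync and fence packets
	 * (the exact split of that headroom is an assumption).  E.g.
	 * 1024 pages (4 MiB) take three loops, since two maximal chunks
	 * fall 2 bytes short of 4 MiB. */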
3615 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3616 	if (r) {
3617 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3618 		radeon_semaphore_free(rdev, &sem, NULL);
3619 		return r;
3620 	}
3621 
3622 	radeon_semaphore_sync_to(sem, *fence);
3623 	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
3624 
3625 	for (i = 0; i < num_loops; i++) {
3626 		cur_size_in_bytes = size_in_bytes;
3627 		if (cur_size_in_bytes > 0x1fffff)
3628 			cur_size_in_bytes = 0x1fffff;
3629 		size_in_bytes -= cur_size_in_bytes;
3630 		control = 0;
3631 		if (size_in_bytes == 0)
3632 			control |= PACKET3_DMA_DATA_CP_SYNC;
3633 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3634 		radeon_ring_write(ring, control);
3635 		radeon_ring_write(ring, lower_32_bits(src_offset));
3636 		radeon_ring_write(ring, upper_32_bits(src_offset));
3637 		radeon_ring_write(ring, lower_32_bits(dst_offset));
3638 		radeon_ring_write(ring, upper_32_bits(dst_offset));
3639 		radeon_ring_write(ring, cur_size_in_bytes);
3640 		src_offset += cur_size_in_bytes;
3641 		dst_offset += cur_size_in_bytes;
3642 	}
3643 
3644 	r = radeon_fence_emit(rdev, fence, ring->idx);
3645 	if (r) {
3646 		radeon_ring_unlock_undo(rdev, ring);
3647 		return r;
3648 	}
3649 
3650 	radeon_ring_unlock_commit(rdev, ring);
3651 	radeon_semaphore_free(rdev, &sem, *fence);
3652 
3653 	return r;
3654 }
3655 
3656 /*
3657  * IB stuff
3658  */
3659 /**
3660  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3661  *
3662  * @rdev: radeon_device pointer
3663  * @ib: radeon indirect buffer object
3664  *
3665  * Emits a DE (drawing engine) or CE (constant engine) IB
3666  * on the gfx ring.  IBs are usually generated by userspace
3667  * acceleration drivers and submitted to the kernel for
3668  * scheduling on the ring.  This function schedules the IB
3669  * on the gfx ring for execution by the GPU.
3670  */
3671 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3672 {
3673 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3674 	u32 header, control = INDIRECT_BUFFER_VALID;
3675 
3676 	if (ib->is_const_ib) {
3677 		/* set switch buffer packet before const IB */
3678 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3679 		radeon_ring_write(ring, 0);
3680 
3681 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3682 	} else {
3683 		u32 next_rptr;
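		/* next_rptr points just past the dwords emitted ahead of
		 * the IB packet: 3 for the SET_UCONFIG_REG write or 5 for
		 * the WRITE_DATA packet, plus 4 for the INDIRECT_BUFFER
		 * packet written at the end of this function. */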
3684 		if (ring->rptr_save_reg) {
3685 			next_rptr = ring->wptr + 3 + 4;
3686 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3687 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3688 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3689 			radeon_ring_write(ring, next_rptr);
3690 		} else if (rdev->wb.enabled) {
3691 			next_rptr = ring->wptr + 5 + 4;
3692 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3693 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3694 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3695 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3696 			radeon_ring_write(ring, next_rptr);
3697 		}
3698 
3699 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3700 	}
3701 
3702 	control |= ib->length_dw |
3703 		(ib->vm ? (ib->vm->id << 24) : 0);
3704 
3705 	radeon_ring_write(ring, header);
3706 	radeon_ring_write(ring,
3707 #ifdef __BIG_ENDIAN
3708 			  (2 << 0) |
3709 #endif
3710 			  (ib->gpu_addr & 0xFFFFFFFC));
3711 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3712 	radeon_ring_write(ring, control);
3713 }
3714 
3715 /**
3716  * cik_ib_test - basic gfx ring IB test
3717  *
3718  * @rdev: radeon_device pointer
3719  * @ring: radeon_ring structure holding ring information
3720  *
3721  * Allocate an IB and execute it on the gfx ring (CIK).
3722  * Provides a basic gfx ring test to verify that IBs are working.
3723  * Returns 0 on success, error on failure.
3724  */
3725 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3726 {
3727 	struct radeon_ib ib;
3728 	uint32_t scratch;
3729 	uint32_t tmp = 0;
3730 	unsigned i;
3731 	int r;
3732 
3733 	r = radeon_scratch_get(rdev, &scratch);
3734 	if (r) {
3735 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3736 		return r;
3737 	}
3738 	WREG32(scratch, 0xCAFEDEAD);
3739 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3740 	if (r) {
3741 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3742 		radeon_scratch_free(rdev, scratch);
3743 		return r;
3744 	}
3745 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3746 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3747 	ib.ptr[2] = 0xDEADBEEF;
3748 	ib.length_dw = 3;
3749 	r = radeon_ib_schedule(rdev, &ib, NULL);
3750 	if (r) {
3751 		radeon_scratch_free(rdev, scratch);
3752 		radeon_ib_free(rdev, &ib);
3753 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3754 		return r;
3755 	}
3756 	r = radeon_fence_wait(ib.fence, false);
3757 	if (r) {
3758 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3759 		radeon_scratch_free(rdev, scratch);
3760 		radeon_ib_free(rdev, &ib);
3761 		return r;
3762 	}
3763 	for (i = 0; i < rdev->usec_timeout; i++) {
3764 		tmp = RREG32(scratch);
3765 		if (tmp == 0xDEADBEEF)
3766 			break;
3767 		DRM_UDELAY(1);
3768 	}
3769 	if (i < rdev->usec_timeout) {
3770 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3771 	} else {
3772 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3773 			  scratch, tmp);
3774 		r = -EINVAL;
3775 	}
3776 	radeon_scratch_free(rdev, scratch);
3777 	radeon_ib_free(rdev, &ib);
3778 	return r;
3779 }
3780 
3781 /*
3782  * CP.
3783  * On CIK, gfx and compute now have independent command processors.
3784  *
3785  * GFX
3786  * Gfx consists of a single ring and can process both gfx jobs and
3787  * compute jobs.  The gfx CP consists of three microengines (ME):
3788  * PFP - Pre-Fetch Parser
3789  * ME - Micro Engine
3790  * CE - Constant Engine
3791  * The PFP and ME make up what is considered the Drawing Engine (DE).
3792  * The CE is an asynchronous engine used for updating buffer descriptors
3793  * used by the DE so that they can be loaded into cache in parallel
3794  * while the DE is processing state update packets.
3795  *
3796  * Compute
3797  * The compute CP consists of two microengines (ME):
3798  * MEC1 - Compute MicroEngine 1
3799  * MEC2 - Compute MicroEngine 2
3800  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3801  * The queues are exposed to userspace and are programmed directly
3802  * by the compute runtime.
3803  */
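/*
 * Illustrative sketch (not part of the original driver): given the
 * topology described above (8 queues per pipe, 4 pipes per MEC), map a
 * linear compute queue index to its (me, pipe, queue) triple.  The
 * helper name is hypothetical.
 */
static inline void cik_example_queue_coords(u32 queue_idx,
					    u32 *me, u32 *pipe, u32 *queue)
{
	*queue = queue_idx % 8;		/* 8 queues per pipe */
	*pipe = (queue_idx / 8) % 4;	/* 4 pipes per MEC */
	*me = 1 + (queue_idx / 32);	/* MEC1 covers 0-31, MEC2 32-63 */
}
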
3804 /**
3805  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3806  *
3807  * @rdev: radeon_device pointer
3808  * @enable: enable or disable the MEs
3809  *
3810  * Halts or unhalts the gfx MEs.
3811  */
3812 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3813 {
3814 	if (enable)
3815 		WREG32(CP_ME_CNTL, 0);
3816 	else {
3817 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3818 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3819 	}
3820 	udelay(50);
3821 }
3822 
3823 /**
3824  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3825  *
3826  * @rdev: radeon_device pointer
3827  *
3828  * Loads the gfx PFP, ME, and CE ucode.
3829  * Returns 0 for success, -EINVAL if the ucode is not available.
3830  */
3831 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3832 {
3833 	const __be32 *fw_data;
3834 	int i;
3835 
3836 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3837 		return -EINVAL;
3838 
3839 	cik_cp_gfx_enable(rdev, false);
3840 
3841 	/* PFP */
3842 	fw_data = (const __be32 *)rdev->pfp_fw->data;
3843 	WREG32(CP_PFP_UCODE_ADDR, 0);
3844 	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3845 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3846 	WREG32(CP_PFP_UCODE_ADDR, 0);
3847 
3848 	/* CE */
3849 	fw_data = (const __be32 *)rdev->ce_fw->data;
3850 	WREG32(CP_CE_UCODE_ADDR, 0);
3851 	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3852 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3853 	WREG32(CP_CE_UCODE_ADDR, 0);
3854 
3855 	/* ME */
3856 	fw_data = (const __be32 *)rdev->me_fw->data;
3857 	WREG32(CP_ME_RAM_WADDR, 0);
3858 	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3859 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3860 	WREG32(CP_ME_RAM_WADDR, 0);
3861 
3862 	WREG32(CP_PFP_UCODE_ADDR, 0);
3863 	WREG32(CP_CE_UCODE_ADDR, 0);
3864 	WREG32(CP_ME_RAM_WADDR, 0);
3865 	WREG32(CP_ME_RAM_RADDR, 0);
3866 	return 0;
3867 }
3868 
3869 /**
3870  * cik_cp_gfx_start - start the gfx ring
3871  *
3872  * @rdev: radeon_device pointer
3873  *
3874  * Enables the ring and loads the clear state context and other
3875  * packets required to init the ring.
3876  * Returns 0 for success, error for failure.
3877  */
3878 static int cik_cp_gfx_start(struct radeon_device *rdev)
3879 {
3880 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3881 	int r, i;
3882 
3883 	/* init the CP */
3884 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3885 	WREG32(CP_ENDIAN_SWAP, 0);
3886 	WREG32(CP_DEVICE_ID, 1);
3887 
3888 	cik_cp_gfx_enable(rdev, true);
3889 
3890 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3891 	if (r) {
3892 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3893 		return r;
3894 	}
3895 
3896 	/* init the CE partitions.  The CE is only used for gfx on CIK */
3897 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3898 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3899 	radeon_ring_write(ring, 0xc000);
3900 	radeon_ring_write(ring, 0xc000);
3901 
3902 	/* setup clear context state */
3903 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3904 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3905 
3906 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3907 	radeon_ring_write(ring, 0x80000000);
3908 	radeon_ring_write(ring, 0x80000000);
3909 
3910 	for (i = 0; i < cik_default_size; i++)
3911 		radeon_ring_write(ring, cik_default_state[i]);
3912 
3913 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3914 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3915 
3916 	/* set clear context state */
3917 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3918 	radeon_ring_write(ring, 0);
3919 
3920 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3921 	radeon_ring_write(ring, 0x00000316);
3922 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3923 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3924 
3925 	radeon_ring_unlock_commit(rdev, ring);
3926 
3927 	return 0;
3928 }
3929 
3930 /**
3931  * cik_cp_gfx_fini - stop the gfx ring
3932  *
3933  * @rdev: radeon_device pointer
3934  *
3935  * Stop the gfx ring and tear down the driver ring
3936  * info.
3937  */
3938 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3939 {
3940 	cik_cp_gfx_enable(rdev, false);
3941 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3942 }
3943 
3944 /**
3945  * cik_cp_gfx_resume - setup the gfx ring buffer registers
3946  *
3947  * @rdev: radeon_device pointer
3948  *
3949  * Program the location and size of the gfx ring buffer
3950  * and test it to make sure it's working.
3951  * Returns 0 for success, error for failure.
3952  */
3953 static int cik_cp_gfx_resume(struct radeon_device *rdev)
3954 {
3955 	struct radeon_ring *ring;
3956 	u32 tmp;
3957 	u32 rb_bufsz;
3958 	u64 rb_addr;
3959 	int r;
3960 
3961 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3962 	if (rdev->family != CHIP_HAWAII)
3963 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3964 
3965 	/* Set the write pointer delay */
3966 	WREG32(CP_RB_WPTR_DELAY, 0);
3967 
3968 	/* set the RB to use vmid 0 */
3969 	WREG32(CP_RB_VMID, 0);
3970 
3971 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3972 
3973 	/* ring 0 - compute and gfx */
3974 	/* Set ring buffer size */
3975 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3976 	rb_bufsz = order_base_2(ring->ring_size / 8);
3977 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
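	/* Worked example: a 1 MiB ring gives rb_bufsz =
	 * order_base_2(1048576 / 8) = 17, and with 4 KiB GPU pages the
	 * upper field is order_base_2(4096 / 8) = 9, so tmp becomes
	 * (9 << 8) | 17 before any endian-swap flag is ORed in. */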
3978 #ifdef __BIG_ENDIAN
3979 	tmp |= BUF_SWAP_32BIT;
3980 #endif
3981 	WREG32(CP_RB0_CNTL, tmp);
3982 
3983 	/* Initialize the ring buffer's read and write pointers */
3984 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3985 	ring->wptr = 0;
3986 	WREG32(CP_RB0_WPTR, ring->wptr);
3987 
3988 	/* set the wb address whether it's enabled or not */
3989 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3990 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3991 
3992 	/* scratch register shadowing is no longer supported */
3993 	WREG32(SCRATCH_UMSK, 0);
3994 
3995 	if (!rdev->wb.enabled)
3996 		tmp |= RB_NO_UPDATE;
3997 
3998 	mdelay(1);
3999 	WREG32(CP_RB0_CNTL, tmp);
4000 
4001 	rb_addr = ring->gpu_addr >> 8;
4002 	WREG32(CP_RB0_BASE, rb_addr);
4003 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4004 
4005 	ring->rptr = RREG32(CP_RB0_RPTR);
4006 
4007 	/* start the ring */
4008 	cik_cp_gfx_start(rdev);
4009 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4010 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4011 	if (r) {
4012 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4013 		return r;
4014 	}
4015 	return 0;
4016 }
4017 
4018 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
4019 			      struct radeon_ring *ring)
4020 {
4021 	u32 rptr;
4022 
4025 	if (rdev->wb.enabled) {
4026 		rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
4027 	} else {
4028 		mutex_lock(&rdev->srbm_mutex);
4029 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4030 		rptr = RREG32(CP_HQD_PQ_RPTR);
4031 		cik_srbm_select(rdev, 0, 0, 0, 0);
4032 		mutex_unlock(&rdev->srbm_mutex);
4033 	}
4034 
4035 	return rptr;
4036 }
4037 
4038 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
4039 			      struct radeon_ring *ring)
4040 {
4041 	u32 wptr;
4042 
4043 	if (rdev->wb.enabled) {
4044 		wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
4045 	} else {
4046 		mutex_lock(&rdev->srbm_mutex);
4047 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4048 		wptr = RREG32(CP_HQD_PQ_WPTR);
4049 		cik_srbm_select(rdev, 0, 0, 0, 0);
4050 		mutex_unlock(&rdev->srbm_mutex);
4051 	}
4052 
4053 	return wptr;
4054 }
4055 
4056 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
4057 			       struct radeon_ring *ring)
4058 {
4059 	rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
4060 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4061 }
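
/*
 * The ordering above matters: the CPU-visible write-back copy of the
 * write pointer is updated first, then the doorbell is rung so the MEC
 * picks up the new value.  A minimal caller sketch (hypothetical):
 *
 *	ring->wptr = next_wptr;
 *	cik_compute_ring_set_wptr(rdev, ring);
 */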
4062 
4063 /**
4064  * cik_cp_compute_enable - enable/disable the compute CP MEs
4065  *
4066  * @rdev: radeon_device pointer
4067  * @enable: enable or disable the MEs
4068  *
4069  * Halts or unhalts the compute MEs.
4070  */
4071 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4072 {
4073 	if (enable)
4074 		WREG32(CP_MEC_CNTL, 0);
4075 	else
4076 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4077 	udelay(50);
4078 }
4079 
4080 /**
4081  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4082  *
4083  * @rdev: radeon_device pointer
4084  *
4085  * Loads the compute MEC1 and MEC2 ucode.
4086  * Returns 0 for success, -EINVAL if the ucode is not available.
4087  */
4088 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4089 {
4090 	const __be32 *fw_data;
4091 	int i;
4092 
4093 	if (!rdev->mec_fw)
4094 		return -EINVAL;
4095 
4096 	cik_cp_compute_enable(rdev, false);
4097 
4098 	/* MEC1 */
4099 	fw_data = (const __be32 *)rdev->mec_fw->data;
4100 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4101 	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4102 		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4103 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4104 
4105 	if (rdev->family == CHIP_KAVERI) {
4106 		/* MEC2 */
4107 		fw_data = (const __be32 *)rdev->mec_fw->data;
4108 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4109 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4110 			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4111 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4112 	}
4113 
4114 	return 0;
4115 }
4116 
4117 /**
4118  * cik_cp_compute_start - start the compute queues
4119  *
4120  * @rdev: radeon_device pointer
4121  *
4122  * Enable the compute queues.
4123  * Returns 0 for success, error for failure.
4124  */
4125 static int cik_cp_compute_start(struct radeon_device *rdev)
4126 {
4127 	cik_cp_compute_enable(rdev, true);
4128 
4129 	return 0;
4130 }
4131 
4132 /**
4133  * cik_cp_compute_fini - stop the compute queues
4134  *
4135  * @rdev: radeon_device pointer
4136  *
4137  * Stop the compute queues and tear down the driver queue
4138  * info.
4139  */
4140 static void cik_cp_compute_fini(struct radeon_device *rdev)
4141 {
4142 	int i, idx, r;
4143 
4144 	cik_cp_compute_enable(rdev, false);
4145 
4146 	for (i = 0; i < 2; i++) {
4147 		if (i == 0)
4148 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4149 		else
4150 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4151 
4152 		if (rdev->ring[idx].mqd_obj) {
4153 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4154 			if (unlikely(r != 0))
4155 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4156 
4157 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4158 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4159 
4160 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4161 			rdev->ring[idx].mqd_obj = NULL;
4162 		}
4163 	}
4164 }
4165 
4166 static void cik_mec_fini(struct radeon_device *rdev)
4167 {
4168 	int r;
4169 
4170 	if (rdev->mec.hpd_eop_obj) {
4171 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4172 		if (unlikely(r != 0))
4173 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4174 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4175 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4176 
4177 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4178 		rdev->mec.hpd_eop_obj = NULL;
4179 	}
4180 }
4181 
4182 #define MEC_HPD_SIZE 2048
4183 
4184 static int cik_mec_init(struct radeon_device *rdev)
4185 {
4186 	int r;
4187 	u32 *hpd;
4188 
4189 	/*
4190 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4191 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4192 	 */
4193 	if (rdev->family == CHIP_KAVERI)
4194 		rdev->mec.num_mec = 2;
4195 	else
4196 		rdev->mec.num_mec = 1;
4197 	rdev->mec.num_pipe = 4;
4198 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4199 
4200 	if (rdev->mec.hpd_eop_obj == NULL) {
4201 		r = radeon_bo_create(rdev,
4202 				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4203 				     PAGE_SIZE, true,
4204 				     RADEON_GEM_DOMAIN_GTT, NULL,
4205 				     &rdev->mec.hpd_eop_obj);
4206 		if (r) {
4207 			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4208 			return r;
4209 		}
4210 	}
4211 
4212 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4213 	if (unlikely(r != 0)) {
4214 		cik_mec_fini(rdev);
4215 		return r;
4216 	}
4217 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4218 			  &rdev->mec.hpd_eop_gpu_addr);
4219 	if (r) {
4220 		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4221 		cik_mec_fini(rdev);
4222 		return r;
4223 	}
4224 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4225 	if (r) {
4226 		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4227 		cik_mec_fini(rdev);
4228 		return r;
4229 	}
4230 
4231 	/* clear memory.  Not sure if this is required or not */
4232 	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4233 
4234 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4235 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4236 
4237 	return 0;
4238 }
4239 
4240 struct hqd_registers {
4242 	u32 cp_mqd_base_addr;
4243 	u32 cp_mqd_base_addr_hi;
4244 	u32 cp_hqd_active;
4245 	u32 cp_hqd_vmid;
4246 	u32 cp_hqd_persistent_state;
4247 	u32 cp_hqd_pipe_priority;
4248 	u32 cp_hqd_queue_priority;
4249 	u32 cp_hqd_quantum;
4250 	u32 cp_hqd_pq_base;
4251 	u32 cp_hqd_pq_base_hi;
4252 	u32 cp_hqd_pq_rptr;
4253 	u32 cp_hqd_pq_rptr_report_addr;
4254 	u32 cp_hqd_pq_rptr_report_addr_hi;
4255 	u32 cp_hqd_pq_wptr_poll_addr;
4256 	u32 cp_hqd_pq_wptr_poll_addr_hi;
4257 	u32 cp_hqd_pq_doorbell_control;
4258 	u32 cp_hqd_pq_wptr;
4259 	u32 cp_hqd_pq_control;
4260 	u32 cp_hqd_ib_base_addr;
4261 	u32 cp_hqd_ib_base_addr_hi;
4262 	u32 cp_hqd_ib_rptr;
4263 	u32 cp_hqd_ib_control;
4264 	u32 cp_hqd_iq_timer;
4265 	u32 cp_hqd_iq_rptr;
4266 	u32 cp_hqd_dequeue_request;
4267 	u32 cp_hqd_dma_offload;
4268 	u32 cp_hqd_sema_cmd;
4269 	u32 cp_hqd_msg_type;
4270 	u32 cp_hqd_atomic0_preop_lo;
4271 	u32 cp_hqd_atomic0_preop_hi;
4272 	u32 cp_hqd_atomic1_preop_lo;
4273 	u32 cp_hqd_atomic1_preop_hi;
4274 	u32 cp_hqd_hq_scheduler0;
4275 	u32 cp_hqd_hq_scheduler1;
4276 	u32 cp_mqd_control;
4277 };
4278 
4279 struct bonaire_mqd {
4281 	u32 header;
4282 	u32 dispatch_initiator;
4283 	u32 dimensions[3];
4284 	u32 start_idx[3];
4285 	u32 num_threads[3];
4286 	u32 pipeline_stat_enable;
4287 	u32 perf_counter_enable;
4288 	u32 pgm[2];
4289 	u32 tba[2];
4290 	u32 tma[2];
4291 	u32 pgm_rsrc[2];
4292 	u32 vmid;
4293 	u32 resource_limits;
4294 	u32 static_thread_mgmt01[2];
4295 	u32 tmp_ring_size;
4296 	u32 static_thread_mgmt23[2];
4297 	u32 restart[3];
4298 	u32 thread_trace_enable;
4299 	u32 reserved1;
4300 	u32 user_data[16];
4301 	u32 vgtcs_invoke_count[2];
4302 	struct hqd_registers queue_state;
4303 	u32 dequeue_cntr;
4304 	u32 interrupt_queue[64];
4305 };
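
/*
 * The MQD (memory queue descriptor) above is filled in a GTT buffer by
 * the driver and handed to the CP via CP_MQD_BASE_ADDR in
 * cik_cp_compute_resume() below; the queue_state sub-struct mirrors the
 * CP_HQD_* registers so queue state can be saved and restored.  (The
 * register mirroring is inferred from the field names.)
 */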
4306 
4307 /**
4308  * cik_cp_compute_resume - setup the compute queue registers
4309  *
4310  * @rdev: radeon_device pointer
4311  *
4312  * Program the compute queues and test them to make sure they
4313  * are working.
4314  * Returns 0 for success, error for failure.
4315  */
4316 static int cik_cp_compute_resume(struct radeon_device *rdev)
4317 {
4318 	int r, i, j, idx;
4319 	u32 tmp;
4320 	bool use_doorbell = true;
4321 	u64 hqd_gpu_addr;
4322 	u64 mqd_gpu_addr;
4323 	u64 eop_gpu_addr;
4324 	u64 wb_gpu_addr;
4325 	u32 *buf;
4326 	struct bonaire_mqd *mqd;
4327 
4328 	r = cik_cp_compute_start(rdev);
4329 	if (r)
4330 		return r;
4331 
4332 	/* fix up chicken bits */
4333 	tmp = RREG32(CP_CPF_DEBUG);
4334 	tmp |= (1 << 23);
4335 	WREG32(CP_CPF_DEBUG, tmp);
4336 
4337 	/* init the pipes */
4338 	mutex_lock(&rdev->srbm_mutex);
4339 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4340 		int me = (i < 4) ? 1 : 2;
4341 		int pipe = (i < 4) ? i : (i - 4);
4342 
4343 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4344 
4345 		cik_srbm_select(rdev, me, pipe, 0, 0);
4346 
4347 		/* write the EOP addr */
4348 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4349 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4350 
4351 		/* set the VMID assigned */
4352 		WREG32(CP_HPD_EOP_VMID, 0);
4353 
4354 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4355 		tmp = RREG32(CP_HPD_EOP_CONTROL);
4356 		tmp &= ~EOP_SIZE_MASK;
4357 		tmp |= order_base_2(MEC_HPD_SIZE / 8);
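		/* e.g. MEC_HPD_SIZE = 2048 bytes is 256 qwords, so
		 * order_base_2(256) = 8 and the EOP ring holds
		 * 2^(8+1) = 512 dwords = 2048 bytes, as expected. */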
4358 		WREG32(CP_HPD_EOP_CONTROL, tmp);
4359 	}
4360 	cik_srbm_select(rdev, 0, 0, 0, 0);
4361 	mutex_unlock(&rdev->srbm_mutex);
4362 
4363 	/* init the queues.  Just two for now. */
4364 	for (i = 0; i < 2; i++) {
4365 		if (i == 0)
4366 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4367 		else
4368 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4369 
4370 		if (rdev->ring[idx].mqd_obj == NULL) {
4371 			r = radeon_bo_create(rdev,
4372 					     sizeof(struct bonaire_mqd),
4373 					     PAGE_SIZE, true,
4374 					     RADEON_GEM_DOMAIN_GTT, NULL,
4375 					     &rdev->ring[idx].mqd_obj);
4376 			if (r) {
4377 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4378 				return r;
4379 			}
4380 		}
4381 
4382 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4383 		if (unlikely(r != 0)) {
4384 			cik_cp_compute_fini(rdev);
4385 			return r;
4386 		}
4387 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4388 				  &mqd_gpu_addr);
4389 		if (r) {
4390 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4391 			cik_cp_compute_fini(rdev);
4392 			return r;
4393 		}
4394 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4395 		if (r) {
4396 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4397 			cik_cp_compute_fini(rdev);
4398 			return r;
4399 		}
4400 
4401 		/* init the mqd struct */
4402 		memset(buf, 0, sizeof(struct bonaire_mqd));
4403 
4404 		mqd = (struct bonaire_mqd *)buf;
4405 		mqd->header = 0xC0310800;
4406 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4407 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4408 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4409 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4410 
4411 		mutex_lock(&rdev->srbm_mutex);
4412 		cik_srbm_select(rdev, rdev->ring[idx].me,
4413 				rdev->ring[idx].pipe,
4414 				rdev->ring[idx].queue, 0);
4415 
4416 		/* disable wptr polling */
4417 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4418 		tmp &= ~WPTR_POLL_EN;
4419 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4420 
4421 		/* enable doorbell? */
4422 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4423 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4424 		if (use_doorbell)
4425 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4426 		else
4427 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4428 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4429 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4430 
4431 		/* disable the queue if it's active */
4432 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4433 		mqd->queue_state.cp_hqd_pq_rptr = 0;
4434 		mqd->queue_state.cp_hqd_pq_wptr = 0;
4435 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4436 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4437 			for (j = 0; j < rdev->usec_timeout; j++) {
4438 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4439 					break;
4440 				udelay(1);
4441 			}
4442 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4443 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4444 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4445 		}
4446 
4447 		/* set the pointer to the MQD */
4448 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4449 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4450 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4451 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4452 		/* set MQD vmid to 0 */
4453 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4454 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4455 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4456 
4457 		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4458 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4459 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4460 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4461 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4462 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4463 
4464 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4465 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4466 		mqd->queue_state.cp_hqd_pq_control &=
4467 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4468 
4469 		mqd->queue_state.cp_hqd_pq_control |=
4470 			order_base_2(rdev->ring[idx].ring_size / 8);
4471 		mqd->queue_state.cp_hqd_pq_control |=
4472 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4473 #ifdef __BIG_ENDIAN
4474 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4475 #endif
4476 		mqd->queue_state.cp_hqd_pq_control &=
4477 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4478 		mqd->queue_state.cp_hqd_pq_control |=
4479 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4480 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4481 
4482 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4483 		if (i == 0)
4484 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4485 		else
4486 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4487 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4488 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4489 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4490 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4491 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4492 
4493 		/* set the wb address whether it's enabled or not */
4494 		if (i == 0)
4495 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4496 		else
4497 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4498 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4499 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4500 			upper_32_bits(wb_gpu_addr) & 0xffff;
4501 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4502 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4503 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4504 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4505 
4506 		/* enable the doorbell if requested */
4507 		if (use_doorbell) {
4508 			mqd->queue_state.cp_hqd_pq_doorbell_control =
4509 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4510 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4511 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4512 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4513 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4514 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4515 				~(DOORBELL_SOURCE | DOORBELL_HIT);
4516 
4517 		} else {
4518 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4519 		}
4520 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4521 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4522 
4523 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4524 		rdev->ring[idx].wptr = 0;
4525 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4526 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4527 		rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
4528 		mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
4529 
4530 		/* set the vmid for the queue */
4531 		mqd->queue_state.cp_hqd_vmid = 0;
4532 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4533 
4534 		/* activate the queue */
4535 		mqd->queue_state.cp_hqd_active = 1;
4536 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4537 
4538 		cik_srbm_select(rdev, 0, 0, 0, 0);
4539 		mutex_unlock(&rdev->srbm_mutex);
4540 
4541 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4542 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4543 
4544 		rdev->ring[idx].ready = true;
4545 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4546 		if (r)
4547 			rdev->ring[idx].ready = false;
4548 	}
4549 
4550 	return 0;
4551 }
4552 
4553 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4554 {
4555 	cik_cp_gfx_enable(rdev, enable);
4556 	cik_cp_compute_enable(rdev, enable);
4557 }
4558 
4559 static int cik_cp_load_microcode(struct radeon_device *rdev)
4560 {
4561 	int r;
4562 
4563 	r = cik_cp_gfx_load_microcode(rdev);
4564 	if (r)
4565 		return r;
4566 	r = cik_cp_compute_load_microcode(rdev);
4567 	if (r)
4568 		return r;
4569 
4570 	return 0;
4571 }
4572 
4573 static void cik_cp_fini(struct radeon_device *rdev)
4574 {
4575 	cik_cp_gfx_fini(rdev);
4576 	cik_cp_compute_fini(rdev);
4577 }
4578 
4579 static int cik_cp_resume(struct radeon_device *rdev)
4580 {
4581 	int r;
4582 
4583 	cik_enable_gui_idle_interrupt(rdev, false);
4584 
4585 	r = cik_cp_load_microcode(rdev);
4586 	if (r)
4587 		return r;
4588 
4589 	r = cik_cp_gfx_resume(rdev);
4590 	if (r)
4591 		return r;
4592 	r = cik_cp_compute_resume(rdev);
4593 	if (r)
4594 		return r;
4595 
4596 	cik_enable_gui_idle_interrupt(rdev, true);
4597 
4598 	return 0;
4599 }
4600 
4601 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4602 {
4603 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4604 		RREG32(GRBM_STATUS));
4605 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4606 		RREG32(GRBM_STATUS2));
4607 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4608 		RREG32(GRBM_STATUS_SE0));
4609 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4610 		RREG32(GRBM_STATUS_SE1));
4611 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4612 		RREG32(GRBM_STATUS_SE2));
4613 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4614 		RREG32(GRBM_STATUS_SE3));
4615 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4616 		RREG32(SRBM_STATUS));
4617 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4618 		RREG32(SRBM_STATUS2));
4619 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4620 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4621 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4622 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4623 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4624 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4625 		 RREG32(CP_STALLED_STAT1));
4626 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4627 		 RREG32(CP_STALLED_STAT2));
4628 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4629 		 RREG32(CP_STALLED_STAT3));
4630 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4631 		 RREG32(CP_CPF_BUSY_STAT));
4632 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4633 		 RREG32(CP_CPF_STALLED_STAT1));
4634 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4635 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4636 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4637 		 RREG32(CP_CPC_STALLED_STAT1));
4638 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4639 }
4640 
4641 /**
4642  * cik_gpu_check_soft_reset - check which blocks are busy
4643  *
4644  * @rdev: radeon_device pointer
4645  *
4646  * Check which blocks are busy and return the relevant reset
4647  * mask to be used by cik_gpu_soft_reset().
4648  * Returns a mask of the blocks to be reset.
4649  */
4650 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4651 {
4652 	u32 reset_mask = 0;
4653 	u32 tmp;
4654 
4655 	/* GRBM_STATUS */
4656 	tmp = RREG32(GRBM_STATUS);
4657 	if (tmp & (PA_BUSY | SC_BUSY |
4658 		   BCI_BUSY | SX_BUSY |
4659 		   TA_BUSY | VGT_BUSY |
4660 		   DB_BUSY | CB_BUSY |
4661 		   GDS_BUSY | SPI_BUSY |
4662 		   IA_BUSY | IA_BUSY_NO_DMA))
4663 		reset_mask |= RADEON_RESET_GFX;
4664 
4665 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4666 		reset_mask |= RADEON_RESET_CP;
4667 
4668 	/* GRBM_STATUS2 */
4669 	tmp = RREG32(GRBM_STATUS2);
4670 	if (tmp & RLC_BUSY)
4671 		reset_mask |= RADEON_RESET_RLC;
4672 
4673 	/* SDMA0_STATUS_REG */
4674 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4675 	if (!(tmp & SDMA_IDLE))
4676 		reset_mask |= RADEON_RESET_DMA;
4677 
4678 	/* SDMA1_STATUS_REG */
4679 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4680 	if (!(tmp & SDMA_IDLE))
4681 		reset_mask |= RADEON_RESET_DMA1;
4682 
4683 	/* SRBM_STATUS2 */
4684 	tmp = RREG32(SRBM_STATUS2);
4685 	if (tmp & SDMA_BUSY)
4686 		reset_mask |= RADEON_RESET_DMA;
4687 
4688 	if (tmp & SDMA1_BUSY)
4689 		reset_mask |= RADEON_RESET_DMA1;
4690 
4691 	/* SRBM_STATUS */
4692 	tmp = RREG32(SRBM_STATUS);
4693 
4694 	if (tmp & IH_BUSY)
4695 		reset_mask |= RADEON_RESET_IH;
4696 
4697 	if (tmp & SEM_BUSY)
4698 		reset_mask |= RADEON_RESET_SEM;
4699 
4700 	if (tmp & GRBM_RQ_PENDING)
4701 		reset_mask |= RADEON_RESET_GRBM;
4702 
4703 	if (tmp & VMC_BUSY)
4704 		reset_mask |= RADEON_RESET_VMC;
4705 
4706 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4707 		   MCC_BUSY | MCD_BUSY))
4708 		reset_mask |= RADEON_RESET_MC;
4709 
4710 	if (evergreen_is_display_hung(rdev))
4711 		reset_mask |= RADEON_RESET_DISPLAY;
4712 
4713 	/* Skip MC reset as it's most likely not hung, just busy */
4714 	if (reset_mask & RADEON_RESET_MC) {
4715 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4716 		reset_mask &= ~RADEON_RESET_MC;
4717 	}
4718 
4719 	return reset_mask;
4720 }
4721 
4722 /**
4723  * cik_gpu_soft_reset - soft reset GPU
4724  *
4725  * @rdev: radeon_device pointer
4726  * @reset_mask: mask of which blocks to reset
4727  *
4728  * Soft reset the blocks specified in @reset_mask.
4729  */
4730 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4731 {
4732 	struct evergreen_mc_save save;
4733 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4734 	u32 tmp;
4735 
4736 	if (reset_mask == 0)
4737 		return;
4738 
4739 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4740 
4741 	cik_print_gpu_status_regs(rdev);
4742 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4743 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4744 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4745 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4746 
4747 	/* disable CG/PG */
4748 	cik_fini_pg(rdev);
4749 	cik_fini_cg(rdev);
4750 
4751 	/* stop the rlc */
4752 	cik_rlc_stop(rdev);
4753 
4754 	/* Disable GFX parsing/prefetching */
4755 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4756 
4757 	/* Disable MEC parsing/prefetching */
4758 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4759 
4760 	if (reset_mask & RADEON_RESET_DMA) {
4761 		/* sdma0 */
4762 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4763 		tmp |= SDMA_HALT;
4764 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4765 	}
4766 	if (reset_mask & RADEON_RESET_DMA1) {
4767 		/* sdma1 */
4768 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4769 		tmp |= SDMA_HALT;
4770 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4771 	}
4772 
4773 	evergreen_mc_stop(rdev, &save);
4774 	if (evergreen_mc_wait_for_idle(rdev)) {
4775 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4776 	}
4777 
4778 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4779 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4780 
4781 	if (reset_mask & RADEON_RESET_CP) {
4782 		grbm_soft_reset |= SOFT_RESET_CP;
4783 
4784 		srbm_soft_reset |= SOFT_RESET_GRBM;
4785 	}
4786 
4787 	if (reset_mask & RADEON_RESET_DMA)
4788 		srbm_soft_reset |= SOFT_RESET_SDMA;
4789 
4790 	if (reset_mask & RADEON_RESET_DMA1)
4791 		srbm_soft_reset |= SOFT_RESET_SDMA1;
4792 
4793 	if (reset_mask & RADEON_RESET_DISPLAY)
4794 		srbm_soft_reset |= SOFT_RESET_DC;
4795 
4796 	if (reset_mask & RADEON_RESET_RLC)
4797 		grbm_soft_reset |= SOFT_RESET_RLC;
4798 
4799 	if (reset_mask & RADEON_RESET_SEM)
4800 		srbm_soft_reset |= SOFT_RESET_SEM;
4801 
4802 	if (reset_mask & RADEON_RESET_IH)
4803 		srbm_soft_reset |= SOFT_RESET_IH;
4804 
4805 	if (reset_mask & RADEON_RESET_GRBM)
4806 		srbm_soft_reset |= SOFT_RESET_GRBM;
4807 
4808 	if (reset_mask & RADEON_RESET_VMC)
4809 		srbm_soft_reset |= SOFT_RESET_VMC;
4810 
4811 	if (!(rdev->flags & RADEON_IS_IGP)) {
4812 		if (reset_mask & RADEON_RESET_MC)
4813 			srbm_soft_reset |= SOFT_RESET_MC;
4814 	}
4815 
4816 	if (grbm_soft_reset) {
4817 		tmp = RREG32(GRBM_SOFT_RESET);
4818 		tmp |= grbm_soft_reset;
4819 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4820 		WREG32(GRBM_SOFT_RESET, tmp);
4821 		tmp = RREG32(GRBM_SOFT_RESET);
4822 
4823 		udelay(50);
4824 
4825 		tmp &= ~grbm_soft_reset;
4826 		WREG32(GRBM_SOFT_RESET, tmp);
4827 		tmp = RREG32(GRBM_SOFT_RESET);
4828 	}
4829 
4830 	if (srbm_soft_reset) {
4831 		tmp = RREG32(SRBM_SOFT_RESET);
4832 		tmp |= srbm_soft_reset;
4833 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4834 		WREG32(SRBM_SOFT_RESET, tmp);
4835 		tmp = RREG32(SRBM_SOFT_RESET);
4836 
4837 		udelay(50);
4838 
4839 		tmp &= ~srbm_soft_reset;
4840 		WREG32(SRBM_SOFT_RESET, tmp);
4841 		tmp = RREG32(SRBM_SOFT_RESET);
4842 	}
4843 
4844 	/* Wait a little for things to settle down */
4845 	udelay(50);
4846 
4847 	evergreen_mc_resume(rdev, &save);
4848 	udelay(50);
4849 
4850 	cik_print_gpu_status_regs(rdev);
4851 }
4852 
4853 /**
4854  * cik_asic_reset - soft reset GPU
4855  *
4856  * @rdev: radeon_device pointer
4857  *
4858  * Look up which blocks are hung and attempt
4859  * to reset them.
4860  * Returns 0 for success.
4861  */
4862 int cik_asic_reset(struct radeon_device *rdev)
4863 {
4864 	u32 reset_mask;
4865 
4866 	reset_mask = cik_gpu_check_soft_reset(rdev);
4867 
4868 	if (reset_mask)
4869 		r600_set_bios_scratch_engine_hung(rdev, true);
4870 
4871 	cik_gpu_soft_reset(rdev, reset_mask);
4872 
4873 	reset_mask = cik_gpu_check_soft_reset(rdev);
4874 
4875 	if (!reset_mask)
4876 		r600_set_bios_scratch_engine_hung(rdev, false);
4877 
4878 	return 0;
4879 }
4880 
4881 /**
4882  * cik_gfx_is_lockup - check if the 3D engine is locked up
4883  *
4884  * @rdev: radeon_device pointer
4885  * @ring: radeon_ring structure holding ring information
4886  *
4887  * Check if the 3D engine is locked up (CIK).
4888  * Returns true if the engine is locked, false if not.
4889  */
4890 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4891 {
4892 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4893 
4894 	if (!(reset_mask & (RADEON_RESET_GFX |
4895 			    RADEON_RESET_COMPUTE |
4896 			    RADEON_RESET_CP))) {
4897 		radeon_ring_lockup_update(ring);
4898 		return false;
4899 	}
4900 	/* force CP activities */
4901 	radeon_ring_force_activity(rdev, ring);
4902 	return radeon_ring_test_lockup(rdev, ring);
4903 }
4904 
4905 /* MC */
4906 /**
4907  * cik_mc_program - program the GPU memory controller
4908  *
4909  * @rdev: radeon_device pointer
4910  *
4911  * Set the location of vram, gart, and AGP in the GPU's
4912  * physical address space (CIK).
4913  */
4914 static void cik_mc_program(struct radeon_device *rdev)
4915 {
4916 	struct evergreen_mc_save save;
4917 	u32 tmp;
4918 	int i, j;
4919 
4920 	/* Initialize HDP */
4921 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4922 		WREG32((0x2c14 + j), 0x00000000);
4923 		WREG32((0x2c18 + j), 0x00000000);
4924 		WREG32((0x2c1c + j), 0x00000000);
4925 		WREG32((0x2c20 + j), 0x00000000);
4926 		WREG32((0x2c24 + j), 0x00000000);
4927 	}
4928 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4929 
4930 	evergreen_mc_stop(rdev, &save);
4931 	if (radeon_mc_wait_for_idle(rdev)) {
4932 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4933 	}
4934 	/* Lock out access through the VGA aperture */
4935 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4936 	/* Update configuration */
4937 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4938 	       rdev->mc.vram_start >> 12);
4939 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4940 	       rdev->mc.vram_end >> 12);
4941 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4942 	       rdev->vram_scratch.gpu_addr >> 12);
4943 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4944 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4945 	WREG32(MC_VM_FB_LOCATION, tmp);
4946 	/* XXX double check these! */
4947 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4948 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4949 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4950 	WREG32(MC_VM_AGP_BASE, 0);
4951 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4952 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4953 	if (radeon_mc_wait_for_idle(rdev)) {
4954 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4955 	}
4956 	evergreen_mc_resume(rdev, &save);
4957 	/* we need to own VRAM, so turn off the VGA renderer here
4958 	 * to stop it from overwriting our objects */
4959 	rv515_vga_render_disable(rdev);
4960 }
4961 
4962 /**
4963  * cik_mc_init - initialize the memory controller driver params
4964  *
4965  * @rdev: radeon_device pointer
4966  *
4967  * Look up the amount of vram, vram width, and decide how to place
4968  * vram and gart within the GPU's physical address space (CIK).
4969  * Returns 0 for success.
4970  */
4971 static int cik_mc_init(struct radeon_device *rdev)
4972 {
4973 	u32 tmp;
4974 	int chansize, numchan;
4975 
4976 	/* Get VRAM information */
4977 	rdev->mc.vram_is_ddr = true;
4978 	tmp = RREG32(MC_ARB_RAMCFG);
4979 	if (tmp & CHANSIZE_MASK) {
4980 		chansize = 64;
4981 	} else {
4982 		chansize = 32;
4983 	}
4984 	tmp = RREG32(MC_SHARED_CHMAP);
4985 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4986 	case 0:
4987 	default:
4988 		numchan = 1;
4989 		break;
4990 	case 1:
4991 		numchan = 2;
4992 		break;
4993 	case 2:
4994 		numchan = 4;
4995 		break;
4996 	case 3:
4997 		numchan = 8;
4998 		break;
4999 	case 4:
5000 		numchan = 3;
5001 		break;
5002 	case 5:
5003 		numchan = 6;
5004 		break;
5005 	case 6:
5006 		numchan = 10;
5007 		break;
5008 	case 7:
5009 		numchan = 12;
5010 		break;
5011 	case 8:
5012 		numchan = 16;
5013 		break;
5014 	}
5015 	rdev->mc.vram_width = numchan * chansize;
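	/* e.g. a NOOFCHAN encoding of 2 (4 channels) with 64-bit channels
	 * yields a 4 * 64 = 256-bit effective memory interface */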
5016 	/* Could the aperture size ever report 0? */
5017 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5018 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5019 	/* size in MB on CIK */
5020 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5021 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5022 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5023 	si_vram_gtt_location(rdev, &rdev->mc);
5024 	radeon_update_bandwidth_info(rdev);
5025 
5026 	return 0;
5027 }
5028 
5029 /*
5030  * GART
5031  * VMID 0 is the physical GPU addresses as used by the kernel.
5032  * VMIDs 1-15 are used for userspace clients and are handled
5033  * by the radeon vm/hsa code.
5034  */
5035 /**
5036  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5037  *
5038  * @rdev: radeon_device pointer
5039  *
5040  * Flush the TLB for the VMID 0 page table (CIK).
5041  */
5042 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5043 {
5044 	/* flush hdp cache */
5045 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5046 
5047 	/* bits 0-15 are the VM contexts 0-15 */
5048 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5049 }
5050 
5051 /**
5052  * cik_pcie_gart_enable - gart enable
5053  *
5054  * @rdev: radeon_device pointer
5055  *
5056  * This sets up the TLBs, programs the page tables for VMID0,
5057  * sets up the hw for VMIDs 1-15 which are allocated on
5058  * demand, and sets up the global locations for the LDS, GDS,
5059  * and GPUVM for FSA64 clients (CIK).
5060  * Returns 0 for success, errors for failure.
5061  */
5062 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5063 {
5064 	int r, i;
5065 
5066 	if (rdev->gart.robj == NULL) {
5067 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5068 		return -EINVAL;
5069 	}
5070 	r = radeon_gart_table_vram_pin(rdev);
5071 	if (r)
5072 		return r;
5073 	radeon_gart_restore(rdev);
5074 	/* Setup TLB control */
5075 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5076 	       (0xA << 7) |
5077 	       ENABLE_L1_TLB |
5078 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5079 	       ENABLE_ADVANCED_DRIVER_MODEL |
5080 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5081 	/* Setup L2 cache */
5082 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5083 	       ENABLE_L2_FRAGMENT_PROCESSING |
5084 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5085 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5086 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5087 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5088 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5089 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5090 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5091 	/* setup context0 */
5092 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5093 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5094 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5095 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5096 			(u32)(rdev->dummy_page.addr >> 12));
5097 	WREG32(VM_CONTEXT0_CNTL2, 0);
5098 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5099 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5100 
5101 	WREG32(0x15D4, 0);
5102 	WREG32(0x15D8, 0);
5103 	WREG32(0x15DC, 0);
5104 
5105 	/* empty contexts 1-15 */
5106 	/* FIXME start with 4G, once using 2 level pt switch to full
5107 	 * vm size space
5108 	 */
5109 	/* set vm size, must be a multiple of 4 */
5110 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5111 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
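	/* The per-VMID page table base registers sit in two contiguous
	 * banks: VMIDs 0-7 at VM_CONTEXT0_PAGE_TABLE_BASE_ADDR and
	 * VMIDs 8-15 at VM_CONTEXT8_PAGE_TABLE_BASE_ADDR, one dword
	 * apart, hence the (i << 2) byte offsets below.
	 */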
5112 	for (i = 1; i < 16; i++) {
5113 		if (i < 8)
5114 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5115 			       rdev->gart.table_addr >> 12);
5116 		else
5117 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5118 			       rdev->gart.table_addr >> 12);
5119 	}
5120 
5121 	/* enable context1-15 */
5122 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5123 	       (u32)(rdev->dummy_page.addr >> 12));
5124 	WREG32(VM_CONTEXT1_CNTL2, 4);
5125 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5126 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5127 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5128 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5129 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5130 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5131 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5132 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5133 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5134 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5135 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5136 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5137 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5138 
5139 	/* TC cache setup ??? */
5140 	WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
5141 	WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
5142 	WREG32(TC_CFG_L1_STORE_POLICY, 0);
5143 
5144 	WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
5145 	WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
5146 	WREG32(TC_CFG_L2_STORE_POLICY0, 0);
5147 	WREG32(TC_CFG_L2_STORE_POLICY1, 0);
5148 	WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
5149 
5150 	WREG32(TC_CFG_L1_VOLATILE, 0);
5151 	WREG32(TC_CFG_L2_VOLATILE, 0);
5152 
5153 	if (rdev->family == CHIP_KAVERI) {
5154 		u32 tmp = RREG32(CHUB_CONTROL);
5155 		tmp &= ~BYPASS_VM;
5156 		WREG32(CHUB_CONTROL, tmp);
5157 	}
5158 
5159 	/* XXX SH_MEM regs */
5160 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5161 	mutex_lock(&rdev->srbm_mutex);
5162 	for (i = 0; i < 16; i++) {
5163 		cik_srbm_select(rdev, 0, 0, 0, i);
5164 		/* CP and shaders */
5165 		WREG32(SH_MEM_CONFIG, 0);
5166 		WREG32(SH_MEM_APE1_BASE, 1);
5167 		WREG32(SH_MEM_APE1_LIMIT, 0);
5168 		WREG32(SH_MEM_BASES, 0);
5169 		/* SDMA GFX */
5170 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5171 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5172 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5173 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5174 		/* XXX SDMA RLC - todo */
5175 	}
5176 	cik_srbm_select(rdev, 0, 0, 0, 0);
5177 	mutex_unlock(&rdev->srbm_mutex);
5178 
5179 	cik_pcie_gart_tlb_flush(rdev);
5180 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5181 		 (unsigned)(rdev->mc.gtt_size >> 20),
5182 		 (unsigned long long)rdev->gart.table_addr);
5183 	rdev->gart.ready = true;
5184 	return 0;
5185 }
5186 
5187 /**
5188  * cik_pcie_gart_disable - gart disable
5189  *
5190  * @rdev: radeon_device pointer
5191  *
5192  * This disables all VM page tables (CIK).
5193  */
5194 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5195 {
5196 	/* Disable all tables */
5197 	WREG32(VM_CONTEXT0_CNTL, 0);
5198 	WREG32(VM_CONTEXT1_CNTL, 0);
5199 	/* Setup TLB control */
5200 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5201 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5202 	/* Setup L2 cache */
5203 	WREG32(VM_L2_CNTL,
5204 	       ENABLE_L2_FRAGMENT_PROCESSING |
5205 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5206 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5207 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5208 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5209 	WREG32(VM_L2_CNTL2, 0);
5210 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5211 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5212 	radeon_gart_table_vram_unpin(rdev);
5213 }
5214 
5215 /**
5216  * cik_pcie_gart_fini - vm fini callback
5217  *
5218  * @rdev: radeon_device pointer
5219  *
5220  * Tears down the driver GART/VM setup (CIK).
5221  */
5222 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5223 {
5224 	cik_pcie_gart_disable(rdev);
5225 	radeon_gart_table_vram_free(rdev);
5226 	radeon_gart_fini(rdev);
5227 }
5228 
5229 /* vm parser */
5230 /**
5231  * cik_ib_parse - vm ib_parse callback
5232  *
5233  * @rdev: radeon_device pointer
5234  * @ib: indirect buffer pointer
5235  *
5236  * CIK uses hw IB checking so this is a nop (CIK).
5237  */
5238 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5239 {
5240 	return 0;
5241 }
5242 
5243 /*
5244  * vm
5245  * VMID 0 is the physical GPU addresses as used by the kernel.
5246  * VMIDs 1-15 are used for userspace clients and are handled
5247  * by the radeon vm/hsa code.
5248  */
5249 /**
5250  * cik_vm_init - cik vm init callback
5251  *
5252  * @rdev: radeon_device pointer
5253  *
5254  * Inits cik specific vm parameters (number of VMs, base of vram for
5255  * VMIDs 1-15) (CIK).
5256  * Returns 0 for success.
5257  */
5258 int cik_vm_init(struct radeon_device *rdev)
5259 {
5260 	/* number of VMs */
5261 	rdev->vm_manager.nvm = 16;
5262 	/* base offset of vram pages */
5263 	if (rdev->flags & RADEON_IS_IGP) {
5264 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5265 		tmp <<= 22;
5266 		rdev->vm_manager.vram_base_offset = tmp;
5267 	} else
5268 		rdev->vm_manager.vram_base_offset = 0;
5269 
5270 	return 0;
5271 }
5272 
5273 /**
5274  * cik_vm_fini - cik vm fini callback
5275  *
5276  * @rdev: radeon_device pointer
5277  *
5278  * Tear down any asic specific VM setup (CIK).
5279  */
5280 void cik_vm_fini(struct radeon_device *rdev)
5281 {
5282 }
5283 
5284 /**
5285  * cik_vm_decode_fault - print human readable fault info
5286  *
5287  * @rdev: radeon_device pointer
5288  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5289  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5290  *
5291  * Print human readable fault information (CIK).
5292  */
5293 static void cik_vm_decode_fault(struct radeon_device *rdev,
5294 				u32 status, u32 addr, u32 mc_client)
5295 {
5296 	u32 mc_id;
5297 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5298 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
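	/* mc_client packs a four-character client tag, one byte per
	 * character, most significant byte first; unpack it into a
	 * NUL-terminated string for printing */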
5299 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5300 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5301 
5302 	if (rdev->family == CHIP_HAWAII)
5303 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5304 	else
5305 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5306 
5307 	printk(KERN_ERR "VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5308 	       protections, vmid, addr,
5309 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5310 	       block, mc_client, mc_id);
5311 }
5312 
5313 /**
5314  * cik_vm_flush - cik vm flush using the CP
5315  *
5316  * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 * @vm: radeon_vm pointer
5317  *
5318  * Update the page table base and flush the VM TLB
5319  * using the CP (CIK).
5320  */
5321 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5322 {
5323 	struct radeon_ring *ring = &rdev->ring[ridx];
5324 
5325 	if (vm == NULL)
5326 		return;
5327 
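	/* Each WRITE_DATA packet below carries, after the PACKET3 header,
	 * a control word, the destination register's dword address (lo),
	 * the high address bits (0 for registers), and the data dword. */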
5328 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5329 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5330 				 WRITE_DATA_DST_SEL(0)));
5331 	if (vm->id < 8) {
5332 		radeon_ring_write(ring,
5333 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5334 	} else {
5335 		radeon_ring_write(ring,
5336 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5337 	}
5338 	radeon_ring_write(ring, 0);
5339 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5340 
5341 	/* update SH_MEM_* regs */
5342 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5343 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5344 				 WRITE_DATA_DST_SEL(0)));
5345 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5346 	radeon_ring_write(ring, 0);
5347 	radeon_ring_write(ring, VMID(vm->id));
5348 
5349 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5350 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5351 				 WRITE_DATA_DST_SEL(0)));
5352 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5353 	radeon_ring_write(ring, 0);
5354 
5355 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5356 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5357 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5358 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5359 
5360 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5361 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5362 				 WRITE_DATA_DST_SEL(0)));
5363 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5364 	radeon_ring_write(ring, 0);
5365 	radeon_ring_write(ring, VMID(0));
5366 
5367 	/* HDP flush */
5368 	/* We should be using the WAIT_REG_MEM packet here like in
5369 	 * cik_fence_ring_emit(), but it causes the CP to hang in this
5370 	 * context...
5371 	 */
5372 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5373 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5374 				 WRITE_DATA_DST_SEL(0)));
5375 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5376 	radeon_ring_write(ring, 0);
5377 	radeon_ring_write(ring, 0);
5378 
5379 	/* bits 0-15 are the VM contexts 0-15 */
5380 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5381 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5382 				 WRITE_DATA_DST_SEL(0)));
5383 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5384 	radeon_ring_write(ring, 0);
5385 	radeon_ring_write(ring, 1 << vm->id);
5386 
5387 	/* compute doesn't have PFP */
5388 	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
5389 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5390 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5391 		radeon_ring_write(ring, 0x0);
5392 	}
5393 }
5394 
5395 /*
5396  * RLC
5397  * The RLC is a multi-purpose microengine that handles a
5398  * variety of functions, the most important of which is
5399  * the interrupt controller.
5400  */
5401 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5402 					  bool enable)
5403 {
5404 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5405 
5406 	if (enable)
5407 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5408 	else
5409 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5410 	WREG32(CP_INT_CNTL_RING0, tmp);
5411 }
5412 
5413 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5414 {
5415 	u32 tmp;
5416 
5417 	tmp = RREG32(RLC_LB_CNTL);
5418 	if (enable)
5419 		tmp |= LOAD_BALANCE_ENABLE;
5420 	else
5421 		tmp &= ~LOAD_BALANCE_ENABLE;
5422 	WREG32(RLC_LB_CNTL, tmp);
5423 }
5424 
5425 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5426 {
5427 	u32 i, j, k;
5428 	u32 mask;
5429 
5430 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5431 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5432 			cik_select_se_sh(rdev, i, j);
5433 			for (k = 0; k < rdev->usec_timeout; k++) {
5434 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5435 					break;
5436 				udelay(1);
5437 			}
5438 		}
5439 	}
5440 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5441 
5442 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5443 	for (k = 0; k < rdev->usec_timeout; k++) {
5444 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5445 			break;
5446 		udelay(1);
5447 	}
5448 }
5449 
5450 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5451 {
5452 	u32 tmp;
5453 
5454 	tmp = RREG32(RLC_CNTL);
5455 	if (tmp != rlc)
5456 		WREG32(RLC_CNTL, rlc);
5457 }
5458 
5459 static u32 cik_halt_rlc(struct radeon_device *rdev)
5460 {
5461 	u32 data, orig;
5462 
5463 	orig = data = RREG32(RLC_CNTL);
5464 
5465 	if (data & RLC_ENABLE) {
5466 		u32 i;
5467 
5468 		data &= ~RLC_ENABLE;
5469 		WREG32(RLC_CNTL, data);
5470 
5471 		for (i = 0; i < rdev->usec_timeout; i++) {
5472 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5473 				break;
5474 			udelay(1);
5475 		}
5476 
5477 		cik_wait_for_rlc_serdes(rdev);
5478 	}
5479 
5480 	return orig;
5481 }
5482 
5483 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5484 {
5485 	u32 tmp, i, mask;
5486 
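	/* post the enter-safe-mode request; wait for gfx power and clocks
	 * to be reported on, then for the RLC to acknowledge by clearing
	 * the REQ bit, which the second poll loop below waits for */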
5487 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5488 	WREG32(RLC_GPR_REG2, tmp);
5489 
5490 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5491 	for (i = 0; i < rdev->usec_timeout; i++) {
5492 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5493 			break;
5494 		udelay(1);
5495 	}
5496 
5497 	for (i = 0; i < rdev->usec_timeout; i++) {
5498 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5499 			break;
5500 		udelay(1);
5501 	}
5502 }
5503 
5504 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5505 {
5506 	u32 tmp;
5507 
5508 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5509 	WREG32(RLC_GPR_REG2, tmp);
5510 }
5511 
5512 /**
5513  * cik_rlc_stop - stop the RLC ME
5514  *
5515  * @rdev: radeon_device pointer
5516  *
5517  * Halt the RLC ME (MicroEngine) (CIK).
5518  */
5519 static void cik_rlc_stop(struct radeon_device *rdev)
5520 {
5521 	WREG32(RLC_CNTL, 0);
5522 
5523 	cik_enable_gui_idle_interrupt(rdev, false);
5524 
5525 	cik_wait_for_rlc_serdes(rdev);
5526 }
5527 
5528 /**
5529  * cik_rlc_start - start the RLC ME
5530  *
5531  * @rdev: radeon_device pointer
5532  *
5533  * Unhalt the RLC ME (MicroEngine) (CIK).
5534  */
5535 static void cik_rlc_start(struct radeon_device *rdev)
5536 {
5537 	WREG32(RLC_CNTL, RLC_ENABLE);
5538 
5539 	cik_enable_gui_idle_interrupt(rdev, true);
5540 
5541 	udelay(50);
5542 }
5543 
5544 /**
5545  * cik_rlc_resume - setup the RLC hw
5546  *
5547  * @rdev: radeon_device pointer
5548  *
5549  * Initialize the RLC registers, load the ucode,
5550  * and start the RLC (CIK).
5551  * Returns 0 for success, -EINVAL if the ucode is not available.
5552  */
5553 static int cik_rlc_resume(struct radeon_device *rdev)
5554 {
5555 	u32 i, size, tmp;
5556 	const __be32 *fw_data;
5557 
5558 	if (!rdev->rlc_fw)
5559 		return -EINVAL;
5560 
5561 	switch (rdev->family) {
5562 	case CHIP_BONAIRE:
5563 	case CHIP_HAWAII:
5564 	default:
5565 		size = BONAIRE_RLC_UCODE_SIZE;
5566 		break;
5567 	case CHIP_KAVERI:
5568 		size = KV_RLC_UCODE_SIZE;
5569 		break;
5570 	case CHIP_KABINI:
5571 		size = KB_RLC_UCODE_SIZE;
5572 		break;
5573 	}
5574 
5575 	cik_rlc_stop(rdev);
5576 
5577 	/* disable CG */
5578 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5579 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5580 
5581 	si_rlc_reset(rdev);
5582 
5583 	cik_init_pg(rdev);
5584 
5585 	cik_init_cg(rdev);
5586 
5587 	WREG32(RLC_LB_CNTR_INIT, 0);
5588 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5589 
5590 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5591 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5592 	WREG32(RLC_LB_PARAMS, 0x00600408);
5593 	WREG32(RLC_LB_CNTL, 0x80000004);
5594 
5595 	WREG32(RLC_MC_CNTL, 0);
5596 	WREG32(RLC_UCODE_CNTL, 0);
5597 
5598 	fw_data = (const __be32 *)rdev->rlc_fw->data;
5599 	WREG32(RLC_GPM_UCODE_ADDR, 0);
5600 	for (i = 0; i < size; i++)
5601 		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5602 	WREG32(RLC_GPM_UCODE_ADDR, 0);
5603 
5604 	/* XXX - find out what chips support lbpw */
5605 	cik_enable_lbpw(rdev, false);
5606 
5607 	if (rdev->family == CHIP_BONAIRE)
5608 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
5609 
5610 	cik_rlc_start(rdev);
5611 
5612 	return 0;
5613 }
5614 
5615 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5616 {
5617 	u32 data, orig, tmp, tmp2;
5618 
5619 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5620 
5621 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5622 		cik_enable_gui_idle_interrupt(rdev, true);
5623 
5624 		tmp = cik_halt_rlc(rdev);
5625 
5626 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5627 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5628 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5629 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5630 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
5631 
5632 		cik_update_rlc(rdev, tmp);
5633 
5634 		data |= CGCG_EN | CGLS_EN;
5635 	} else {
5636 		cik_enable_gui_idle_interrupt(rdev, false);
5637 
5638 		RREG32(CB_CGTT_SCLK_CTRL);
5639 		RREG32(CB_CGTT_SCLK_CTRL);
5640 		RREG32(CB_CGTT_SCLK_CTRL);
5641 		RREG32(CB_CGTT_SCLK_CTRL);
5642 
5643 		data &= ~(CGCG_EN | CGLS_EN);
5644 	}
5645 
5646 	if (orig != data)
5647 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5649 }
5650 
5651 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5652 {
5653 	u32 data, orig, tmp = 0;
5654 
5655 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5656 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
5657 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5658 				orig = data = RREG32(CP_MEM_SLP_CNTL);
5659 				data |= CP_MEM_LS_EN;
5660 				if (orig != data)
5661 					WREG32(CP_MEM_SLP_CNTL, data);
5662 			}
5663 		}
5664 
5665 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5666 		data &= 0xfffffffd;
5667 		if (orig != data)
5668 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5669 
5670 		tmp = cik_halt_rlc(rdev);
5671 
5672 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5673 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5674 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5675 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5676 		WREG32(RLC_SERDES_WR_CTRL, data);
5677 
5678 		cik_update_rlc(rdev, tmp);
5679 
5680 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
5681 			orig = data = RREG32(CGTS_SM_CTRL_REG);
5682 			data &= ~SM_MODE_MASK;
5683 			data |= SM_MODE(0x2);
5684 			data |= SM_MODE_ENABLE;
5685 			data &= ~CGTS_OVERRIDE;
5686 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
5687 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
5688 				data &= ~CGTS_LS_OVERRIDE;
5689 			data &= ~ON_MONITOR_ADD_MASK;
5690 			data |= ON_MONITOR_ADD_EN;
5691 			data |= ON_MONITOR_ADD(0x96);
5692 			if (orig != data)
5693 				WREG32(CGTS_SM_CTRL_REG, data);
5694 		}
5695 	} else {
5696 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5697 		data |= 0x00000002;
5698 		if (orig != data)
5699 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5700 
5701 		data = RREG32(RLC_MEM_SLP_CNTL);
5702 		if (data & RLC_MEM_LS_EN) {
5703 			data &= ~RLC_MEM_LS_EN;
5704 			WREG32(RLC_MEM_SLP_CNTL, data);
5705 		}
5706 
5707 		data = RREG32(CP_MEM_SLP_CNTL);
5708 		if (data & CP_MEM_LS_EN) {
5709 			data &= ~CP_MEM_LS_EN;
5710 			WREG32(CP_MEM_SLP_CNTL, data);
5711 		}
5712 
5713 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5714 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
5715 		if (orig != data)
5716 			WREG32(CGTS_SM_CTRL_REG, data);
5717 
5718 		tmp = cik_halt_rlc(rdev);
5719 
5720 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5721 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5722 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5723 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
5724 		WREG32(RLC_SERDES_WR_CTRL, data);
5725 
5726 		cik_update_rlc(rdev, tmp);
5727 	}
5728 }
5729 
5730 static const u32 mc_cg_registers[] =
5731 {
5732 	MC_HUB_MISC_HUB_CG,
5733 	MC_HUB_MISC_SIP_CG,
5734 	MC_HUB_MISC_VM_CG,
5735 	MC_XPB_CLK_GAT,
5736 	ATC_MISC_CG,
5737 	MC_CITF_MISC_WR_CG,
5738 	MC_CITF_MISC_RD_CG,
5739 	MC_CITF_MISC_VM_CG,
5740 	VM_L2_CG,
5741 };
5742 
5743 static void cik_enable_mc_ls(struct radeon_device *rdev,
5744 			     bool enable)
5745 {
5746 	int i;
5747 	u32 orig, data;
5748 
5749 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5750 		orig = data = RREG32(mc_cg_registers[i]);
5751 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5752 			data |= MC_LS_ENABLE;
5753 		else
5754 			data &= ~MC_LS_ENABLE;
5755 		if (data != orig)
5756 			WREG32(mc_cg_registers[i], data);
5757 	}
5758 }
5759 
5760 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5761 			       bool enable)
5762 {
5763 	int i;
5764 	u32 orig, data;
5765 
5766 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5767 		orig = data = RREG32(mc_cg_registers[i]);
5768 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5769 			data |= MC_CG_ENABLE;
5770 		else
5771 			data &= ~MC_CG_ENABLE;
5772 		if (data != orig)
5773 			WREG32(mc_cg_registers[i], data);
5774 	}
5775 }
5776 
5777 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5778 				 bool enable)
5779 {
5780 	u32 orig, data;
5781 
5782 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5783 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5784 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5785 	} else {
5786 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5787 		data |= 0xff000000;
5788 		if (data != orig)
5789 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5790 
5791 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5792 		data |= 0xff000000;
5793 		if (data != orig)
5794 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5795 	}
5796 }
5797 
5798 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5799 				 bool enable)
5800 {
5801 	u32 orig, data;
5802 
5803 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
5804 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5805 		data |= 0x100;
5806 		if (orig != data)
5807 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5808 
5809 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5810 		data |= 0x100;
5811 		if (orig != data)
5812 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5813 	} else {
5814 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5815 		data &= ~0x100;
5816 		if (orig != data)
5817 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5818 
5819 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5820 		data &= ~0x100;
5821 		if (orig != data)
5822 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5823 	}
5824 }
5825 
5826 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
5827 				bool enable)
5828 {
5829 	u32 orig, data;
5830 
5831 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5832 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5833 		data = 0xfff;
5834 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5835 
5836 		orig = data = RREG32(UVD_CGC_CTRL);
5837 		data |= DCM;
5838 		if (orig != data)
5839 			WREG32(UVD_CGC_CTRL, data);
5840 	} else {
5841 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5842 		data &= ~0xfff;
5843 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5844 
5845 		orig = data = RREG32(UVD_CGC_CTRL);
5846 		data &= ~DCM;
5847 		if (orig != data)
5848 			WREG32(UVD_CGC_CTRL, data);
5849 	}
5850 }
5851 
5852 static void cik_enable_bif_mgls(struct radeon_device *rdev,
5853 			       bool enable)
5854 {
5855 	u32 orig, data;
5856 
5857 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
5858 
5859 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5860 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5861 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5862 	else
5863 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5864 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5865 
5866 	if (orig != data)
5867 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
5868 }
5869 
5870 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5871 				bool enable)
5872 {
5873 	u32 orig, data;
5874 
5875 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5876 
5877 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5878 		data &= ~CLOCK_GATING_DIS;
5879 	else
5880 		data |= CLOCK_GATING_DIS;
5881 
5882 	if (orig != data)
5883 		WREG32(HDP_HOST_PATH_CNTL, data);
5884 }
5885 
5886 static void cik_enable_hdp_ls(struct radeon_device *rdev,
5887 			      bool enable)
5888 {
5889 	u32 orig, data;
5890 
5891 	orig = data = RREG32(HDP_MEM_POWER_LS);
5892 
5893 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5894 		data |= HDP_LS_ENABLE;
5895 	else
5896 		data &= ~HDP_LS_ENABLE;
5897 
5898 	if (orig != data)
5899 		WREG32(HDP_MEM_POWER_LS, data);
5900 }
5901 
5902 void cik_update_cg(struct radeon_device *rdev,
5903 		   u32 block, bool enable)
5904 {
5906 	if (block & RADEON_CG_BLOCK_GFX) {
5907 		cik_enable_gui_idle_interrupt(rdev, false);
5908 		/* order matters! */
5909 		if (enable) {
5910 			cik_enable_mgcg(rdev, true);
5911 			cik_enable_cgcg(rdev, true);
5912 		} else {
5913 			cik_enable_cgcg(rdev, false);
5914 			cik_enable_mgcg(rdev, false);
5915 		}
5916 		cik_enable_gui_idle_interrupt(rdev, true);
5917 	}
5918 
5919 	if (block & RADEON_CG_BLOCK_MC) {
5920 		if (!(rdev->flags & RADEON_IS_IGP)) {
5921 			cik_enable_mc_mgcg(rdev, enable);
5922 			cik_enable_mc_ls(rdev, enable);
5923 		}
5924 	}
5925 
5926 	if (block & RADEON_CG_BLOCK_SDMA) {
5927 		cik_enable_sdma_mgcg(rdev, enable);
5928 		cik_enable_sdma_mgls(rdev, enable);
5929 	}
5930 
5931 	if (block & RADEON_CG_BLOCK_BIF) {
5932 		cik_enable_bif_mgls(rdev, enable);
5933 	}
5934 
5935 	if (block & RADEON_CG_BLOCK_UVD) {
5936 		if (rdev->has_uvd)
5937 			cik_enable_uvd_mgcg(rdev, enable);
5938 	}
5939 
5940 	if (block & RADEON_CG_BLOCK_HDP) {
5941 		cik_enable_hdp_mgcg(rdev, enable);
5942 		cik_enable_hdp_ls(rdev, enable);
5943 	}
5944 }
5945 
5946 static void cik_init_cg(struct radeon_device *rdev)
5947 {
5949 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
5950 
5951 	if (rdev->has_uvd)
5952 		si_init_uvd_internal_cg(rdev);
5953 
5954 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5955 			     RADEON_CG_BLOCK_SDMA |
5956 			     RADEON_CG_BLOCK_BIF |
5957 			     RADEON_CG_BLOCK_UVD |
5958 			     RADEON_CG_BLOCK_HDP), true);
5959 }
5960 
5961 static void cik_fini_cg(struct radeon_device *rdev)
5962 {
5963 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5964 			     RADEON_CG_BLOCK_SDMA |
5965 			     RADEON_CG_BLOCK_BIF |
5966 			     RADEON_CG_BLOCK_UVD |
5967 			     RADEON_CG_BLOCK_HDP), false);
5968 
5969 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
5970 }
5971 
5972 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
5973 					  bool enable)
5974 {
5975 	u32 data, orig;
5976 
5977 	orig = data = RREG32(RLC_PG_CNTL);
5978 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5979 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5980 	else
5981 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5982 	if (orig != data)
5983 		WREG32(RLC_PG_CNTL, data);
5984 }
5985 
5986 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
5987 					  bool enable)
5988 {
5989 	u32 data, orig;
5990 
5991 	orig = data = RREG32(RLC_PG_CNTL);
5992 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5993 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5994 	else
5995 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5996 	if (orig != data)
5997 		WREG32(RLC_PG_CNTL, data);
5998 }
5999 
6000 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6001 {
6002 	u32 data, orig;
6003 
6004 	orig = data = RREG32(RLC_PG_CNTL);
6005 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6006 		data &= ~DISABLE_CP_PG;
6007 	else
6008 		data |= DISABLE_CP_PG;
6009 	if (orig != data)
6010 		WREG32(RLC_PG_CNTL, data);
6011 }
6012 
6013 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6014 {
6015 	u32 data, orig;
6016 
6017 	orig = data = RREG32(RLC_PG_CNTL);
6018 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6019 		data &= ~DISABLE_GDS_PG;
6020 	else
6021 		data |= DISABLE_GDS_PG;
6022 	if (orig != data)
6023 		WREG32(RLC_PG_CNTL, data);
6024 }
6025 
6026 #define CP_ME_TABLE_SIZE    96
6027 #define CP_ME_TABLE_OFFSET  2048
6028 #define CP_MEC_TABLE_OFFSET 4096
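/* the table size and offsets above are in 32-bit dwords within the
 * ucode images */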
6029 
6030 void cik_init_cp_pg_table(struct radeon_device *rdev)
6031 {
6032 	const __be32 *fw_data;
6033 	volatile u32 *dst_ptr;
6034 	int me, i, max_me = 4;
6035 	u32 bo_offset = 0;
6036 	u32 table_offset;
6037 
6038 	if (rdev->family == CHIP_KAVERI)
6039 		max_me = 5;
6040 
6041 	if (rdev->rlc.cp_table_ptr == NULL)
6042 		return;
6043 
6044 	/* write the cp table buffer */
6045 	dst_ptr = rdev->rlc.cp_table_ptr;
6046 	for (me = 0; me < max_me; me++) {
6047 		if (me == 0) {
6048 			fw_data = (const __be32 *)rdev->ce_fw->data;
6049 			table_offset = CP_ME_TABLE_OFFSET;
6050 		} else if (me == 1) {
6051 			fw_data = (const __be32 *)rdev->pfp_fw->data;
6052 			table_offset = CP_ME_TABLE_OFFSET;
6053 		} else if (me == 2) {
6054 			fw_data = (const __be32 *)rdev->me_fw->data;
6055 			table_offset = CP_ME_TABLE_OFFSET;
6056 		} else {
6057 			fw_data = (const __be32 *)rdev->mec_fw->data;
6058 			table_offset = CP_MEC_TABLE_OFFSET;
6059 		}
6060 
6061 		for (i = 0; i < CP_ME_TABLE_SIZE; i++) {
6062 			dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6063 		}
6064 		bo_offset += CP_ME_TABLE_SIZE;
6065 	}
6066 }
6067 
6068 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6069 				bool enable)
6070 {
6071 	u32 data, orig;
6072 
6073 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6074 		orig = data = RREG32(RLC_PG_CNTL);
6075 		data |= GFX_PG_ENABLE;
6076 		if (orig != data)
6077 			WREG32(RLC_PG_CNTL, data);
6078 
6079 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6080 		data |= AUTO_PG_EN;
6081 		if (orig != data)
6082 			WREG32(RLC_AUTO_PG_CTRL, data);
6083 	} else {
6084 		orig = data = RREG32(RLC_PG_CNTL);
6085 		data &= ~GFX_PG_ENABLE;
6086 		if (orig != data)
6087 			WREG32(RLC_PG_CNTL, data);
6088 
6089 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6090 		data &= ~AUTO_PG_EN;
6091 		if (orig != data)
6092 			WREG32(RLC_AUTO_PG_CTRL, data);
6093 
6094 		data = RREG32(DB_RENDER_CONTROL);
6095 	}
6096 }
6097 
6098 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6099 {
6100 	u32 mask = 0, tmp, tmp1;
6101 	int i;
6102 
6103 	cik_select_se_sh(rdev, se, sh);
6104 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6105 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6106 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6107 
6108 	tmp &= 0xffff0000;
6109 
6110 	tmp |= tmp1;
6111 	tmp >>= 16;
6112 
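	/* build a mask of max_cu_per_sh low bits; the config registers
	 * flag *inactive* CUs, so invert to get the active-CU bitmap */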
6113 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6114 		mask <<= 1;
6115 		mask |= 1;
6116 	}
6117 
6118 	return (~tmp) & mask;
6119 }
6120 
6121 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6122 {
6123 	u32 i, j, k, active_cu_number = 0;
6124 	u32 mask, counter, cu_bitmap;
6125 	u32 tmp = 0;
6126 
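	/* pick up to two active CUs per SH to keep always-on (AO) for
	 * RLC_PG_AO_CU_MASK, and count the total number of active CUs
	 * for RLC_MAX_PG_CU */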
6127 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6128 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6129 			mask = 1;
6130 			cu_bitmap = 0;
6131 			counter = 0;
6132 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6133 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6134 					if (counter < 2)
6135 						cu_bitmap |= mask;
6136 					counter++;
6137 				}
6138 				mask <<= 1;
6139 			}
6140 
6141 			active_cu_number += counter;
6142 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6143 		}
6144 	}
6145 
6146 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6147 
6148 	tmp = RREG32(RLC_MAX_PG_CU);
6149 	tmp &= ~MAX_PU_CU_MASK;
6150 	tmp |= MAX_PU_CU(active_cu_number);
6151 	WREG32(RLC_MAX_PG_CU, tmp);
6152 }
6153 
6154 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6155 				       bool enable)
6156 {
6157 	u32 data, orig;
6158 
6159 	orig = data = RREG32(RLC_PG_CNTL);
6160 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6161 		data |= STATIC_PER_CU_PG_ENABLE;
6162 	else
6163 		data &= ~STATIC_PER_CU_PG_ENABLE;
6164 	if (orig != data)
6165 		WREG32(RLC_PG_CNTL, data);
6166 }
6167 
6168 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6169 					bool enable)
6170 {
6171 	u32 data, orig;
6172 
6173 	orig = data = RREG32(RLC_PG_CNTL);
6174 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6175 		data |= DYN_PER_CU_PG_ENABLE;
6176 	else
6177 		data &= ~DYN_PER_CU_PG_ENABLE;
6178 	if (orig != data)
6179 		WREG32(RLC_PG_CNTL, data);
6180 }
6181 
6182 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6183 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6184 
6185 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6186 {
6187 	u32 data, orig;
6188 	u32 i;
6189 
6190 	if (rdev->rlc.cs_data) {
6191 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6192 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6193 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6194 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6195 	} else {
6196 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6197 		for (i = 0; i < 3; i++)
6198 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6199 	}
6200 	if (rdev->rlc.reg_list) {
6201 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6202 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6203 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6204 	}
6205 
6206 	orig = data = RREG32(RLC_PG_CNTL);
6207 	data |= GFX_PG_SRC;
6208 	if (orig != data)
6209 		WREG32(RLC_PG_CNTL, data);
6210 
6211 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6212 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6213 
6214 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6215 	data &= ~IDLE_POLL_COUNT_MASK;
6216 	data |= IDLE_POLL_COUNT(0x60);
6217 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6218 
6219 	data = 0x10101010;
6220 	WREG32(RLC_PG_DELAY, data);
6221 
6222 	data = RREG32(RLC_PG_DELAY_2);
6223 	data &= ~0xff;
6224 	data |= 0x3;
6225 	WREG32(RLC_PG_DELAY_2, data);
6226 
6227 	data = RREG32(RLC_AUTO_PG_CTRL);
6228 	data &= ~GRBM_REG_SGIT_MASK;
6229 	data |= GRBM_REG_SGIT(0x700);
6230 	WREG32(RLC_AUTO_PG_CTRL, data);
6232 }
6233 
6234 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6235 {
6236 	cik_enable_gfx_cgpg(rdev, enable);
6237 	cik_enable_gfx_static_mgpg(rdev, enable);
6238 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
6239 }
6240 
6241 u32 cik_get_csb_size(struct radeon_device *rdev)
6242 {
6243 	u32 count = 0;
6244 	const struct cs_section_def *sect = NULL;
6245 	const struct cs_extent_def *ext = NULL;
6246 
6247 	if (rdev->rlc.cs_data == NULL)
6248 		return 0;
6249 
6250 	/* begin clear state */
6251 	count += 2;
6252 	/* context control state */
6253 	count += 3;
6254 
6255 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6256 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6257 			if (sect->id == SECT_CONTEXT)
6258 				count += 2 + ext->reg_count;
6259 			else
6260 				return 0;
6261 		}
6262 	}
6263 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6264 	count += 4;
6265 	/* end clear state */
6266 	count += 2;
6267 	/* clear state */
6268 	count += 2;
6269 
6270 	return count;
6271 }
6272 
6273 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6274 {
6275 	u32 count = 0, i;
6276 	const struct cs_section_def *sect = NULL;
6277 	const struct cs_extent_def *ext = NULL;
6278 
6279 	if (rdev->rlc.cs_data == NULL)
6280 		return;
6281 	if (buffer == NULL)
6282 		return;
6283 
6284 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6285 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6286 
6287 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6288 	buffer[count++] = cpu_to_le32(0x80000000);
6289 	buffer[count++] = cpu_to_le32(0x80000000);
6290 
6291 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6292 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6293 			if (sect->id == SECT_CONTEXT) {
6294 				buffer[count++] =
6295 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6296 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6297 				for (i = 0; i < ext->reg_count; i++)
6298 					buffer[count++] = cpu_to_le32(ext->extent[i]);
6299 			} else {
6300 				return;
6301 			}
6302 		}
6303 	}
6304 
6305 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6306 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6307 	switch (rdev->family) {
6308 	case CHIP_BONAIRE:
6309 		buffer[count++] = cpu_to_le32(0x16000012);
6310 		buffer[count++] = cpu_to_le32(0x00000000);
6311 		break;
6312 	case CHIP_KAVERI:
6313 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6314 		buffer[count++] = cpu_to_le32(0x00000000);
6315 		break;
6316 	case CHIP_KABINI:
6317 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6318 		buffer[count++] = cpu_to_le32(0x00000000);
6319 		break;
6320 	case CHIP_HAWAII:
6321 		buffer[count++] = cpu_to_le32(0x3a00161a);
6322 		buffer[count++] = cpu_to_le32(0x0000002e);
6323 		break;
6324 	default:
6325 		buffer[count++] = cpu_to_le32(0x00000000);
6326 		buffer[count++] = cpu_to_le32(0x00000000);
6327 		break;
6328 	}
6329 
6330 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6331 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6332 
6333 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6334 	buffer[count++] = cpu_to_le32(0);
6335 }
6336 
6337 static void cik_init_pg(struct radeon_device *rdev)
6338 {
6339 	if (rdev->pg_flags) {
6340 		cik_enable_sck_slowdown_on_pu(rdev, true);
6341 		cik_enable_sck_slowdown_on_pd(rdev, true);
6342 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6343 			cik_init_gfx_cgpg(rdev);
6344 			cik_enable_cp_pg(rdev, true);
6345 			cik_enable_gds_pg(rdev, true);
6346 		}
6347 		cik_init_ao_cu_mask(rdev);
6348 		cik_update_gfx_pg(rdev, true);
6349 	}
6350 }
6351 
6352 static void cik_fini_pg(struct radeon_device *rdev)
6353 {
6354 	if (rdev->pg_flags) {
6355 		cik_update_gfx_pg(rdev, false);
6356 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6357 			cik_enable_cp_pg(rdev, false);
6358 			cik_enable_gds_pg(rdev, false);
6359 		}
6360 	}
6361 }
6362 
6363 /*
6364  * Interrupts
6365  * Starting with r6xx, interrupts are handled via a ring buffer.
6366  * Ring buffers are areas of GPU accessible memory that the GPU
6367  * writes interrupt vectors into and the host reads vectors out of.
6368  * There is a rptr (read pointer) that determines where the
6369  * host is currently reading, and a wptr (write pointer)
6370  * which determines where the GPU has written.  When the
6371  * pointers are equal, the ring is idle.  When the GPU
6372  * writes vectors to the ring buffer, it increments the
6373  * wptr.  When there is an interrupt, the host then starts
6374  * fetching commands and processing them until the pointers are
6375  * equal again at which point it updates the rptr.
6376  */
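/* A minimal sketch of the consumer side of this protocol (the real
 * implementation is cik_irq_process(), later in this file); the
 * ih_get_wptr() helper name is illustrative:
 *
 *	wptr = ih_get_wptr(rdev);
 *	rptr = rdev->ih.rptr;
 *	while (rptr != wptr) {
 *		src_id = le32_to_cpu(rdev->ih.ring[rptr / 4]) & 0xff;
 *		...dispatch on src_id...
 *		rptr += 16;                       (16-byte vectors)
 *		rptr &= rdev->ih.ring_size - 1;   (ring size is a power of 2)
 *	}
 *	rdev->ih.rptr = rptr;
 *	WREG32(IH_RB_RPTR, rptr);
 */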
6377 
6378 /**
6379  * cik_enable_interrupts - Enable the interrupt ring buffer
6380  *
6381  * @rdev: radeon_device pointer
6382  *
6383  * Enable the interrupt ring buffer (CIK).
6384  */
6385 static void cik_enable_interrupts(struct radeon_device *rdev)
6386 {
6387 	u32 ih_cntl = RREG32(IH_CNTL);
6388 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6389 
6390 	ih_cntl |= ENABLE_INTR;
6391 	ih_rb_cntl |= IH_RB_ENABLE;
6392 	WREG32(IH_CNTL, ih_cntl);
6393 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6394 	rdev->ih.enabled = true;
6395 }
6396 
6397 /**
6398  * cik_disable_interrupts - Disable the interrupt ring buffer
6399  *
6400  * @rdev: radeon_device pointer
6401  *
6402  * Disable the interrupt ring buffer (CIK).
6403  */
6404 static void cik_disable_interrupts(struct radeon_device *rdev)
6405 {
6406 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6407 	u32 ih_cntl = RREG32(IH_CNTL);
6408 
6409 	ih_rb_cntl &= ~IH_RB_ENABLE;
6410 	ih_cntl &= ~ENABLE_INTR;
6411 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6412 	WREG32(IH_CNTL, ih_cntl);
6413 	/* set rptr, wptr to 0 */
6414 	WREG32(IH_RB_RPTR, 0);
6415 	WREG32(IH_RB_WPTR, 0);
6416 	rdev->ih.enabled = false;
6417 	rdev->ih.rptr = 0;
6418 }
6419 
6420 /**
6421  * cik_disable_interrupt_state - Disable all interrupt sources
6422  *
6423  * @rdev: radeon_device pointer
6424  *
6425  * Clear all interrupt enable bits used by the driver (CIK).
6426  */
6427 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6428 {
6429 	u32 tmp;
6430 
6431 	/* gfx ring */
6432 	tmp = RREG32(CP_INT_CNTL_RING0) &
6433 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6434 	WREG32(CP_INT_CNTL_RING0, tmp);
6435 	/* sdma */
6436 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6437 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6438 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6439 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6440 	/* compute queues */
6441 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6442 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6443 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6444 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6445 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6446 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6447 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6448 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6449 	/* grbm */
6450 	WREG32(GRBM_INT_CNTL, 0);
6451 	/* vline/vblank, etc. */
6452 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6453 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6454 	if (rdev->num_crtc >= 4) {
6455 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6456 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6457 	}
6458 	if (rdev->num_crtc >= 6) {
6459 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6460 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6461 	}
6462 
6463 	/* dac hotplug */
6464 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6465 
6466 	/* digital hotplug */
6467 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6468 	WREG32(DC_HPD1_INT_CONTROL, tmp);
6469 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6470 	WREG32(DC_HPD2_INT_CONTROL, tmp);
6471 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6472 	WREG32(DC_HPD3_INT_CONTROL, tmp);
6473 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6474 	WREG32(DC_HPD4_INT_CONTROL, tmp);
6475 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6476 	WREG32(DC_HPD5_INT_CONTROL, tmp);
6477 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6478 	WREG32(DC_HPD6_INT_CONTROL, tmp);
6480 }
6481 
6482 /**
6483  * cik_irq_init - init and enable the interrupt ring
6484  *
6485  * @rdev: radeon_device pointer
6486  *
6487  * Allocate a ring buffer for the interrupt controller,
6488  * enable the RLC, disable interrupts, enable the IH
6489  * ring buffer and enable it (CIK).
6490  * Called at device load and resume.
6491  * Returns 0 for success, errors for failure.
6492  */
6493 static int cik_irq_init(struct radeon_device *rdev)
6494 {
6495 	int ret = 0;
6496 	int rb_bufsz;
6497 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6498 
6499 	/* allocate ring */
6500 	ret = r600_ih_ring_alloc(rdev);
6501 	if (ret)
6502 		return ret;
6503 
6504 	/* disable irqs */
6505 	cik_disable_interrupts(rdev);
6506 
6507 	/* init rlc */
6508 	ret = cik_rlc_resume(rdev);
6509 	if (ret) {
6510 		r600_ih_ring_fini(rdev);
6511 		return ret;
6512 	}
6513 
6514 	/* setup interrupt control */
6515 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
6516 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6517 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6518 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6519 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6520 	 */
6521 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6522 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6523 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6524 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6525 
6526 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6527 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
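	/* the RB_SIZE field of IH_RB_CNTL takes log2 of the ring size
	 * in dwords, hence the divide by 4 before order_base_2() */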
6528 
6529 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6530 		      IH_WPTR_OVERFLOW_CLEAR |
6531 		      (rb_bufsz << 1));
6532 
6533 	if (rdev->wb.enabled)
6534 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6535 
6536 	/* set the writeback address whether it's enabled or not */
6537 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6538 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6539 
6540 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6541 
6542 	/* set rptr, wptr to 0 */
6543 	WREG32(IH_RB_RPTR, 0);
6544 	WREG32(IH_RB_WPTR, 0);
6545 
6546 	/* Default settings for IH_CNTL (disabled at first) */
6547 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6548 	/* RPTR_REARM only works if msi's are enabled */
6549 	if (rdev->msi_enabled)
6550 		ih_cntl |= RPTR_REARM;
6551 	WREG32(IH_CNTL, ih_cntl);
6552 
6553 	/* force the active interrupt state to all disabled */
6554 	cik_disable_interrupt_state(rdev);
6555 
6556 	pci_set_master(rdev->pdev);
6557 
6558 	/* enable irqs */
6559 	cik_enable_interrupts(rdev);
6560 
6561 	return ret;
6562 }
6563 
6564 /**
6565  * cik_irq_set - enable/disable interrupt sources
6566  *
6567  * @rdev: radeon_device pointer
6568  *
6569  * Enable interrupt sources on the GPU (vblanks, hpd,
6570  * etc.) (CIK).
6571  * Returns 0 for success, errors for failure.
6572  */
6573 int cik_irq_set(struct radeon_device *rdev)
6574 {
6575 	u32 cp_int_cntl;
6576 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6577 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6578 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6579 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6580 	u32 grbm_int_cntl = 0;
6581 	u32 dma_cntl, dma_cntl1;
6582 	u32 thermal_int;
6583 
6584 	if (!rdev->irq.installed) {
6585 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6586 		return -EINVAL;
6587 	}
6588 	/* don't enable anything if the ih is disabled */
6589 	if (!rdev->ih.enabled) {
6590 		cik_disable_interrupts(rdev);
6591 		/* force the active interrupt state to all disabled */
6592 		cik_disable_interrupt_state(rdev);
6593 		return 0;
6594 	}
6595 
6596 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6597 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6598 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6599 
6600 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6601 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6602 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6603 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6604 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6605 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6606 
6607 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6608 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6609 
6610 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6611 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6612 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6613 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6614 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6615 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6616 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6617 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6618 
6619 	if (rdev->flags & RADEON_IS_IGP)
6620 		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6621 			~(THERM_INTH_MASK | THERM_INTL_MASK);
6622 	else
6623 		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6624 			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6625 
6626 	/* enable CP interrupts on all rings */
6627 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6628 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
6629 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6630 	}
6631 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6632 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6633 		DRM_DEBUG("cik_irq_set: sw int cp1\n");
6634 		if (ring->me == 1) {
6635 			switch (ring->pipe) {
6636 			case 0:
6637 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6638 				break;
6639 			case 1:
6640 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6641 				break;
6642 			case 2:
6643 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6644 				break;
6645 			case 3:
6646 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6647 				break;
6648 			default:
6649 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6650 				break;
6651 			}
6652 		} else if (ring->me == 2) {
6653 			switch (ring->pipe) {
6654 			case 0:
6655 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6656 				break;
6657 			case 1:
6658 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6659 				break;
6660 			case 2:
6661 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6662 				break;
6663 			case 3:
6664 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6665 				break;
6666 			default:
6667 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6668 				break;
6669 			}
6670 		} else {
6671 			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
6672 		}
6673 	}
6674 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6675 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6676 		DRM_DEBUG("cik_irq_set: sw int cp2\n");
6677 		if (ring->me == 1) {
6678 			switch (ring->pipe) {
6679 			case 0:
6680 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6681 				break;
6682 			case 1:
6683 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6684 				break;
6685 			case 2:
6686 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6687 				break;
6688 			case 3:
6689 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6690 				break;
6691 			default:
6692 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6693 				break;
6694 			}
6695 		} else if (ring->me == 2) {
6696 			switch (ring->pipe) {
6697 			case 0:
6698 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6699 				break;
6700 			case 1:
6701 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6702 				break;
6703 			case 2:
6704 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6705 				break;
6706 			case 3:
6707 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6708 				break;
6709 			default:
6710 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6711 				break;
6712 			}
6713 		} else {
6714 			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
6715 		}
6716 	}
6717 
6718 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6719 		DRM_DEBUG("cik_irq_set: sw int dma\n");
6720 		dma_cntl |= TRAP_ENABLE;
6721 	}
6722 
6723 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6724 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
6725 		dma_cntl1 |= TRAP_ENABLE;
6726 	}
6727 
6728 	if (rdev->irq.crtc_vblank_int[0] ||
6729 	    atomic_read(&rdev->irq.pflip[0])) {
6730 		DRM_DEBUG("cik_irq_set: vblank 0\n");
6731 		crtc1 |= VBLANK_INTERRUPT_MASK;
6732 	}
6733 	if (rdev->irq.crtc_vblank_int[1] ||
6734 	    atomic_read(&rdev->irq.pflip[1])) {
6735 		DRM_DEBUG("cik_irq_set: vblank 1\n");
6736 		crtc2 |= VBLANK_INTERRUPT_MASK;
6737 	}
6738 	if (rdev->irq.crtc_vblank_int[2] ||
6739 	    atomic_read(&rdev->irq.pflip[2])) {
6740 		DRM_DEBUG("cik_irq_set: vblank 2\n");
6741 		crtc3 |= VBLANK_INTERRUPT_MASK;
6742 	}
6743 	if (rdev->irq.crtc_vblank_int[3] ||
6744 	    atomic_read(&rdev->irq.pflip[3])) {
6745 		DRM_DEBUG("cik_irq_set: vblank 3\n");
6746 		crtc4 |= VBLANK_INTERRUPT_MASK;
6747 	}
6748 	if (rdev->irq.crtc_vblank_int[4] ||
6749 	    atomic_read(&rdev->irq.pflip[4])) {
6750 		DRM_DEBUG("cik_irq_set: vblank 4\n");
6751 		crtc5 |= VBLANK_INTERRUPT_MASK;
6752 	}
6753 	if (rdev->irq.crtc_vblank_int[5] ||
6754 	    atomic_read(&rdev->irq.pflip[5])) {
6755 		DRM_DEBUG("cik_irq_set: vblank 5\n");
6756 		crtc6 |= VBLANK_INTERRUPT_MASK;
6757 	}
6758 	if (rdev->irq.hpd[0]) {
6759 		DRM_DEBUG("cik_irq_set: hpd 1\n");
6760 		hpd1 |= DC_HPDx_INT_EN;
6761 	}
6762 	if (rdev->irq.hpd[1]) {
6763 		DRM_DEBUG("cik_irq_set: hpd 2\n");
6764 		hpd2 |= DC_HPDx_INT_EN;
6765 	}
6766 	if (rdev->irq.hpd[2]) {
6767 		DRM_DEBUG("cik_irq_set: hpd 3\n");
6768 		hpd3 |= DC_HPDx_INT_EN;
6769 	}
6770 	if (rdev->irq.hpd[3]) {
6771 		DRM_DEBUG("cik_irq_set: hpd 4\n");
6772 		hpd4 |= DC_HPDx_INT_EN;
6773 	}
6774 	if (rdev->irq.hpd[4]) {
6775 		DRM_DEBUG("cik_irq_set: hpd 5\n");
6776 		hpd5 |= DC_HPDx_INT_EN;
6777 	}
6778 	if (rdev->irq.hpd[5]) {
6779 		DRM_DEBUG("cik_irq_set: hpd 6\n");
6780 		hpd6 |= DC_HPDx_INT_EN;
6781 	}
6782 
6783 	if (rdev->irq.dpm_thermal) {
6784 		DRM_DEBUG("dpm thermal\n");
6785 		if (rdev->flags & RADEON_IS_IGP)
6786 			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6787 		else
6788 			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6789 	}
6790 
6791 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6792 
6793 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6794 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6795 
6796 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6797 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6798 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6799 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6800 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6801 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6802 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6803 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6804 
6805 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6806 
6807 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6808 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6809 	if (rdev->num_crtc >= 4) {
6810 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6811 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6812 	}
6813 	if (rdev->num_crtc >= 6) {
6814 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6815 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6816 	}
6817 
6818 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
6819 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
6820 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
6821 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
6822 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
6823 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
6824 
6825 	if (rdev->flags & RADEON_IS_IGP)
6826 		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6827 	else
6828 		WREG32_SMC(CG_THERMAL_INT, thermal_int);
6829 
6830 	return 0;
6831 }
6832 
6833 /**
6834  * cik_irq_ack - ack interrupt sources
6835  *
6836  * @rdev: radeon_device pointer
6837  *
6838  * Ack interrupt sources on the GPU (vblanks, hpd,
6839  * etc.) (CIK).  Certain interrupt sources are sw
6840  * generated and do not require an explicit ack.
6841  */
6842 static inline void cik_irq_ack(struct radeon_device *rdev)
6843 {
6844 	u32 tmp;
6845 
6846 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6847 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6848 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6849 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6850 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6851 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6852 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6853 
6854 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6855 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6856 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6857 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6858 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6859 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6860 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6861 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6862 
6863 	if (rdev->num_crtc >= 4) {
6864 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6865 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6866 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6867 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6868 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6869 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6870 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6871 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6872 	}
6873 
6874 	if (rdev->num_crtc >= 6) {
6875 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6876 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6877 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6878 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6879 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6880 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6881 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6882 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6883 	}
6884 
6885 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6886 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6887 		tmp |= DC_HPDx_INT_ACK;
6888 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6889 	}
6890 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6891 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6892 		tmp |= DC_HPDx_INT_ACK;
6893 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6894 	}
6895 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6896 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6897 		tmp |= DC_HPDx_INT_ACK;
6898 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6899 	}
6900 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6901 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6902 		tmp |= DC_HPDx_INT_ACK;
6903 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6904 	}
6905 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6906 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6907 		tmp |= DC_HPDx_INT_ACK;
6908 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6909 	}
6910 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6911 		tmp = RREG32(DC_HPD6_INT_CONTROL);
6912 		tmp |= DC_HPDx_INT_ACK;
6913 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6914 	}
6915 }
6916 
6917 /**
6918  * cik_irq_disable - disable interrupts
6919  *
6920  * @rdev: radeon_device pointer
6921  *
6922  * Disable interrupts on the hw (CIK).
6923  */
6924 static void cik_irq_disable(struct radeon_device *rdev)
6925 {
6926 	cik_disable_interrupts(rdev);
6927 	/* Wait and acknowledge irq */
6928 	mdelay(1);
6929 	cik_irq_ack(rdev);
6930 	cik_disable_interrupt_state(rdev);
6931 }
6932 
6933 /**
6934  * cik_irq_suspend - disable interrupts for suspend
6935  *
6936  * @rdev: radeon_device pointer
6937  *
6938  * Disable interrupts and stop the RLC (CIK).
6939  * Used for suspend.
6940  */
6941 static void cik_irq_suspend(struct radeon_device *rdev)
6942 {
6943 	cik_irq_disable(rdev);
6944 	cik_rlc_stop(rdev);
6945 }
6946 
6947 /**
6948  * cik_irq_fini - tear down interrupt support
6949  *
6950  * @rdev: radeon_device pointer
6951  *
6952  * Disable interrupts on the hw and free the IH ring
6953  * buffer (CIK).
6954  * Used for driver unload.
6955  */
6956 static void cik_irq_fini(struct radeon_device *rdev)
6957 {
6958 	cik_irq_suspend(rdev);
6959 	r600_ih_ring_fini(rdev);
6960 }
6961 
6962 /**
6963  * cik_get_ih_wptr - get the IH ring buffer wptr
6964  *
6965  * @rdev: radeon_device pointer
6966  *
6967  * Get the IH ring buffer wptr from either the register
6968  * or the writeback memory buffer (CIK).  Also check for
6969  * ring buffer overflow and deal with it.
6970  * Used by cik_irq_process().
6971  * Returns the value of the wptr.
6972  */
6973 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
6974 {
6975 	u32 wptr, tmp;
6976 
6977 	if (rdev->wb.enabled)
6978 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6979 	else
6980 		wptr = RREG32(IH_RB_WPTR);
6981 
6982 	if (wptr & RB_OVERFLOW) {
6983 		/* When a ring buffer overflow happens, start parsing interrupts
6984 		 * from the last not-overwritten vector (wptr + 16). Hopefully
6985 		 * this allows us to catch up.
6986 		 */
6987 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6988 			wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6989 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6990 		tmp = RREG32(IH_RB_CNTL);
6991 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6992 		WREG32(IH_RB_CNTL, tmp);
6993 	}
6994 	return (wptr & rdev->ih.ptr_mask);
6995 }
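
/* A worked example of the wrap arithmetic above, assuming the 64 KiB IH
 * ring allocated in cik_init() below (so ptr_mask == 0xffff).  Offsets
 * are in bytes and every IV entry is 16 bytes, so the masked values stay
 * 16-byte aligned:
 *
 *   wptr = 0xfff0: (wptr + 16) & ptr_mask == 0x0000 (wraps to the start)
 *   wptr = 0x0010: (wptr + 16) & ptr_mask == 0x0020 (no wrap)
 *
 * On overflow, resuming at (wptr + 16) skips the one entry the hardware
 * is about to overwrite and replays the rest of the ring.
 */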
6996 
6997 /*        CIK IV Ring
6998  * Each IV ring entry is 128 bits:
6999  * [7:0]    - interrupt source id
7000  * [31:8]   - reserved
7001  * [59:32]  - interrupt source data
7002  * [63:60]  - reserved
7003  * [71:64]  - RINGID
7004  *            CP:
7005  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7006  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7007  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7008  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7009  *            PIPE_ID - ME0 0=3D
7010  *                    - ME1&2 compute dispatcher (4 pipes each)
7011  *            SDMA:
7012  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7013  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7014  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7015  * [79:72]  - VMID
7016  * [95:80]  - PASID
7017  * [127:96] - reserved
7018  */
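
/* A minimal decode sketch for the CP RINGID byte described above; the
 * shifts mirror the ones used in cik_irq_process() below, and the
 * helper name itself is illustrative rather than part of the driver.
 */
static inline void cik_iv_decode_cp_ringid(u32 ring_id, u8 *me_id,
					   u8 *pipe_id, u8 *queue_id)
{
	*me_id = (ring_id & 0x60) >> 5;		/* ME_ID    [6:5] */
	*pipe_id = (ring_id & 0x18) >> 3;	/* PIPE_ID  [4:3] */
	*queue_id = (ring_id & 0x7) >> 0;	/* QUEUE_ID [2:0] */
}
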
7019 /**
7020  * cik_irq_process - interrupt handler
7021  *
7022  * @rdev: radeon_device pointer
7023  *
7024  * Interrupt handler (CIK).  Walk the IH ring,
7025  * ack interrupts and schedule work to handle
7026  * interrupt events.
7027  * Returns irq process return code.
7028  */
7029 int cik_irq_process(struct radeon_device *rdev)
7030 {
7031 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7032 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7033 	u32 wptr;
7034 	u32 rptr;
7035 	u32 src_id, src_data, ring_id;
7036 	u8 me_id, pipe_id, queue_id;
7037 	u32 ring_index;
7038 	bool queue_hotplug = false;
7039 	bool queue_reset = false;
7040 	u32 addr, status, mc_client;
7041 	bool queue_thermal = false;
7042 
7043 	if (!rdev->ih.enabled || rdev->shutdown)
7044 		return IRQ_NONE;
7045 
7046 	wptr = cik_get_ih_wptr(rdev);
7047 
7048 restart_ih:
7049 	/* is somebody else already processing irqs? */
7050 	if (atomic_xchg(&rdev->ih.lock, 1))
7051 		return IRQ_NONE;
7052 
7053 	rptr = rdev->ih.rptr;
7054 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7055 
7056 	/* Order reading of wptr vs. reading of IH ring data */
7057 	rmb();
7058 
7059 	/* display interrupts */
7060 	cik_irq_ack(rdev);
7061 
7062 	while (rptr != wptr) {
7063 		/* wptr/rptr are in bytes! */
7064 		ring_index = rptr / 4;
7065 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7066 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7067 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7068 
7069 		switch (src_id) {
7070 		case 1: /* D1 vblank/vline */
7071 			switch (src_data) {
7072 			case 0: /* D1 vblank */
7073 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7074 					if (rdev->irq.crtc_vblank_int[0]) {
7075 						drm_handle_vblank(rdev->ddev, 0);
7076 						rdev->pm.vblank_sync = true;
7077 						wake_up(&rdev->irq.vblank_queue);
7078 					}
7079 					if (atomic_read(&rdev->irq.pflip[0]))
7080 						radeon_crtc_handle_flip(rdev, 0);
7081 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7082 					DRM_DEBUG("IH: D1 vblank\n");
7083 				}
7084 				break;
7085 			case 1: /* D1 vline */
7086 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7087 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7088 					DRM_DEBUG("IH: D1 vline\n");
7089 				}
7090 				break;
7091 			default:
7092 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7093 				break;
7094 			}
7095 			break;
7096 		case 2: /* D2 vblank/vline */
7097 			switch (src_data) {
7098 			case 0: /* D2 vblank */
7099 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7100 					if (rdev->irq.crtc_vblank_int[1]) {
7101 						drm_handle_vblank(rdev->ddev, 1);
7102 						rdev->pm.vblank_sync = true;
7103 						wake_up(&rdev->irq.vblank_queue);
7104 					}
7105 					if (atomic_read(&rdev->irq.pflip[1]))
7106 						radeon_crtc_handle_flip(rdev, 1);
7107 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7108 					DRM_DEBUG("IH: D2 vblank\n");
7109 				}
7110 				break;
7111 			case 1: /* D2 vline */
7112 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7113 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7114 					DRM_DEBUG("IH: D2 vline\n");
7115 				}
7116 				break;
7117 			default:
7118 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7119 				break;
7120 			}
7121 			break;
7122 		case 3: /* D3 vblank/vline */
7123 			switch (src_data) {
7124 			case 0: /* D3 vblank */
7125 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7126 					if (rdev->irq.crtc_vblank_int[2]) {
7127 						drm_handle_vblank(rdev->ddev, 2);
7128 						rdev->pm.vblank_sync = true;
7129 						wake_up(&rdev->irq.vblank_queue);
7130 					}
7131 					if (atomic_read(&rdev->irq.pflip[2]))
7132 						radeon_crtc_handle_flip(rdev, 2);
7133 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7134 					DRM_DEBUG("IH: D3 vblank\n");
7135 				}
7136 				break;
7137 			case 1: /* D3 vline */
7138 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7139 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7140 					DRM_DEBUG("IH: D3 vline\n");
7141 				}
7142 				break;
7143 			default:
7144 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7145 				break;
7146 			}
7147 			break;
7148 		case 4: /* D4 vblank/vline */
7149 			switch (src_data) {
7150 			case 0: /* D4 vblank */
7151 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7152 					if (rdev->irq.crtc_vblank_int[3]) {
7153 						drm_handle_vblank(rdev->ddev, 3);
7154 						rdev->pm.vblank_sync = true;
7155 						wake_up(&rdev->irq.vblank_queue);
7156 					}
7157 					if (atomic_read(&rdev->irq.pflip[3]))
7158 						radeon_crtc_handle_flip(rdev, 3);
7159 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7160 					DRM_DEBUG("IH: D4 vblank\n");
7161 				}
7162 				break;
7163 			case 1: /* D4 vline */
7164 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7165 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7166 					DRM_DEBUG("IH: D4 vline\n");
7167 				}
7168 				break;
7169 			default:
7170 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7171 				break;
7172 			}
7173 			break;
7174 		case 5: /* D5 vblank/vline */
7175 			switch (src_data) {
7176 			case 0: /* D5 vblank */
7177 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7178 					if (rdev->irq.crtc_vblank_int[4]) {
7179 						drm_handle_vblank(rdev->ddev, 4);
7180 						rdev->pm.vblank_sync = true;
7181 						wake_up(&rdev->irq.vblank_queue);
7182 					}
7183 					if (atomic_read(&rdev->irq.pflip[4]))
7184 						radeon_crtc_handle_flip(rdev, 4);
7185 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7186 					DRM_DEBUG("IH: D5 vblank\n");
7187 				}
7188 				break;
7189 			case 1: /* D5 vline */
7190 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7191 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7192 					DRM_DEBUG("IH: D5 vline\n");
7193 				}
7194 				break;
7195 			default:
7196 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7197 				break;
7198 			}
7199 			break;
7200 		case 6: /* D6 vblank/vline */
7201 			switch (src_data) {
7202 			case 0: /* D6 vblank */
7203 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7204 					if (rdev->irq.crtc_vblank_int[5]) {
7205 						drm_handle_vblank(rdev->ddev, 5);
7206 						rdev->pm.vblank_sync = true;
7207 						wake_up(&rdev->irq.vblank_queue);
7208 					}
7209 					if (atomic_read(&rdev->irq.pflip[5]))
7210 						radeon_crtc_handle_flip(rdev, 5);
7211 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7212 					DRM_DEBUG("IH: D6 vblank\n");
7213 				}
7214 				break;
7215 			case 1: /* D6 vline */
7216 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7217 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7218 					DRM_DEBUG("IH: D6 vline\n");
7219 				}
7220 				break;
7221 			default:
7222 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7223 				break;
7224 			}
7225 			break;
7226 		case 42: /* HPD hotplug */
7227 			switch (src_data) {
7228 			case 0:
7229 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7230 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7231 					queue_hotplug = true;
7232 					DRM_DEBUG("IH: HPD1\n");
7233 				}
7234 				break;
7235 			case 1:
7236 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7237 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7238 					queue_hotplug = true;
7239 					DRM_DEBUG("IH: HPD2\n");
7240 				}
7241 				break;
7242 			case 2:
7243 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7244 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7245 					queue_hotplug = true;
7246 					DRM_DEBUG("IH: HPD3\n");
7247 				}
7248 				break;
7249 			case 3:
7250 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7251 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7252 					queue_hotplug = true;
7253 					DRM_DEBUG("IH: HPD4\n");
7254 				}
7255 				break;
7256 			case 4:
7257 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7258 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7259 					queue_hotplug = true;
7260 					DRM_DEBUG("IH: HPD5\n");
7261 				}
7262 				break;
7263 			case 5:
7264 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7265 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7266 					queue_hotplug = true;
7267 					DRM_DEBUG("IH: HPD6\n");
7268 				}
7269 				break;
7270 			default:
7271 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7272 				break;
7273 			}
7274 			break;
7275 		case 124: /* UVD */
7276 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7277 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7278 			break;
7279 		case 146:
7280 		case 147:
7281 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7282 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7283 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7284 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7285 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7286 				addr);
7287 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7288 				status);
7289 			cik_vm_decode_fault(rdev, status, addr, mc_client);
7290 			/* reset addr and status */
7291 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7292 			break;
7293 		case 176: /* GFX RB CP_INT */
7294 		case 177: /* GFX IB CP_INT */
7295 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7296 			break;
7297 		case 181: /* CP EOP event */
7298 			DRM_DEBUG("IH: CP EOP\n");
7299 			/* XXX check the bitfield order! */
7300 			me_id = (ring_id & 0x60) >> 5;
7301 			pipe_id = (ring_id & 0x18) >> 3;
7302 			queue_id = (ring_id & 0x7) >> 0;
7303 			switch (me_id) {
7304 			case 0:
7305 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7306 				break;
7307 			case 1:
7308 			case 2:
7309 				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
7310 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7311 				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
7312 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7313 				break;
7314 			}
7315 			break;
7316 		case 184: /* CP Privileged reg access */
7317 			DRM_ERROR("Illegal register access in command stream\n");
7318 			/* XXX check the bitfield order! */
7319 			me_id = (ring_id & 0x60) >> 5;
7320 			pipe_id = (ring_id & 0x18) >> 3;
7321 			queue_id = (ring_id & 0x7) >> 0;
7322 			switch (me_id) {
7323 			case 0:
7324 				/* This results in a full GPU reset, but all we need to do is soft
7325 				 * reset the CP for gfx
7326 				 */
7327 				queue_reset = true;
7328 				break;
7329 			case 1:
7330 				/* XXX compute */
7331 				queue_reset = true;
7332 				break;
7333 			case 2:
7334 				/* XXX compute */
7335 				queue_reset = true;
7336 				break;
7337 			}
7338 			break;
7339 		case 185: /* CP Privileged inst */
7340 			DRM_ERROR("Illegal instruction in command stream\n");
7341 			/* XXX check the bitfield order! */
7342 			me_id = (ring_id & 0x60) >> 5;
7343 			pipe_id = (ring_id & 0x18) >> 3;
7344 			queue_id = (ring_id & 0x7) >> 0;
7345 			switch (me_id) {
7346 			case 0:
7347 				/* This results in a full GPU reset, but all we need to do is soft
7348 				 * reset the CP for gfx
7349 				 */
7350 				queue_reset = true;
7351 				break;
7352 			case 1:
7353 				/* XXX compute */
7354 				queue_reset = true;
7355 				break;
7356 			case 2:
7357 				/* XXX compute */
7358 				queue_reset = true;
7359 				break;
7360 			}
7361 			break;
7362 		case 224: /* SDMA trap event */
7363 			/* XXX check the bitfield order! */
7364 			me_id = (ring_id & 0x3) >> 0;
7365 			queue_id = (ring_id & 0xc) >> 2;
7366 			DRM_DEBUG("IH: SDMA trap\n");
7367 			switch (me_id) {
7368 			case 0:
7369 				switch (queue_id) {
7370 				case 0:
7371 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7372 					break;
7373 				case 1:
7374 					/* XXX compute */
7375 					break;
7376 				case 2:
7377 					/* XXX compute */
7378 					break;
7379 				}
7380 				break;
7381 			case 1:
7382 				switch (queue_id) {
7383 				case 0:
7384 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7385 					break;
7386 				case 1:
7387 					/* XXX compute */
7388 					break;
7389 				case 2:
7390 					/* XXX compute */
7391 					break;
7392 				}
7393 				break;
7394 			}
7395 			break;
7396 		case 230: /* thermal low to high */
7397 			DRM_DEBUG("IH: thermal low to high\n");
7398 			rdev->pm.dpm.thermal.high_to_low = false;
7399 			queue_thermal = true;
7400 			break;
7401 		case 231: /* thermal high to low */
7402 			DRM_DEBUG("IH: thermal high to low\n");
7403 			rdev->pm.dpm.thermal.high_to_low = true;
7404 			queue_thermal = true;
7405 			break;
7406 		case 233: /* GUI IDLE */
7407 			DRM_DEBUG("IH: GUI idle\n");
7408 			break;
7409 		case 241: /* SDMA Privileged inst */
7410 		case 247: /* SDMA Privileged inst */
7411 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
7412 			/* XXX check the bitfield order! */
7413 			me_id = (ring_id & 0x3) >> 0;
7414 			queue_id = (ring_id & 0xc) >> 2;
7415 			switch (me_id) {
7416 			case 0:
7417 				switch (queue_id) {
7418 				case 0:
7419 					queue_reset = true;
7420 					break;
7421 				case 1:
7422 					/* XXX compute */
7423 					queue_reset = true;
7424 					break;
7425 				case 2:
7426 					/* XXX compute */
7427 					queue_reset = true;
7428 					break;
7429 				}
7430 				break;
7431 			case 1:
7432 				switch (queue_id) {
7433 				case 0:
7434 					queue_reset = true;
7435 					break;
7436 				case 1:
7437 					/* XXX compute */
7438 					queue_reset = true;
7439 					break;
7440 				case 2:
7441 					/* XXX compute */
7442 					queue_reset = true;
7443 					break;
7444 				}
7445 				break;
7446 			}
7447 			break;
7448 		default:
7449 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7450 			break;
7451 		}
7452 
7453 		/* wptr/rptr are in bytes! */
7454 		rptr += 16;
7455 		rptr &= rdev->ih.ptr_mask;
7456 	}
7457 	if (queue_hotplug)
7458 		schedule_work(&rdev->hotplug_work);
7459 	if (queue_reset)
7460 		schedule_work(&rdev->reset_work);
7461 	if (queue_thermal)
7462 		schedule_work(&rdev->pm.dpm.thermal.work);
7463 	rdev->ih.rptr = rptr;
7464 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
7465 	atomic_set(&rdev->ih.lock, 0);
7466 
7467 	/* make sure wptr hasn't changed while processing */
7468 	wptr = cik_get_ih_wptr(rdev);
7469 	if (wptr != rptr)
7470 		goto restart_ih;
7471 
7472 	return IRQ_HANDLED;
7473 }
7474 
7475 /*
7476  * startup/shutdown callbacks
7477  */
7478 /**
7479  * cik_startup - program the asic to a functional state
7480  *
7481  * @rdev: radeon_device pointer
7482  *
7483  * Programs the asic to a functional state (CIK).
7484  * Called by cik_init() and cik_resume().
7485  * Returns 0 for success, error for failure.
7486  */
7487 static int cik_startup(struct radeon_device *rdev)
7488 {
7489 	struct radeon_ring *ring;
7490 	int r;
7491 
7492 	/* enable pcie gen2/3 link */
7493 	cik_pcie_gen3_enable(rdev);
7494 	/* enable aspm */
7495 	cik_program_aspm(rdev);
7496 
7497 	/* scratch needs to be initialized before MC */
7498 	r = r600_vram_scratch_init(rdev);
7499 	if (r)
7500 		return r;
7501 
7502 	cik_mc_program(rdev);
7503 
7504 	if (rdev->flags & RADEON_IS_IGP) {
7505 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7506 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
7507 			r = cik_init_microcode(rdev);
7508 			if (r) {
7509 				DRM_ERROR("Failed to load firmware!\n");
7510 				return r;
7511 			}
7512 		}
7513 	} else {
7514 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7515 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
7516 		    !rdev->mc_fw) {
7517 			r = cik_init_microcode(rdev);
7518 			if (r) {
7519 				DRM_ERROR("Failed to load firmware!\n");
7520 				return r;
7521 			}
7522 		}
7523 
7524 		r = ci_mc_load_microcode(rdev);
7525 		if (r) {
7526 			DRM_ERROR("Failed to load MC firmware!\n");
7527 			return r;
7528 		}
7529 	}
7530 
7531 	r = cik_pcie_gart_enable(rdev);
7532 	if (r)
7533 		return r;
7534 	cik_gpu_init(rdev);
7535 
7536 	/* allocate rlc buffers */
7537 	if (rdev->flags & RADEON_IS_IGP) {
7538 		if (rdev->family == CHIP_KAVERI) {
7539 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7540 			rdev->rlc.reg_list_size =
7541 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7542 		} else {
7543 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7544 			rdev->rlc.reg_list_size =
7545 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7546 		}
7547 	}
7548 	rdev->rlc.cs_data = ci_cs_data;
7549 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7550 	r = sumo_rlc_init(rdev);
7551 	if (r) {
7552 		DRM_ERROR("Failed to init rlc BOs!\n");
7553 		return r;
7554 	}
7555 
7556 	/* allocate wb buffer */
7557 	r = radeon_wb_init(rdev);
7558 	if (r)
7559 		return r;
7560 
7561 	/* allocate mec buffers */
7562 	r = cik_mec_init(rdev);
7563 	if (r) {
7564 		DRM_ERROR("Failed to init MEC BOs!\n");
7565 		return r;
7566 	}
7567 
7568 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7569 	if (r) {
7570 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7571 		return r;
7572 	}
7573 
7574 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7575 	if (r) {
7576 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7577 		return r;
7578 	}
7579 
7580 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7581 	if (r) {
7582 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7583 		return r;
7584 	}
7585 
7586 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7587 	if (r) {
7588 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7589 		return r;
7590 	}
7591 
7592 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7593 	if (r) {
7594 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7595 		return r;
7596 	}
7597 
7598 	r = radeon_uvd_resume(rdev);
7599 	if (!r) {
7600 		r = uvd_v4_2_resume(rdev);
7601 		if (!r) {
7602 			r = radeon_fence_driver_start_ring(rdev,
7603 							   R600_RING_TYPE_UVD_INDEX);
7604 			if (r)
7605 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7606 		}
7607 	}
7608 	if (r)
7609 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7610 
7611 	/* Enable IRQ */
7612 	if (!rdev->irq.installed) {
7613 		r = radeon_irq_kms_init(rdev);
7614 		if (r)
7615 			return r;
7616 	}
7617 
7618 	r = cik_irq_init(rdev);
7619 	if (r) {
7620 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
7621 		radeon_irq_kms_fini(rdev);
7622 		return r;
7623 	}
7624 	cik_irq_set(rdev);
7625 
7626 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7627 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7628 			     CP_RB0_RPTR, CP_RB0_WPTR,
7629 			     PACKET3(PACKET3_NOP, 0x3FFF));
7630 	if (r)
7631 		return r;
7632 
7633 	/* set up the compute queues */
7634 	/* type-2 packets are deprecated on MEC, use type-3 instead */
7635 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7636 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7637 			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7638 			     PACKET3(PACKET3_NOP, 0x3FFF));
7639 	if (r)
7640 		return r;
7641 	ring->me = 1; /* first MEC */
7642 	ring->pipe = 0; /* first pipe */
7643 	ring->queue = 0; /* first queue */
7644 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7645 
7646 	/* type-2 packets are deprecated on MEC, use type-3 instead */
7647 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7648 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7649 			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7650 			     PACKET3(PACKET3_NOP, 0x3FFF));
7651 	if (r)
7652 		return r;
7653 	/* dGPUs only have 1 MEC */
7654 	ring->me = 1; /* first MEC */
7655 	ring->pipe = 0; /* first pipe */
7656 	ring->queue = 1; /* second queue */
7657 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
7658 
7659 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7660 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7661 			     SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
7662 			     SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
7663 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7664 	if (r)
7665 		return r;
7666 
7667 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7668 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7669 			     SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
7670 			     SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
7671 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7672 	if (r)
7673 		return r;
7674 
7675 	r = cik_cp_resume(rdev);
7676 	if (r)
7677 		return r;
7678 
7679 	r = cik_sdma_resume(rdev);
7680 	if (r)
7681 		return r;
7682 
7683 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7684 	if (ring->ring_size) {
7685 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7686 				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
7687 				     RADEON_CP_PACKET2);
7688 		if (!r)
7689 			r = uvd_v1_0_init(rdev);
7690 		if (r)
7691 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7692 	}
7693 
7694 	r = radeon_ib_pool_init(rdev);
7695 	if (r) {
7696 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7697 		return r;
7698 	}
7699 
7700 	r = radeon_vm_manager_init(rdev);
7701 	if (r) {
7702 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7703 		return r;
7704 	}
7705 
7706 	r = dce6_audio_init(rdev);
7707 	if (r)
7708 		return r;
7709 
7710 	return 0;
7711 }
7712 
7713 /**
7714  * cik_resume - resume the asic to a functional state
7715  *
7716  * @rdev: radeon_device pointer
7717  *
7718  * Programs the asic to a functional state (CIK).
7719  * Called at resume.
7720  * Returns 0 for success, error for failure.
7721  */
7722 int cik_resume(struct radeon_device *rdev)
7723 {
7724 	int r;
7725 
7726 	/* post card */
7727 	atom_asic_init(rdev->mode_info.atom_context);
7728 
7729 	/* init golden registers */
7730 	cik_init_golden_registers(rdev);
7731 
7732 	rdev->accel_working = true;
7733 	r = cik_startup(rdev);
7734 	if (r) {
7735 		DRM_ERROR("cik startup failed on resume\n");
7736 		rdev->accel_working = false;
7737 		return r;
7738 	}
7739 
7740 	return r;
7742 }
7743 
7744 /**
7745  * cik_suspend - suspend the asic
7746  *
7747  * @rdev: radeon_device pointer
7748  *
7749  * Bring the chip into a state suitable for suspend (CIK).
7750  * Called at suspend.
7751  * Returns 0 for success.
7752  */
7753 int cik_suspend(struct radeon_device *rdev)
7754 {
7755 	dce6_audio_fini(rdev);
7756 	radeon_vm_manager_fini(rdev);
7757 	cik_cp_enable(rdev, false);
7758 	cik_sdma_enable(rdev, false);
7759 	uvd_v1_0_fini(rdev);
7760 	radeon_uvd_suspend(rdev);
7761 	cik_fini_pg(rdev);
7762 	cik_fini_cg(rdev);
7763 	cik_irq_suspend(rdev);
7764 	radeon_wb_disable(rdev);
7765 	cik_pcie_gart_disable(rdev);
7766 	return 0;
7767 }
7768 
7769 /* The plan is to move initialization into this function and use
7770  * helper functions so that radeon_device_init does pretty much
7771  * nothing more than call asic-specific functions. This should
7772  * also allow us to remove a bunch of callback functions
7773  * like vram_info.
7774  */
7775 /**
7776  * cik_init - asic specific driver and hw init
7777  *
7778  * @rdev: radeon_device pointer
7779  *
7780  * Setup asic specific driver variables and program the hw
7781  * to a functional state (CIK).
7782  * Called at driver startup.
7783  * Returns 0 for success, errors for failure.
7784  */
7785 int cik_init(struct radeon_device *rdev)
7786 {
7787 	struct radeon_ring *ring;
7788 	int r;
7789 
7790 	/* Read BIOS */
7791 	if (!radeon_get_bios(rdev)) {
7792 		if (ASIC_IS_AVIVO(rdev))
7793 			return -EINVAL;
7794 	}
7795 	/* Must be an ATOMBIOS */
7796 	if (!rdev->is_atom_bios) {
7797 		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
7798 		return -EINVAL;
7799 	}
7800 	r = radeon_atombios_init(rdev);
7801 	if (r)
7802 		return r;
7803 
7804 	/* Post card if necessary */
7805 	if (!radeon_card_posted(rdev)) {
7806 		if (!rdev->bios) {
7807 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7808 			return -EINVAL;
7809 		}
7810 		DRM_INFO("GPU not posted. posting now...\n");
7811 		atom_asic_init(rdev->mode_info.atom_context);
7812 	}
7813 	/* init golden registers */
7814 	cik_init_golden_registers(rdev);
7815 	/* Initialize scratch registers */
7816 	cik_scratch_init(rdev);
7817 	/* Initialize surface registers */
7818 	radeon_surface_init(rdev);
7819 	/* Initialize clocks */
7820 	radeon_get_clock_info(rdev->ddev);
7821 
7822 	/* Fence driver */
7823 	r = radeon_fence_driver_init(rdev);
7824 	if (r)
7825 		return r;
7826 
7827 	/* initialize memory controller */
7828 	r = cik_mc_init(rdev);
7829 	if (r)
7830 		return r;
7831 	/* Memory manager */
7832 	r = radeon_bo_init(rdev);
7833 	if (r)
7834 		return r;
7835 
7836 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7837 	ring->ring_obj = NULL;
7838 	r600_ring_init(rdev, ring, 1024 * 1024);
7839 
7840 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7841 	ring->ring_obj = NULL;
7842 	r600_ring_init(rdev, ring, 1024 * 1024);
7843 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
7844 	if (r)
7845 		return r;
7846 
7847 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7848 	ring->ring_obj = NULL;
7849 	r600_ring_init(rdev, ring, 1024 * 1024);
7850 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
7851 	if (r)
7852 		return r;
7853 
7854 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7855 	ring->ring_obj = NULL;
7856 	r600_ring_init(rdev, ring, 256 * 1024);
7857 
7858 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7859 	ring->ring_obj = NULL;
7860 	r600_ring_init(rdev, ring, 256 * 1024);
7861 
7862 	r = radeon_uvd_init(rdev);
7863 	if (!r) {
7864 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7865 		ring->ring_obj = NULL;
7866 		r600_ring_init(rdev, ring, 4096);
7867 	}
7868 
7869 	rdev->ih.ring_obj = NULL;
7870 	r600_ih_ring_init(rdev, 64 * 1024);
7871 
7872 	r = r600_pcie_gart_init(rdev);
7873 	if (r)
7874 		return r;
7875 
7876 	rdev->accel_working = true;
7877 	r = cik_startup(rdev);
7878 	if (r) {
7879 		dev_err(rdev->dev, "disabling GPU acceleration\n");
7880 		cik_cp_fini(rdev);
7881 		cik_sdma_fini(rdev);
7882 		cik_irq_fini(rdev);
7883 		sumo_rlc_fini(rdev);
7884 		cik_mec_fini(rdev);
7885 		radeon_wb_fini(rdev);
7886 		radeon_ib_pool_fini(rdev);
7887 		radeon_vm_manager_fini(rdev);
7888 		radeon_irq_kms_fini(rdev);
7889 		cik_pcie_gart_fini(rdev);
7890 		rdev->accel_working = false;
7891 	}
7892 
7893 	/* Don't start up if the MC ucode is missing.
7894 	 * The default clocks and voltages before the MC ucode
7895 	 * is loaded are not sufficient for advanced operations.
7896 	 */
7897 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7898 		DRM_ERROR("radeon: MC ucode required for CIK parts.\n");
7899 		return -EINVAL;
7900 	}
7901 
7902 	return 0;
7903 }
7904 
7905 /**
7906  * cik_fini - asic specific driver and hw fini
7907  *
7908  * @rdev: radeon_device pointer
7909  *
7910  * Tear down the asic specific driver variables and program the hw
7911  * to an idle state (CIK).
7912  * Called at driver unload.
7913  */
7914 void cik_fini(struct radeon_device *rdev)
7915 {
7916 	cik_cp_fini(rdev);
7917 	cik_sdma_fini(rdev);
7918 	cik_fini_pg(rdev);
7919 	cik_fini_cg(rdev);
7920 	cik_irq_fini(rdev);
7921 	sumo_rlc_fini(rdev);
7922 	cik_mec_fini(rdev);
7923 	radeon_wb_fini(rdev);
7924 	radeon_vm_manager_fini(rdev);
7925 	radeon_ib_pool_fini(rdev);
7926 	radeon_irq_kms_fini(rdev);
7927 	uvd_v1_0_fini(rdev);
7928 	radeon_uvd_fini(rdev);
7929 	cik_pcie_gart_fini(rdev);
7930 	r600_vram_scratch_fini(rdev);
7931 	radeon_gem_fini(rdev);
7932 	radeon_fence_driver_fini(rdev);
7933 	radeon_bo_fini(rdev);
7934 	radeon_atombios_fini(rdev);
7935 	kfree(rdev->bios);
7936 	rdev->bios = NULL;
7937 }
7938 
7939 void dce8_program_fmt(struct drm_encoder *encoder)
7940 {
7941 	struct drm_device *dev = encoder->dev;
7942 	struct radeon_device *rdev = dev->dev_private;
7943 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
7944 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
7945 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
7946 	int bpc = 0;
7947 	u32 tmp = 0;
7948 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
7949 
7950 	if (connector) {
7951 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
7952 		bpc = radeon_get_monitor_bpc(connector);
7953 		dither = radeon_connector->dither;
7954 	}
7955 
7956 	/* LVDS/eDP FMT is set up by atom */
7957 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
7958 		return;
7959 
7960 	/* not needed for analog */
7961 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
7962 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
7963 		return;
7964 
7965 	if (bpc == 0)
7966 		return;
7967 
7968 	switch (bpc) {
7969 	case 6:
7970 		if (dither == RADEON_FMT_DITHER_ENABLE)
7971 			/* XXX sort out optimal dither settings */
7972 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
7973 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
7974 		else
7975 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
7976 		break;
7977 	case 8:
7978 		if (dither == RADEON_FMT_DITHER_ENABLE)
7979 			/* XXX sort out optimal dither settings */
7980 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
7981 				FMT_RGB_RANDOM_ENABLE |
7982 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
7983 		else
7984 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
7985 		break;
7986 	case 10:
7987 		if (dither == RADEON_FMT_DITHER_ENABLE)
7988 			/* XXX sort out optimal dither settings */
7989 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
7990 				FMT_RGB_RANDOM_ENABLE |
7991 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
7992 		else
7993 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
7994 		break;
7995 	default:
7996 		/* not needed */
7997 		break;
7998 	}
7999 
8000 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8001 }
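
/* Note on the switch above: FMT_SPATIAL_DITHER_DEPTH() and
 * FMT_TRUNCATE_DEPTH() take an encoded depth rather than the bpc
 * itself; the cases map 6 bpc -> 0, 8 bpc -> 1 and 10 bpc -> 2,
 * i.e. depth = (bpc - 6) / 2.
 */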
8002 
8003 /* display watermark setup */
8004 /**
8005  * dce8_line_buffer_adjust - Set up the line buffer
8006  *
8007  * @rdev: radeon_device pointer
8008  * @radeon_crtc: the selected display controller
8009  * @mode: the current display mode on the selected display
8010  * controller
8011  *
8012  * Set up the line buffer allocation for
8013  * the selected display controller (CIK).
8014  * Returns the line buffer size in pixels.
8015  */
8016 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8017 				   struct radeon_crtc *radeon_crtc,
8018 				   struct drm_display_mode *mode)
8019 {
8020 	u32 tmp, buffer_alloc, i;
8021 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8022 	/*
8023 	 * Line Buffer Setup
8024 	 * There are 6 line buffers, one for each display controller.
8025 	 * There are 3 partitions per LB. Select the number of partitions
8026 	 * to enable based on the display width.  For display widths larger
8027 	 * than 4096, you need to use 2 display controllers and combine
8028 	 * them using the stereo blender.
8029 	 */
8030 	if (radeon_crtc->base.enabled && mode) {
8031 		if (mode->crtc_hdisplay < 1920) {
8032 			tmp = 1;
8033 			buffer_alloc = 2;
8034 		} else if (mode->crtc_hdisplay < 2560) {
8035 			tmp = 2;
8036 			buffer_alloc = 2;
8037 		} else if (mode->crtc_hdisplay < 4096) {
8038 			tmp = 0;
8039 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8040 		} else {
8041 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8042 			tmp = 0;
8043 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8044 		}
8045 	} else {
8046 		tmp = 1;
8047 		buffer_alloc = 0;
8048 	}
8049 
8050 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8051 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8052 
8053 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8054 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8055 	for (i = 0; i < rdev->usec_timeout; i++) {
8056 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8057 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8058 			break;
8059 		udelay(1);
8060 	}
8061 
8062 	if (radeon_crtc->base.enabled && mode) {
8063 		switch (tmp) {
8064 		case 0:
8065 		default:
8066 			return 4096 * 2;
8067 		case 1:
8068 			return 1920 * 2;
8069 		case 2:
8070 			return 2560 * 2;
8071 		}
8072 	}
8073 
8074 	/* controller not enabled, so no lb used */
8075 	return 0;
8076 }
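
/* Two worked examples of the mapping above for an enabled crtc: a
 * 1920-wide mode is not < 1920, so it falls into the < 2560 bucket
 * (tmp == 2) and the function returns 2560 * 2 pixels of line buffer;
 * a 3840-wide mode lands in the < 4096 bucket (tmp == 0) and returns
 * 4096 * 2 pixels.
 */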
8077 
8078 /**
8079  * cik_get_number_of_dram_channels - get the number of dram channels
8080  *
8081  * @rdev: radeon_device pointer
8082  *
8083  * Look up the number of video ram channels (CIK).
8084  * Used for display watermark bandwidth calculations
8085  * Returns the number of dram channels
8086  */
8087 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8088 {
8089 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8090 
8091 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8092 	case 0:
8093 	default:
8094 		return 1;
8095 	case 1:
8096 		return 2;
8097 	case 2:
8098 		return 4;
8099 	case 3:
8100 		return 8;
8101 	case 4:
8102 		return 3;
8103 	case 5:
8104 		return 6;
8105 	case 6:
8106 		return 10;
8107 	case 7:
8108 		return 12;
8109 	case 8:
8110 		return 16;
8111 	}
8112 }
8113 
8114 struct dce8_wm_params {
8115 	u32 dram_channels; /* number of dram channels */
8116 	u32 yclk;          /* bandwidth per dram data pin in kHz */
8117 	u32 sclk;          /* engine clock in kHz */
8118 	u32 disp_clk;      /* display clock in kHz */
8119 	u32 src_width;     /* viewport width */
8120 	u32 active_time;   /* active display time in ns */
8121 	u32 blank_time;    /* blank time in ns */
8122 	bool interlaced;    /* mode is interlaced */
8123 	fixed20_12 vsc;    /* vertical scale ratio */
8124 	u32 num_heads;     /* number of active crtcs */
8125 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8126 	u32 lb_size;       /* line buffer allocated to pipe */
8127 	u32 vtaps;         /* vertical scaler taps */
8128 };
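
/* An illustrative parameter fill used by the worked examples in the
 * comments below.  All values are assumed for a single 1920x1080 head
 * (148.5 MHz pixel clock, 2200 clocks per line) rather than taken from
 * real hardware, and the variable exists only as a sketch.
 */
static const struct dce8_wm_params dce8_wm_example __maybe_unused = {
	.dram_channels = 2,
	.yclk = 1000000,		/* 1 GHz effective per pin */
	.sclk = 800000,			/* 800 MHz engine clock */
	.disp_clk = 148500,		/* 148.5 MHz display clock */
	.src_width = 1920,
	.active_time = 11520,		/* 1920 * 6 ns pixel period */
	.blank_time = 1680,		/* (2200 - 1920) * 6 ns */
	.interlaced = false,
	.vsc = { .full = 1 << 12 },	/* 1.0 in 20.12 fixed point */
	.num_heads = 1,
	.bytes_per_pixel = 4,
	.lb_size = 2560 * 2,		/* from dce8_line_buffer_adjust() */
	.vtaps = 1,
};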
8129 
8130 /**
8131  * dce8_dram_bandwidth - get the dram bandwidth
8132  *
8133  * @wm: watermark calculation data
8134  *
8135  * Calculate the raw dram bandwidth (CIK).
8136  * Used for display watermark bandwidth calculations
8137  * Returns the dram bandwidth in MBytes/s
8138  */
8139 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8140 {
8141 	/* Calculate raw DRAM Bandwidth */
8142 	fixed20_12 dram_efficiency; /* 0.7 */
8143 	fixed20_12 yclk, dram_channels, bandwidth;
8144 	fixed20_12 a;
8145 
8146 	a.full = dfixed_const(1000);
8147 	yclk.full = dfixed_const(wm->yclk);
8148 	yclk.full = dfixed_div(yclk, a);
8149 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8150 	a.full = dfixed_const(10);
8151 	dram_efficiency.full = dfixed_const(7);
8152 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8153 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8154 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8155 
8156 	return dfixed_trunc(bandwidth);
8157 }
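
/* With dce8_wm_example above: 1000 MHz effective * (2 channels * 4
 * bytes) * 0.7 efficiency = 5600 MBytes/s of raw DRAM bandwidth.
 */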
8158 
8159 /**
8160  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8161  *
8162  * @wm: watermark calculation data
8163  *
8164  * Calculate the dram bandwidth used for display (CIK).
8165  * Used for display watermark bandwidth calculations
8166  * Returns the dram bandwidth for display in MBytes/s
8167  */
8168 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8169 {
8170 	/* Calculate DRAM Bandwidth and the part allocated to display. */
8171 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8172 	fixed20_12 yclk, dram_channels, bandwidth;
8173 	fixed20_12 a;
8174 
8175 	a.full = dfixed_const(1000);
8176 	yclk.full = dfixed_const(wm->yclk);
8177 	yclk.full = dfixed_div(yclk, a);
8178 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8179 	a.full = dfixed_const(10);
8180 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
8181 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8182 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8183 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8184 
8185 	return dfixed_trunc(bandwidth);
8186 }
8187 
8188 /**
8189  * dce8_data_return_bandwidth - get the data return bandwidth
8190  *
8191  * @wm: watermark calculation data
8192  *
8193  * Calculate the data return bandwidth used for display (CIK).
8194  * Used for display watermark bandwidth calculations
8195  * Returns the data return bandwidth in MBytes/s
8196  */
8197 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8198 {
8199 	/* Calculate the display Data return Bandwidth */
8200 	fixed20_12 return_efficiency; /* 0.8 */
8201 	fixed20_12 sclk, bandwidth;
8202 	fixed20_12 a;
8203 
8204 	a.full = dfixed_const(1000);
8205 	sclk.full = dfixed_const(wm->sclk);
8206 	sclk.full = dfixed_div(sclk, a);
8207 	a.full = dfixed_const(10);
8208 	return_efficiency.full = dfixed_const(8);
8209 	return_efficiency.full = dfixed_div(return_efficiency, a);
8210 	a.full = dfixed_const(32);
8211 	bandwidth.full = dfixed_mul(a, sclk);
8212 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8213 
8214 	return dfixed_trunc(bandwidth);
8215 }
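
/* With dce8_wm_example: 800 MHz * 32 bytes per cycle * 0.8 efficiency
 * = 20480 MBytes/s of data return bandwidth.
 */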
8216 
8217 /**
8218  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8219  *
8220  * @wm: watermark calculation data
8221  *
8222  * Calculate the dmif bandwidth used for display (CIK).
8223  * Used for display watermark bandwidth calculations
8224  * Returns the dmif bandwidth in MBytes/s
8225  */
8226 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8227 {
8228 	/* Calculate the DMIF Request Bandwidth */
8229 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8230 	fixed20_12 disp_clk, bandwidth;
8231 	fixed20_12 a, b;
8232 
8233 	a.full = dfixed_const(1000);
8234 	disp_clk.full = dfixed_const(wm->disp_clk);
8235 	disp_clk.full = dfixed_div(disp_clk, a);
8236 	a.full = dfixed_const(32);
8237 	b.full = dfixed_mul(a, disp_clk);
8238 
8239 	a.full = dfixed_const(10);
8240 	disp_clk_request_efficiency.full = dfixed_const(8);
8241 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8242 
8243 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8244 
8245 	return dfixed_trunc(bandwidth);
8246 }
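
/* With dce8_wm_example: 148.5 MHz * 32 bytes * 0.8 efficiency
 * ~= 3801 MBytes/s.  Of the three terms this is the smallest, so it is
 * what dce8_available_bandwidth() below ends up returning.
 */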
8247 
8248 /**
8249  * dce8_available_bandwidth - get the min available bandwidth
8250  *
8251  * @wm: watermark calculation data
8252  *
8253  * Calculate the min available bandwidth used for display (CIK).
8254  * Used for display watermark bandwidth calculations
8255  * Returns the min available bandwidth in MBytes/s
8256  */
8257 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8258 {
8259 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
8260 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8261 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8262 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8263 
8264 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8265 }
8266 
8267 /**
8268  * dce8_average_bandwidth - get the average available bandwidth
8269  *
8270  * @wm: watermark calculation data
8271  *
8272  * Calculate the average available bandwidth used for display (CIK).
8273  * Used for display watermark bandwidth calculations
8274  * Returns the average available bandwidth in MBytes/s
8275  */
8276 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8277 {
8278 	/* Calculate the display mode Average Bandwidth
8279 	 * DisplayMode should contain the source and destination dimensions,
8280 	 * timing, etc.
8281 	 */
8282 	fixed20_12 bpp;
8283 	fixed20_12 line_time;
8284 	fixed20_12 src_width;
8285 	fixed20_12 bandwidth;
8286 	fixed20_12 a;
8287 
8288 	a.full = dfixed_const(1000);
8289 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8290 	line_time.full = dfixed_div(line_time, a);
8291 	bpp.full = dfixed_const(wm->bytes_per_pixel);
8292 	src_width.full = dfixed_const(wm->src_width);
8293 	bandwidth.full = dfixed_mul(src_width, bpp);
8294 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8295 	bandwidth.full = dfixed_div(bandwidth, line_time);
8296 
8297 	return dfixed_trunc(bandwidth);
8298 }
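
/* With dce8_wm_example: 1920 pixels * 4 bytes * 1.0 vsc over a
 * (11520 + 1680) / 1000 = 13.2 us line time ~= 581 MBytes/s of average
 * display demand, well under the ~3801 MBytes/s available.
 */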
8299 
8300 /**
8301  * dce8_latency_watermark - get the latency watermark
8302  *
8303  * @wm: watermark calculation data
8304  *
8305  * Calculate the latency watermark (CIK).
8306  * Used for display watermark bandwidth calculations
8307  * Returns the latency watermark in ns
8308  */
8309 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8310 {
8311 	/* First calculate the latency in ns */
8312 	u32 mc_latency = 2000; /* 2000 ns. */
8313 	u32 available_bandwidth = dce8_available_bandwidth(wm);
8314 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8315 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8316 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8317 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8318 		(wm->num_heads * cursor_line_pair_return_time);
8319 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8320 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8321 	u32 tmp, dmif_size = 12288;
8322 	fixed20_12 a, b, c;
8323 
8324 	if (wm->num_heads == 0)
8325 		return 0;
8326 
8327 	a.full = dfixed_const(2);
8328 	b.full = dfixed_const(1);
8329 	if ((wm->vsc.full > a.full) ||
8330 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8331 	    (wm->vtaps >= 5) ||
8332 	    ((wm->vsc.full >= a.full) && wm->interlaced))
8333 		max_src_lines_per_dst_line = 4;
8334 	else
8335 		max_src_lines_per_dst_line = 2;
8336 
8337 	a.full = dfixed_const(available_bandwidth);
8338 	b.full = dfixed_const(wm->num_heads);
8339 	a.full = dfixed_div(a, b);
8340 
8341 	b.full = dfixed_const(mc_latency + 512);
8342 	c.full = dfixed_const(wm->disp_clk);
8343 	b.full = dfixed_div(b, c);
8344 
8345 	c.full = dfixed_const(dmif_size);
8346 	b.full = dfixed_div(c, b);
8347 
8348 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8349 
8350 	b.full = dfixed_const(1000);
8351 	c.full = dfixed_const(wm->disp_clk);
8352 	b.full = dfixed_div(c, b);
8353 	c.full = dfixed_const(wm->bytes_per_pixel);
8354 	b.full = dfixed_mul(b, c);
8355 
8356 	lb_fill_bw = min(tmp, dfixed_trunc(b));
8357 
8358 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8359 	b.full = dfixed_const(1000);
8360 	c.full = dfixed_const(lb_fill_bw);
8361 	b.full = dfixed_div(c, b);
8362 	a.full = dfixed_div(a, b);
8363 	line_fill_time = dfixed_trunc(a);
8364 
8365 	if (line_fill_time < wm->active_time)
8366 		return latency;
8367 	else
8368 		return latency + (line_fill_time - wm->active_time);
8370 }
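
/* Worked through with dce8_wm_example: the base latency is mc_latency
 * (2000 ns) plus the chunk and cursor return terms (~2288 ns at
 * 3801 MBytes/s) plus dc pipe latency (40000000 / 148500 ~= 269 ns),
 * about 4557 ns in total.  lb_fill_bw is min(3801, 148.5 * 4) =
 * 594 MBytes/s, so filling 2 source lines (15360 bytes) takes
 * ~25858 ns, which exceeds the 11520 ns active time and stretches the
 * returned watermark to ~18.9 us.
 */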
8371 
8372 /**
8373  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8374  * average and available dram bandwidth
8375  *
8376  * @wm: watermark calculation data
8377  *
8378  * Check if the display average bandwidth fits in the display
8379  * dram bandwidth (CIK).
8380  * Used for display watermark bandwidth calculations
8381  * Returns true if the display fits, false if not.
8382  */
8383 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8384 {
8385 	if (dce8_average_bandwidth(wm) <=
8386 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8387 		return true;
8388 	else
8389 		return false;
8390 }
8391 
8392 /**
8393  * dce8_average_bandwidth_vs_available_bandwidth - check
8394  * average and available bandwidth
8395  *
8396  * @wm: watermark calculation data
8397  *
8398  * Check if the display average bandwidth fits in the display
8399  * available bandwidth (CIK).
8400  * Used for display watermark bandwidth calculations
8401  * Returns true if the display fits, false if not.
8402  */
8403 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8404 {
8405 	if (dce8_average_bandwidth(wm) <=
8406 	    (dce8_available_bandwidth(wm) / wm->num_heads))
8407 		return true;
8408 	else
8409 		return false;
8410 }
8411 
8412 /**
8413  * dce8_check_latency_hiding - check latency hiding
8414  *
8415  * @wm: watermark calculation data
8416  *
8417  * Check latency hiding (CIK).
8418  * Used for display watermark bandwidth calculations
8419  * Returns true if the display fits, false if not.
8420  */
8421 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8422 {
8423 	u32 lb_partitions = wm->lb_size / wm->src_width;
8424 	u32 line_time = wm->active_time + wm->blank_time;
8425 	u32 latency_tolerant_lines;
8426 	u32 latency_hiding;
8427 	fixed20_12 a;
8428 
8429 	a.full = dfixed_const(1);
8430 	if (wm->vsc.full > a.full)
8431 		latency_tolerant_lines = 1;
8432 	else {
8433 		if (lb_partitions <= (wm->vtaps + 1))
8434 			latency_tolerant_lines = 1;
8435 		else
8436 			latency_tolerant_lines = 2;
8437 	}
8438 
8439 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8440 
8441 	if (dce8_latency_watermark(wm) <= latency_hiding)
8442 		return true;
8443 	else
8444 		return false;
8445 }
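
/* Continuing the dce8_wm_example numbers: lb_partitions = (2560 * 2) /
 * 1920 = 2, which is <= vtaps + 1, so only one latency-tolerant line;
 * latency_hiding = 13200 + 1680 = 14880 ns.  The ~18.9 us watermark
 * computed above exceeds that, so the check fails and
 * dce8_program_watermarks() below takes the "force priority to high"
 * path.
 */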
8446 
8447 /**
8448  * dce8_program_watermarks - program display watermarks
8449  *
8450  * @rdev: radeon_device pointer
8451  * @radeon_crtc: the selected display controller
8452  * @lb_size: line buffer size
8453  * @num_heads: number of display controllers in use
8454  *
8455  * Calculate and program the display watermarks for the
8456  * selected display controller (CIK).
8457  */
8458 static void dce8_program_watermarks(struct radeon_device *rdev,
8459 				    struct radeon_crtc *radeon_crtc,
8460 				    u32 lb_size, u32 num_heads)
8461 {
8462 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
8463 	struct dce8_wm_params wm_low, wm_high;
8464 	u32 pixel_period;
8465 	u32 line_time = 0;
8466 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
8467 	u32 tmp, wm_mask;
8468 
8469 	if (radeon_crtc->base.enabled && num_heads && mode) {
8470 		pixel_period = 1000000 / (u32)mode->clock;
8471 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8472 
8473 		/* watermark for high clocks */
8474 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8475 		    rdev->pm.dpm_enabled) {
8476 			wm_high.yclk =
8477 				radeon_dpm_get_mclk(rdev, false) * 10;
8478 			wm_high.sclk =
8479 				radeon_dpm_get_sclk(rdev, false) * 10;
8480 		} else {
8481 			wm_high.yclk = rdev->pm.current_mclk * 10;
8482 			wm_high.sclk = rdev->pm.current_sclk * 10;
8483 		}
8484 
8485 		wm_high.disp_clk = mode->clock;
8486 		wm_high.src_width = mode->crtc_hdisplay;
8487 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8488 		wm_high.blank_time = line_time - wm_high.active_time;
8489 		wm_high.interlaced = false;
8490 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8491 			wm_high.interlaced = true;
8492 		wm_high.vsc = radeon_crtc->vsc;
8493 		wm_high.vtaps = 1;
8494 		if (radeon_crtc->rmx_type != RMX_OFF)
8495 			wm_high.vtaps = 2;
8496 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8497 		wm_high.lb_size = lb_size;
8498 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8499 		wm_high.num_heads = num_heads;
8500 
8501 		/* set for high clocks */
8502 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8503 
8504 		/* possibly force display priority to high */
8505 		/* should really do this at mode validation time... */
8506 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8507 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8508 		    !dce8_check_latency_hiding(&wm_high) ||
8509 		    (rdev->disp_priority == 2)) {
8510 			DRM_DEBUG_KMS("force priority to high\n");
8511 		}
8512 
8513 		/* watermark for low clocks */
8514 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8515 		    rdev->pm.dpm_enabled) {
8516 			wm_low.yclk =
8517 				radeon_dpm_get_mclk(rdev, true) * 10;
8518 			wm_low.sclk =
8519 				radeon_dpm_get_sclk(rdev, true) * 10;
8520 		} else {
8521 			wm_low.yclk = rdev->pm.current_mclk * 10;
8522 			wm_low.sclk = rdev->pm.current_sclk * 10;
8523 		}
8524 
8525 		wm_low.disp_clk = mode->clock;
8526 		wm_low.src_width = mode->crtc_hdisplay;
8527 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8528 		wm_low.blank_time = line_time - wm_low.active_time;
8529 		wm_low.interlaced = false;
8530 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8531 			wm_low.interlaced = true;
8532 		wm_low.vsc = radeon_crtc->vsc;
8533 		wm_low.vtaps = 1;
8534 		if (radeon_crtc->rmx_type != RMX_OFF)
8535 			wm_low.vtaps = 2;
8536 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8537 		wm_low.lb_size = lb_size;
8538 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8539 		wm_low.num_heads = num_heads;
8540 
8541 		/* set for low clocks */
8542 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
8543 
8544 		/* possibly force display priority to high */
8545 		/* should really do this at mode validation time... */
8546 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
8547 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
8548 		    !dce8_check_latency_hiding(&wm_low) ||
8549 		    (rdev->disp_priority == 2)) {
8550 			DRM_DEBUG_KMS("force priority to high\n");
8551 		}
8552 	}
8553 
8554 	/* select wm A */
8555 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8556 	tmp = wm_mask;
8557 	tmp &= ~LATENCY_WATERMARK_MASK(3);
8558 	tmp |= LATENCY_WATERMARK_MASK(1);
8559 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8560 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8561 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
8562 		LATENCY_HIGH_WATERMARK(line_time)));
8563 	/* select wm B */
8564 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8565 	tmp &= ~LATENCY_WATERMARK_MASK(3);
8566 	tmp |= LATENCY_WATERMARK_MASK(2);
8567 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8568 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8569 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
8570 		LATENCY_HIGH_WATERMARK(line_time)));
8571 	/* restore original selection */
8572 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
8573 
8574 	/* save values for DPM */
8575 	radeon_crtc->line_time = line_time;
8576 	radeon_crtc->wm_high = latency_watermark_a;
8577 	radeon_crtc->wm_low = latency_watermark_b;
8578 }
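
/*
 * Illustrative sketch (not part of the driver; the helper name is
 * hypothetical): how the values saved above might be consumed. DPM can
 * select the high watermark at boosted clocks and the low one otherwise.
 */
#if 0
static u32 dce8_pick_watermark_sketch(struct radeon_crtc *radeon_crtc,
				      bool high_clocks)
{
	/* wm_high/wm_low were written by dce8_program_watermarks() */
	return high_clocks ? radeon_crtc->wm_high : radeon_crtc->wm_low;
}
#endif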
8579 
8580 /**
8581  * dce8_bandwidth_update - program display watermarks
8582  *
8583  * @rdev: radeon_device pointer
8584  *
8585  * Calculate and program the display watermarks and line
8586  * buffer allocation (CIK).
8587  */
8588 void dce8_bandwidth_update(struct radeon_device *rdev)
8589 {
8590 	struct drm_display_mode *mode = NULL;
8591 	u32 num_heads = 0, lb_size;
8592 	int i;
8593 
8594 	radeon_update_display_priority(rdev);
8595 
8596 	for (i = 0; i < rdev->num_crtc; i++) {
8597 		if (rdev->mode_info.crtcs[i]->base.enabled)
8598 			num_heads++;
8599 	}
8600 	for (i = 0; i < rdev->num_crtc; i++) {
8601 		mode = &rdev->mode_info.crtcs[i]->base.mode;
8602 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8603 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8604 	}
8605 }
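
/*
 * Usage note (sketch of the standard radeon flow): this function is wired
 * up as the CIK bandwidth_update asic callback, so a mode set that enables
 * or disables a crtc re-runs the head count, the line buffer split and the
 * watermark programming for every crtc in one pass.
 */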
8606 
8607 /**
8608  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8609  *
8610  * @rdev: radeon_device pointer
8611  *
8612  * Fetches a GPU clock counter snapshot (CIK).
8613  * Returns the 64 bit clock counter snapshot.
8614  */
8615 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8616 {
8617 	uint64_t clock;
8618 
8619 	mutex_lock(&rdev->gpu_clock_mutex);
8620 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8621 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8622 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8623 	mutex_unlock(&rdev->gpu_clock_mutex);
8624 	return clock;
8625 }
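
/*
 * Illustrative sketch (not part of the driver; the helper name is
 * hypothetical): measuring an interval in GPU clock ticks with two
 * snapshots. Userspace can obtain the same counter through the radeon
 * info ioctl.
 */
#if 0
static uint64_t cik_gpu_clock_delta_sketch(struct radeon_device *rdev)
{
	uint64_t start = cik_get_gpu_clock_counter(rdev);

	mdelay(10);	/* stand-in for the workload being measured */
	return cik_get_gpu_clock_counter(rdev) - start;
}
#endif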
8626 
8627 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8628                               u32 cntl_reg, u32 status_reg)
8629 {
8630 	int r, i;
8631 	struct atom_clock_dividers dividers;
8632 	uint32_t tmp;
8633 
8634 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8635 					   clock, false, &dividers);
8636 	if (r)
8637 		return r;
8638 
8639 	tmp = RREG32_SMC(cntl_reg);
8640 	tmp &= ~(DCLK_DIR_CNTL_EN | DCLK_DIVIDER_MASK);
8641 	tmp |= dividers.post_divider;
8642 	WREG32_SMC(cntl_reg, tmp);
8643 
8644 	for (i = 0; i < 100; i++) {
8645 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
8646 			break;
8647 		mdelay(10);
8648 	}
8649 	if (i == 100)
8650 		return -ETIMEDOUT;
8651 
8652 	return 0;
8653 }
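
/*
 * Note on the poll above: DCLK_STATUS is sampled up to 100 times with a
 * 10 ms delay between reads, i.e. the divider change gets roughly one
 * second to take effect before -ETIMEDOUT is returned.
 */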
8654 
8655 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8656 {
8657 	int r = 0;
8658 
8659 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8660 	if (r)
8661 		return r;
8662 
8663 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8664 	return r;
8665 }
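
/*
 * Illustrative sketch (not part of the driver; the helper name is
 * hypothetical): a caller bringing UVD up would raise the clocks first.
 * The arguments are in 10 kHz units, so 53300/40000 request a 533 MHz
 * VCLK and a 400 MHz DCLK, the values the radeon UVD code uses.
 */
#if 0
static int cik_uvd_clock_bringup_sketch(struct radeon_device *rdev)
{
	int r = cik_set_uvd_clocks(rdev, 53300, 40000);

	if (r)
		DRM_ERROR("UVD clock bringup failed: %d\n", r);
	return r;
}
#endif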
8666 
8667 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
8668 {
8669 	struct pci_dev *root = rdev->pdev->bus->self;
8670 	int bridge_pos, gpu_pos;
8671 	u32 speed_cntl, mask, current_data_rate;
8672 	int ret, i;
8673 	u16 tmp16;
8674 
8675 	if (radeon_pcie_gen2 == 0)
8676 		return;
8677 
8678 	if (rdev->flags & RADEON_IS_IGP)
8679 		return;
8680 
8681 	if (!(rdev->flags & RADEON_IS_PCIE))
8682 		return;
8683 
8684 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
8685 	if (ret != 0)
8686 		return;
8687 
8688 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
8689 		return;
8690 
8691 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8692 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
8693 		LC_CURRENT_DATA_RATE_SHIFT;
8694 	if (mask & DRM_PCIE_SPEED_80) {
8695 		if (current_data_rate == 2) {
8696 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
8697 			return;
8698 		}
8699 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
8700 	} else if (mask & DRM_PCIE_SPEED_50) {
8701 		if (current_data_rate == 1) {
8702 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
8703 			return;
8704 		}
8705 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
8706 	}
8707 
8708 	bridge_pos = pci_pcie_cap(root);
8709 	if (!bridge_pos)
8710 		return;
8711 
8712 	gpu_pos = pci_pcie_cap(rdev->pdev);
8713 	if (!gpu_pos)
8714 		return;
8715 
8716 	if (mask & DRM_PCIE_SPEED_80) {
8717 		/* re-try equalization if gen3 is not already enabled */
8718 		if (current_data_rate != 2) {
8719 			u16 bridge_cfg, gpu_cfg;
8720 			u16 bridge_cfg2, gpu_cfg2;
8721 			u32 max_lw, current_lw, tmp;
8722 
8723 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8724 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8725 
8726 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
8727 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8728 
8729 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
8730 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8731 
8732 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8733 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
8734 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
8735 
8736 			if (current_lw < max_lw) {
8737 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8738 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
8739 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
8740 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
8741 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
8742 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
8743 				}
8744 			}
8745 
8746 			for (i = 0; i < 10; i++) {
8747 				/* check status */
8748 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
8749 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
8750 					break;
8751 
8752 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8753 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8754 
8755 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
8756 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
8757 
8758 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8759 				tmp |= LC_SET_QUIESCE;
8760 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8761 
8762 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8763 				tmp |= LC_REDO_EQ;
8764 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8765 
8766 				mdelay(100);
8767 
8768 				/* linkctl: restore the saved HAWD (HW autonomous width disable) bits */
8769 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
8770 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8771 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
8772 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8773 
8774 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
8775 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8776 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
8777 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8778 
8779 				/* linkctl2: restore Enter Compliance (bit 4) and Transmit Margin (bits 9:11) */
8780 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
8781 				tmp16 &= ~((1 << 4) | (7 << 9));
8782 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
8783 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
8784 
8785 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8786 				tmp16 &= ~((1 << 4) | (7 << 9));
8787 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
8788 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8789 
8790 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8791 				tmp &= ~LC_SET_QUIESCE;
8792 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8793 			}
8794 		}
8795 	}
8796 
8797 	/* set the link speed */
8798 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
8799 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
8800 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8801 
8802 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8803 	tmp16 &= ~0xf; /* clear the target link speed field (LNKCTL2[3:0]) */
8804 	if (mask & DRM_PCIE_SPEED_80)
8805 		tmp16 |= 3; /* gen3 */
8806 	else if (mask & DRM_PCIE_SPEED_50)
8807 		tmp16 |= 2; /* gen2 */
8808 	else
8809 		tmp16 |= 1; /* gen1 */
8810 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8811 
8812 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8813 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
8814 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8815 
8816 	for (i = 0; i < rdev->usec_timeout; i++) {
8817 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8818 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
8819 			break;
8820 		udelay(1);
8821 	}
8822 }
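
/*
 * Illustrative sketch (not part of the driver; the helper name is
 * hypothetical): confirming the negotiated speed after the retrain above.
 * PCI_EXP_LNKSTA_CLS is the current link speed field (1 = 2.5 GT/s,
 * 2 = 5 GT/s, 3 = 8 GT/s).
 */
#if 0
static void cik_pcie_report_speed_sketch(struct radeon_device *rdev)
{
	u16 lnksta;

	pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKSTA, &lnksta);
	DRM_INFO("PCIE link is gen %d\n", lnksta & PCI_EXP_LNKSTA_CLS);
}
#endif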
8823 
8824 static void cik_program_aspm(struct radeon_device *rdev)
8825 {
8826 	u32 data, orig;
8827 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
8828 	bool disable_clkreq = false;
8829 
8830 	if (radeon_aspm == 0)
8831 		return;
8832 
8833 	/* XXX double check IGPs */
8834 	if (rdev->flags & RADEON_IS_IGP)
8835 		return;
8836 
8837 	if (!(rdev->flags & RADEON_IS_PCIE))
8838 		return;
8839 
8840 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8841 	data &= ~LC_XMIT_N_FTS_MASK;
8842 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
8843 	if (orig != data)
8844 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
8845 
8846 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
8847 	data |= LC_GO_TO_RECOVERY;
8848 	if (orig != data)
8849 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
8850 
8851 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
8852 	data |= P_IGNORE_EDB_ERR;
8853 	if (orig != data)
8854 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
8855 
8856 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8857 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
8858 	data |= LC_PMI_TO_L1_DIS;
8859 	if (!disable_l0s)
8860 		data |= LC_L0S_INACTIVITY(7);
8861 
8862 	if (!disable_l1) {
8863 		data |= LC_L1_INACTIVITY(7);
8864 		data &= ~LC_PMI_TO_L1_DIS;
8865 		if (orig != data)
8866 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8867 
8868 		if (!disable_plloff_in_l1) {
8869 			bool clk_req_support;
8870 
8871 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
8872 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8873 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8874 			if (orig != data)
8875 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
8876 
8877 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
8878 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8879 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8880 			if (orig != data)
8881 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
8882 
8883 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
8884 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8885 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8886 			if (orig != data)
8887 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
8888 
8889 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
8890 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8891 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8892 			if (orig != data)
8893 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
8894 
8895 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8896 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
8897 			data |= LC_DYN_LANES_PWR_STATE(3);
8898 			if (orig != data)
8899 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
8900 
8901 			if (!disable_clkreq) {
8902 				struct pci_dev *root = rdev->pdev->bus->self;
8903 				u32 lnkcap;
8904 
8905 				clk_req_support = false;
8906 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
8907 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
8908 					clk_req_support = true;
8909 			} else {
8910 				clk_req_support = false;
8911 			}
8912 
8913 			if (clk_req_support) {
8914 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
8915 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
8916 				if (orig != data)
8917 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
8918 
8919 				orig = data = RREG32_SMC(THM_CLK_CNTL);
8920 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
8921 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
8922 				if (orig != data)
8923 					WREG32_SMC(THM_CLK_CNTL, data);
8924 
8925 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
8926 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
8927 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
8928 				if (orig != data)
8929 					WREG32_SMC(MISC_CLK_CTRL, data);
8930 
8931 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
8932 				data &= ~BCLK_AS_XCLK;
8933 				if (orig != data)
8934 					WREG32_SMC(CG_CLKPIN_CNTL, data);
8935 
8936 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
8937 				data &= ~FORCE_BIF_REFCLK_EN;
8938 				if (orig != data)
8939 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
8940 
8941 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
8942 				data &= ~MPLL_CLKOUT_SEL_MASK;
8943 				data |= MPLL_CLKOUT_SEL(4);
8944 				if (orig != data)
8945 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
8946 			}
8947 		}
8948 	} else {
8949 		if (orig != data)
8950 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8951 	}
8952 
8953 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
8954 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
8955 	if (orig != data)
8956 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
8957 
8958 	if (!disable_l0s) {
8959 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8960 	if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
8961 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8962 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
8963 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8964 				data &= ~LC_L0S_INACTIVITY_MASK;
8965 				if (orig != data)
8966 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8967 			}
8968 		}
8969 	}
8970 }
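
/*
 * Note on the LC_L0S_INACTIVITY()/LC_L1_INACTIVITY() values above: the
 * argument selects an inactivity threshold before the link may enter
 * L0s/L1. The exact timer encoding is hardware specific; 7 is simply the
 * setting this asic family programs.
 */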
8971