xref: /openbmc/linux/drivers/gpu/drm/radeon/cik.c (revision afb46f79)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35 
36 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
45 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
46 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
47 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
48 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
49 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
50 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
51 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
52 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
53 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
54 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
55 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
56 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
57 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
58 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
59 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
60 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
61 MODULE_FIRMWARE("radeon/KABINI_me.bin");
62 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
63 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
64 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
65 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
66 
67 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
68 extern void r600_ih_ring_fini(struct radeon_device *rdev);
69 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
70 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
71 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
72 extern void sumo_rlc_fini(struct radeon_device *rdev);
73 extern int sumo_rlc_init(struct radeon_device *rdev);
74 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
75 extern void si_rlc_reset(struct radeon_device *rdev);
76 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
77 extern int cik_sdma_resume(struct radeon_device *rdev);
78 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
79 extern void cik_sdma_fini(struct radeon_device *rdev);
80 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
81 static void cik_rlc_stop(struct radeon_device *rdev);
82 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
83 static void cik_program_aspm(struct radeon_device *rdev);
84 static void cik_init_pg(struct radeon_device *rdev);
85 static void cik_init_cg(struct radeon_device *rdev);
86 static void cik_fini_pg(struct radeon_device *rdev);
87 static void cik_fini_cg(struct radeon_device *rdev);
88 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
89 					  bool enable);
90 
91 /* get temperature in millidegrees */
92 int ci_get_temp(struct radeon_device *rdev)
93 {
94 	u32 temp;
95 	int actual_temp = 0;
96 
97 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
98 		CTF_TEMP_SHIFT;
99 
100 	if (temp & 0x200)
101 		actual_temp = 255;
102 	else
103 		actual_temp = temp & 0x1ff;
104 
105 	actual_temp = actual_temp * 1000;
106 
107 	return actual_temp;
108 }
109 
110 /* get temperature in millidegrees */
111 int kv_get_temp(struct radeon_device *rdev)
112 {
113 	u32 temp;
114 	int actual_temp = 0;
115 
116 	temp = RREG32_SMC(0xC0300E0C);
117 
118 	if (temp)
119 		actual_temp = (temp / 8) - 49;
120 	else
121 		actual_temp = 0;
122 
123 	actual_temp = actual_temp * 1000;
124 
125 	return actual_temp;
126 }
127 
128 /*
129  * Indirect registers accessor
130  */
131 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
132 {
133 	unsigned long flags;
134 	u32 r;
135 
136 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
137 	WREG32(PCIE_INDEX, reg);
138 	(void)RREG32(PCIE_INDEX);
139 	r = RREG32(PCIE_DATA);
140 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
141 	return r;
142 }
143 
144 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
145 {
146 	unsigned long flags;
147 
148 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
149 	WREG32(PCIE_INDEX, reg);
150 	(void)RREG32(PCIE_INDEX);
151 	WREG32(PCIE_DATA, v);
152 	(void)RREG32(PCIE_DATA);
153 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
154 }
155 
156 static const u32 spectre_rlc_save_restore_register_list[] =
157 {
158 	(0x0e00 << 16) | (0xc12c >> 2),
159 	0x00000000,
160 	(0x0e00 << 16) | (0xc140 >> 2),
161 	0x00000000,
162 	(0x0e00 << 16) | (0xc150 >> 2),
163 	0x00000000,
164 	(0x0e00 << 16) | (0xc15c >> 2),
165 	0x00000000,
166 	(0x0e00 << 16) | (0xc168 >> 2),
167 	0x00000000,
168 	(0x0e00 << 16) | (0xc170 >> 2),
169 	0x00000000,
170 	(0x0e00 << 16) | (0xc178 >> 2),
171 	0x00000000,
172 	(0x0e00 << 16) | (0xc204 >> 2),
173 	0x00000000,
174 	(0x0e00 << 16) | (0xc2b4 >> 2),
175 	0x00000000,
176 	(0x0e00 << 16) | (0xc2b8 >> 2),
177 	0x00000000,
178 	(0x0e00 << 16) | (0xc2bc >> 2),
179 	0x00000000,
180 	(0x0e00 << 16) | (0xc2c0 >> 2),
181 	0x00000000,
182 	(0x0e00 << 16) | (0x8228 >> 2),
183 	0x00000000,
184 	(0x0e00 << 16) | (0x829c >> 2),
185 	0x00000000,
186 	(0x0e00 << 16) | (0x869c >> 2),
187 	0x00000000,
188 	(0x0600 << 16) | (0x98f4 >> 2),
189 	0x00000000,
190 	(0x0e00 << 16) | (0x98f8 >> 2),
191 	0x00000000,
192 	(0x0e00 << 16) | (0x9900 >> 2),
193 	0x00000000,
194 	(0x0e00 << 16) | (0xc260 >> 2),
195 	0x00000000,
196 	(0x0e00 << 16) | (0x90e8 >> 2),
197 	0x00000000,
198 	(0x0e00 << 16) | (0x3c000 >> 2),
199 	0x00000000,
200 	(0x0e00 << 16) | (0x3c00c >> 2),
201 	0x00000000,
202 	(0x0e00 << 16) | (0x8c1c >> 2),
203 	0x00000000,
204 	(0x0e00 << 16) | (0x9700 >> 2),
205 	0x00000000,
206 	(0x0e00 << 16) | (0xcd20 >> 2),
207 	0x00000000,
208 	(0x4e00 << 16) | (0xcd20 >> 2),
209 	0x00000000,
210 	(0x5e00 << 16) | (0xcd20 >> 2),
211 	0x00000000,
212 	(0x6e00 << 16) | (0xcd20 >> 2),
213 	0x00000000,
214 	(0x7e00 << 16) | (0xcd20 >> 2),
215 	0x00000000,
216 	(0x8e00 << 16) | (0xcd20 >> 2),
217 	0x00000000,
218 	(0x9e00 << 16) | (0xcd20 >> 2),
219 	0x00000000,
220 	(0xae00 << 16) | (0xcd20 >> 2),
221 	0x00000000,
222 	(0xbe00 << 16) | (0xcd20 >> 2),
223 	0x00000000,
224 	(0x0e00 << 16) | (0x89bc >> 2),
225 	0x00000000,
226 	(0x0e00 << 16) | (0x8900 >> 2),
227 	0x00000000,
228 	0x3,
229 	(0x0e00 << 16) | (0xc130 >> 2),
230 	0x00000000,
231 	(0x0e00 << 16) | (0xc134 >> 2),
232 	0x00000000,
233 	(0x0e00 << 16) | (0xc1fc >> 2),
234 	0x00000000,
235 	(0x0e00 << 16) | (0xc208 >> 2),
236 	0x00000000,
237 	(0x0e00 << 16) | (0xc264 >> 2),
238 	0x00000000,
239 	(0x0e00 << 16) | (0xc268 >> 2),
240 	0x00000000,
241 	(0x0e00 << 16) | (0xc26c >> 2),
242 	0x00000000,
243 	(0x0e00 << 16) | (0xc270 >> 2),
244 	0x00000000,
245 	(0x0e00 << 16) | (0xc274 >> 2),
246 	0x00000000,
247 	(0x0e00 << 16) | (0xc278 >> 2),
248 	0x00000000,
249 	(0x0e00 << 16) | (0xc27c >> 2),
250 	0x00000000,
251 	(0x0e00 << 16) | (0xc280 >> 2),
252 	0x00000000,
253 	(0x0e00 << 16) | (0xc284 >> 2),
254 	0x00000000,
255 	(0x0e00 << 16) | (0xc288 >> 2),
256 	0x00000000,
257 	(0x0e00 << 16) | (0xc28c >> 2),
258 	0x00000000,
259 	(0x0e00 << 16) | (0xc290 >> 2),
260 	0x00000000,
261 	(0x0e00 << 16) | (0xc294 >> 2),
262 	0x00000000,
263 	(0x0e00 << 16) | (0xc298 >> 2),
264 	0x00000000,
265 	(0x0e00 << 16) | (0xc29c >> 2),
266 	0x00000000,
267 	(0x0e00 << 16) | (0xc2a0 >> 2),
268 	0x00000000,
269 	(0x0e00 << 16) | (0xc2a4 >> 2),
270 	0x00000000,
271 	(0x0e00 << 16) | (0xc2a8 >> 2),
272 	0x00000000,
273 	(0x0e00 << 16) | (0xc2ac  >> 2),
274 	0x00000000,
275 	(0x0e00 << 16) | (0xc2b0 >> 2),
276 	0x00000000,
277 	(0x0e00 << 16) | (0x301d0 >> 2),
278 	0x00000000,
279 	(0x0e00 << 16) | (0x30238 >> 2),
280 	0x00000000,
281 	(0x0e00 << 16) | (0x30250 >> 2),
282 	0x00000000,
283 	(0x0e00 << 16) | (0x30254 >> 2),
284 	0x00000000,
285 	(0x0e00 << 16) | (0x30258 >> 2),
286 	0x00000000,
287 	(0x0e00 << 16) | (0x3025c >> 2),
288 	0x00000000,
289 	(0x4e00 << 16) | (0xc900 >> 2),
290 	0x00000000,
291 	(0x5e00 << 16) | (0xc900 >> 2),
292 	0x00000000,
293 	(0x6e00 << 16) | (0xc900 >> 2),
294 	0x00000000,
295 	(0x7e00 << 16) | (0xc900 >> 2),
296 	0x00000000,
297 	(0x8e00 << 16) | (0xc900 >> 2),
298 	0x00000000,
299 	(0x9e00 << 16) | (0xc900 >> 2),
300 	0x00000000,
301 	(0xae00 << 16) | (0xc900 >> 2),
302 	0x00000000,
303 	(0xbe00 << 16) | (0xc900 >> 2),
304 	0x00000000,
305 	(0x4e00 << 16) | (0xc904 >> 2),
306 	0x00000000,
307 	(0x5e00 << 16) | (0xc904 >> 2),
308 	0x00000000,
309 	(0x6e00 << 16) | (0xc904 >> 2),
310 	0x00000000,
311 	(0x7e00 << 16) | (0xc904 >> 2),
312 	0x00000000,
313 	(0x8e00 << 16) | (0xc904 >> 2),
314 	0x00000000,
315 	(0x9e00 << 16) | (0xc904 >> 2),
316 	0x00000000,
317 	(0xae00 << 16) | (0xc904 >> 2),
318 	0x00000000,
319 	(0xbe00 << 16) | (0xc904 >> 2),
320 	0x00000000,
321 	(0x4e00 << 16) | (0xc908 >> 2),
322 	0x00000000,
323 	(0x5e00 << 16) | (0xc908 >> 2),
324 	0x00000000,
325 	(0x6e00 << 16) | (0xc908 >> 2),
326 	0x00000000,
327 	(0x7e00 << 16) | (0xc908 >> 2),
328 	0x00000000,
329 	(0x8e00 << 16) | (0xc908 >> 2),
330 	0x00000000,
331 	(0x9e00 << 16) | (0xc908 >> 2),
332 	0x00000000,
333 	(0xae00 << 16) | (0xc908 >> 2),
334 	0x00000000,
335 	(0xbe00 << 16) | (0xc908 >> 2),
336 	0x00000000,
337 	(0x4e00 << 16) | (0xc90c >> 2),
338 	0x00000000,
339 	(0x5e00 << 16) | (0xc90c >> 2),
340 	0x00000000,
341 	(0x6e00 << 16) | (0xc90c >> 2),
342 	0x00000000,
343 	(0x7e00 << 16) | (0xc90c >> 2),
344 	0x00000000,
345 	(0x8e00 << 16) | (0xc90c >> 2),
346 	0x00000000,
347 	(0x9e00 << 16) | (0xc90c >> 2),
348 	0x00000000,
349 	(0xae00 << 16) | (0xc90c >> 2),
350 	0x00000000,
351 	(0xbe00 << 16) | (0xc90c >> 2),
352 	0x00000000,
353 	(0x4e00 << 16) | (0xc910 >> 2),
354 	0x00000000,
355 	(0x5e00 << 16) | (0xc910 >> 2),
356 	0x00000000,
357 	(0x6e00 << 16) | (0xc910 >> 2),
358 	0x00000000,
359 	(0x7e00 << 16) | (0xc910 >> 2),
360 	0x00000000,
361 	(0x8e00 << 16) | (0xc910 >> 2),
362 	0x00000000,
363 	(0x9e00 << 16) | (0xc910 >> 2),
364 	0x00000000,
365 	(0xae00 << 16) | (0xc910 >> 2),
366 	0x00000000,
367 	(0xbe00 << 16) | (0xc910 >> 2),
368 	0x00000000,
369 	(0x0e00 << 16) | (0xc99c >> 2),
370 	0x00000000,
371 	(0x0e00 << 16) | (0x9834 >> 2),
372 	0x00000000,
373 	(0x0000 << 16) | (0x30f00 >> 2),
374 	0x00000000,
375 	(0x0001 << 16) | (0x30f00 >> 2),
376 	0x00000000,
377 	(0x0000 << 16) | (0x30f04 >> 2),
378 	0x00000000,
379 	(0x0001 << 16) | (0x30f04 >> 2),
380 	0x00000000,
381 	(0x0000 << 16) | (0x30f08 >> 2),
382 	0x00000000,
383 	(0x0001 << 16) | (0x30f08 >> 2),
384 	0x00000000,
385 	(0x0000 << 16) | (0x30f0c >> 2),
386 	0x00000000,
387 	(0x0001 << 16) | (0x30f0c >> 2),
388 	0x00000000,
389 	(0x0600 << 16) | (0x9b7c >> 2),
390 	0x00000000,
391 	(0x0e00 << 16) | (0x8a14 >> 2),
392 	0x00000000,
393 	(0x0e00 << 16) | (0x8a18 >> 2),
394 	0x00000000,
395 	(0x0600 << 16) | (0x30a00 >> 2),
396 	0x00000000,
397 	(0x0e00 << 16) | (0x8bf0 >> 2),
398 	0x00000000,
399 	(0x0e00 << 16) | (0x8bcc >> 2),
400 	0x00000000,
401 	(0x0e00 << 16) | (0x8b24 >> 2),
402 	0x00000000,
403 	(0x0e00 << 16) | (0x30a04 >> 2),
404 	0x00000000,
405 	(0x0600 << 16) | (0x30a10 >> 2),
406 	0x00000000,
407 	(0x0600 << 16) | (0x30a14 >> 2),
408 	0x00000000,
409 	(0x0600 << 16) | (0x30a18 >> 2),
410 	0x00000000,
411 	(0x0600 << 16) | (0x30a2c >> 2),
412 	0x00000000,
413 	(0x0e00 << 16) | (0xc700 >> 2),
414 	0x00000000,
415 	(0x0e00 << 16) | (0xc704 >> 2),
416 	0x00000000,
417 	(0x0e00 << 16) | (0xc708 >> 2),
418 	0x00000000,
419 	(0x0e00 << 16) | (0xc768 >> 2),
420 	0x00000000,
421 	(0x0400 << 16) | (0xc770 >> 2),
422 	0x00000000,
423 	(0x0400 << 16) | (0xc774 >> 2),
424 	0x00000000,
425 	(0x0400 << 16) | (0xc778 >> 2),
426 	0x00000000,
427 	(0x0400 << 16) | (0xc77c >> 2),
428 	0x00000000,
429 	(0x0400 << 16) | (0xc780 >> 2),
430 	0x00000000,
431 	(0x0400 << 16) | (0xc784 >> 2),
432 	0x00000000,
433 	(0x0400 << 16) | (0xc788 >> 2),
434 	0x00000000,
435 	(0x0400 << 16) | (0xc78c >> 2),
436 	0x00000000,
437 	(0x0400 << 16) | (0xc798 >> 2),
438 	0x00000000,
439 	(0x0400 << 16) | (0xc79c >> 2),
440 	0x00000000,
441 	(0x0400 << 16) | (0xc7a0 >> 2),
442 	0x00000000,
443 	(0x0400 << 16) | (0xc7a4 >> 2),
444 	0x00000000,
445 	(0x0400 << 16) | (0xc7a8 >> 2),
446 	0x00000000,
447 	(0x0400 << 16) | (0xc7ac >> 2),
448 	0x00000000,
449 	(0x0400 << 16) | (0xc7b0 >> 2),
450 	0x00000000,
451 	(0x0400 << 16) | (0xc7b4 >> 2),
452 	0x00000000,
453 	(0x0e00 << 16) | (0x9100 >> 2),
454 	0x00000000,
455 	(0x0e00 << 16) | (0x3c010 >> 2),
456 	0x00000000,
457 	(0x0e00 << 16) | (0x92a8 >> 2),
458 	0x00000000,
459 	(0x0e00 << 16) | (0x92ac >> 2),
460 	0x00000000,
461 	(0x0e00 << 16) | (0x92b4 >> 2),
462 	0x00000000,
463 	(0x0e00 << 16) | (0x92b8 >> 2),
464 	0x00000000,
465 	(0x0e00 << 16) | (0x92bc >> 2),
466 	0x00000000,
467 	(0x0e00 << 16) | (0x92c0 >> 2),
468 	0x00000000,
469 	(0x0e00 << 16) | (0x92c4 >> 2),
470 	0x00000000,
471 	(0x0e00 << 16) | (0x92c8 >> 2),
472 	0x00000000,
473 	(0x0e00 << 16) | (0x92cc >> 2),
474 	0x00000000,
475 	(0x0e00 << 16) | (0x92d0 >> 2),
476 	0x00000000,
477 	(0x0e00 << 16) | (0x8c00 >> 2),
478 	0x00000000,
479 	(0x0e00 << 16) | (0x8c04 >> 2),
480 	0x00000000,
481 	(0x0e00 << 16) | (0x8c20 >> 2),
482 	0x00000000,
483 	(0x0e00 << 16) | (0x8c38 >> 2),
484 	0x00000000,
485 	(0x0e00 << 16) | (0x8c3c >> 2),
486 	0x00000000,
487 	(0x0e00 << 16) | (0xae00 >> 2),
488 	0x00000000,
489 	(0x0e00 << 16) | (0x9604 >> 2),
490 	0x00000000,
491 	(0x0e00 << 16) | (0xac08 >> 2),
492 	0x00000000,
493 	(0x0e00 << 16) | (0xac0c >> 2),
494 	0x00000000,
495 	(0x0e00 << 16) | (0xac10 >> 2),
496 	0x00000000,
497 	(0x0e00 << 16) | (0xac14 >> 2),
498 	0x00000000,
499 	(0x0e00 << 16) | (0xac58 >> 2),
500 	0x00000000,
501 	(0x0e00 << 16) | (0xac68 >> 2),
502 	0x00000000,
503 	(0x0e00 << 16) | (0xac6c >> 2),
504 	0x00000000,
505 	(0x0e00 << 16) | (0xac70 >> 2),
506 	0x00000000,
507 	(0x0e00 << 16) | (0xac74 >> 2),
508 	0x00000000,
509 	(0x0e00 << 16) | (0xac78 >> 2),
510 	0x00000000,
511 	(0x0e00 << 16) | (0xac7c >> 2),
512 	0x00000000,
513 	(0x0e00 << 16) | (0xac80 >> 2),
514 	0x00000000,
515 	(0x0e00 << 16) | (0xac84 >> 2),
516 	0x00000000,
517 	(0x0e00 << 16) | (0xac88 >> 2),
518 	0x00000000,
519 	(0x0e00 << 16) | (0xac8c >> 2),
520 	0x00000000,
521 	(0x0e00 << 16) | (0x970c >> 2),
522 	0x00000000,
523 	(0x0e00 << 16) | (0x9714 >> 2),
524 	0x00000000,
525 	(0x0e00 << 16) | (0x9718 >> 2),
526 	0x00000000,
527 	(0x0e00 << 16) | (0x971c >> 2),
528 	0x00000000,
529 	(0x0e00 << 16) | (0x31068 >> 2),
530 	0x00000000,
531 	(0x4e00 << 16) | (0x31068 >> 2),
532 	0x00000000,
533 	(0x5e00 << 16) | (0x31068 >> 2),
534 	0x00000000,
535 	(0x6e00 << 16) | (0x31068 >> 2),
536 	0x00000000,
537 	(0x7e00 << 16) | (0x31068 >> 2),
538 	0x00000000,
539 	(0x8e00 << 16) | (0x31068 >> 2),
540 	0x00000000,
541 	(0x9e00 << 16) | (0x31068 >> 2),
542 	0x00000000,
543 	(0xae00 << 16) | (0x31068 >> 2),
544 	0x00000000,
545 	(0xbe00 << 16) | (0x31068 >> 2),
546 	0x00000000,
547 	(0x0e00 << 16) | (0xcd10 >> 2),
548 	0x00000000,
549 	(0x0e00 << 16) | (0xcd14 >> 2),
550 	0x00000000,
551 	(0x0e00 << 16) | (0x88b0 >> 2),
552 	0x00000000,
553 	(0x0e00 << 16) | (0x88b4 >> 2),
554 	0x00000000,
555 	(0x0e00 << 16) | (0x88b8 >> 2),
556 	0x00000000,
557 	(0x0e00 << 16) | (0x88bc >> 2),
558 	0x00000000,
559 	(0x0400 << 16) | (0x89c0 >> 2),
560 	0x00000000,
561 	(0x0e00 << 16) | (0x88c4 >> 2),
562 	0x00000000,
563 	(0x0e00 << 16) | (0x88c8 >> 2),
564 	0x00000000,
565 	(0x0e00 << 16) | (0x88d0 >> 2),
566 	0x00000000,
567 	(0x0e00 << 16) | (0x88d4 >> 2),
568 	0x00000000,
569 	(0x0e00 << 16) | (0x88d8 >> 2),
570 	0x00000000,
571 	(0x0e00 << 16) | (0x8980 >> 2),
572 	0x00000000,
573 	(0x0e00 << 16) | (0x30938 >> 2),
574 	0x00000000,
575 	(0x0e00 << 16) | (0x3093c >> 2),
576 	0x00000000,
577 	(0x0e00 << 16) | (0x30940 >> 2),
578 	0x00000000,
579 	(0x0e00 << 16) | (0x89a0 >> 2),
580 	0x00000000,
581 	(0x0e00 << 16) | (0x30900 >> 2),
582 	0x00000000,
583 	(0x0e00 << 16) | (0x30904 >> 2),
584 	0x00000000,
585 	(0x0e00 << 16) | (0x89b4 >> 2),
586 	0x00000000,
587 	(0x0e00 << 16) | (0x3c210 >> 2),
588 	0x00000000,
589 	(0x0e00 << 16) | (0x3c214 >> 2),
590 	0x00000000,
591 	(0x0e00 << 16) | (0x3c218 >> 2),
592 	0x00000000,
593 	(0x0e00 << 16) | (0x8904 >> 2),
594 	0x00000000,
595 	0x5,
596 	(0x0e00 << 16) | (0x8c28 >> 2),
597 	(0x0e00 << 16) | (0x8c2c >> 2),
598 	(0x0e00 << 16) | (0x8c30 >> 2),
599 	(0x0e00 << 16) | (0x8c34 >> 2),
600 	(0x0e00 << 16) | (0x9600 >> 2),
601 };
602 
603 static const u32 kalindi_rlc_save_restore_register_list[] =
604 {
605 	(0x0e00 << 16) | (0xc12c >> 2),
606 	0x00000000,
607 	(0x0e00 << 16) | (0xc140 >> 2),
608 	0x00000000,
609 	(0x0e00 << 16) | (0xc150 >> 2),
610 	0x00000000,
611 	(0x0e00 << 16) | (0xc15c >> 2),
612 	0x00000000,
613 	(0x0e00 << 16) | (0xc168 >> 2),
614 	0x00000000,
615 	(0x0e00 << 16) | (0xc170 >> 2),
616 	0x00000000,
617 	(0x0e00 << 16) | (0xc204 >> 2),
618 	0x00000000,
619 	(0x0e00 << 16) | (0xc2b4 >> 2),
620 	0x00000000,
621 	(0x0e00 << 16) | (0xc2b8 >> 2),
622 	0x00000000,
623 	(0x0e00 << 16) | (0xc2bc >> 2),
624 	0x00000000,
625 	(0x0e00 << 16) | (0xc2c0 >> 2),
626 	0x00000000,
627 	(0x0e00 << 16) | (0x8228 >> 2),
628 	0x00000000,
629 	(0x0e00 << 16) | (0x829c >> 2),
630 	0x00000000,
631 	(0x0e00 << 16) | (0x869c >> 2),
632 	0x00000000,
633 	(0x0600 << 16) | (0x98f4 >> 2),
634 	0x00000000,
635 	(0x0e00 << 16) | (0x98f8 >> 2),
636 	0x00000000,
637 	(0x0e00 << 16) | (0x9900 >> 2),
638 	0x00000000,
639 	(0x0e00 << 16) | (0xc260 >> 2),
640 	0x00000000,
641 	(0x0e00 << 16) | (0x90e8 >> 2),
642 	0x00000000,
643 	(0x0e00 << 16) | (0x3c000 >> 2),
644 	0x00000000,
645 	(0x0e00 << 16) | (0x3c00c >> 2),
646 	0x00000000,
647 	(0x0e00 << 16) | (0x8c1c >> 2),
648 	0x00000000,
649 	(0x0e00 << 16) | (0x9700 >> 2),
650 	0x00000000,
651 	(0x0e00 << 16) | (0xcd20 >> 2),
652 	0x00000000,
653 	(0x4e00 << 16) | (0xcd20 >> 2),
654 	0x00000000,
655 	(0x5e00 << 16) | (0xcd20 >> 2),
656 	0x00000000,
657 	(0x6e00 << 16) | (0xcd20 >> 2),
658 	0x00000000,
659 	(0x7e00 << 16) | (0xcd20 >> 2),
660 	0x00000000,
661 	(0x0e00 << 16) | (0x89bc >> 2),
662 	0x00000000,
663 	(0x0e00 << 16) | (0x8900 >> 2),
664 	0x00000000,
665 	0x3,
666 	(0x0e00 << 16) | (0xc130 >> 2),
667 	0x00000000,
668 	(0x0e00 << 16) | (0xc134 >> 2),
669 	0x00000000,
670 	(0x0e00 << 16) | (0xc1fc >> 2),
671 	0x00000000,
672 	(0x0e00 << 16) | (0xc208 >> 2),
673 	0x00000000,
674 	(0x0e00 << 16) | (0xc264 >> 2),
675 	0x00000000,
676 	(0x0e00 << 16) | (0xc268 >> 2),
677 	0x00000000,
678 	(0x0e00 << 16) | (0xc26c >> 2),
679 	0x00000000,
680 	(0x0e00 << 16) | (0xc270 >> 2),
681 	0x00000000,
682 	(0x0e00 << 16) | (0xc274 >> 2),
683 	0x00000000,
684 	(0x0e00 << 16) | (0xc28c >> 2),
685 	0x00000000,
686 	(0x0e00 << 16) | (0xc290 >> 2),
687 	0x00000000,
688 	(0x0e00 << 16) | (0xc294 >> 2),
689 	0x00000000,
690 	(0x0e00 << 16) | (0xc298 >> 2),
691 	0x00000000,
692 	(0x0e00 << 16) | (0xc2a0 >> 2),
693 	0x00000000,
694 	(0x0e00 << 16) | (0xc2a4 >> 2),
695 	0x00000000,
696 	(0x0e00 << 16) | (0xc2a8 >> 2),
697 	0x00000000,
698 	(0x0e00 << 16) | (0xc2ac >> 2),
699 	0x00000000,
700 	(0x0e00 << 16) | (0x301d0 >> 2),
701 	0x00000000,
702 	(0x0e00 << 16) | (0x30238 >> 2),
703 	0x00000000,
704 	(0x0e00 << 16) | (0x30250 >> 2),
705 	0x00000000,
706 	(0x0e00 << 16) | (0x30254 >> 2),
707 	0x00000000,
708 	(0x0e00 << 16) | (0x30258 >> 2),
709 	0x00000000,
710 	(0x0e00 << 16) | (0x3025c >> 2),
711 	0x00000000,
712 	(0x4e00 << 16) | (0xc900 >> 2),
713 	0x00000000,
714 	(0x5e00 << 16) | (0xc900 >> 2),
715 	0x00000000,
716 	(0x6e00 << 16) | (0xc900 >> 2),
717 	0x00000000,
718 	(0x7e00 << 16) | (0xc900 >> 2),
719 	0x00000000,
720 	(0x4e00 << 16) | (0xc904 >> 2),
721 	0x00000000,
722 	(0x5e00 << 16) | (0xc904 >> 2),
723 	0x00000000,
724 	(0x6e00 << 16) | (0xc904 >> 2),
725 	0x00000000,
726 	(0x7e00 << 16) | (0xc904 >> 2),
727 	0x00000000,
728 	(0x4e00 << 16) | (0xc908 >> 2),
729 	0x00000000,
730 	(0x5e00 << 16) | (0xc908 >> 2),
731 	0x00000000,
732 	(0x6e00 << 16) | (0xc908 >> 2),
733 	0x00000000,
734 	(0x7e00 << 16) | (0xc908 >> 2),
735 	0x00000000,
736 	(0x4e00 << 16) | (0xc90c >> 2),
737 	0x00000000,
738 	(0x5e00 << 16) | (0xc90c >> 2),
739 	0x00000000,
740 	(0x6e00 << 16) | (0xc90c >> 2),
741 	0x00000000,
742 	(0x7e00 << 16) | (0xc90c >> 2),
743 	0x00000000,
744 	(0x4e00 << 16) | (0xc910 >> 2),
745 	0x00000000,
746 	(0x5e00 << 16) | (0xc910 >> 2),
747 	0x00000000,
748 	(0x6e00 << 16) | (0xc910 >> 2),
749 	0x00000000,
750 	(0x7e00 << 16) | (0xc910 >> 2),
751 	0x00000000,
752 	(0x0e00 << 16) | (0xc99c >> 2),
753 	0x00000000,
754 	(0x0e00 << 16) | (0x9834 >> 2),
755 	0x00000000,
756 	(0x0000 << 16) | (0x30f00 >> 2),
757 	0x00000000,
758 	(0x0000 << 16) | (0x30f04 >> 2),
759 	0x00000000,
760 	(0x0000 << 16) | (0x30f08 >> 2),
761 	0x00000000,
762 	(0x0000 << 16) | (0x30f0c >> 2),
763 	0x00000000,
764 	(0x0600 << 16) | (0x9b7c >> 2),
765 	0x00000000,
766 	(0x0e00 << 16) | (0x8a14 >> 2),
767 	0x00000000,
768 	(0x0e00 << 16) | (0x8a18 >> 2),
769 	0x00000000,
770 	(0x0600 << 16) | (0x30a00 >> 2),
771 	0x00000000,
772 	(0x0e00 << 16) | (0x8bf0 >> 2),
773 	0x00000000,
774 	(0x0e00 << 16) | (0x8bcc >> 2),
775 	0x00000000,
776 	(0x0e00 << 16) | (0x8b24 >> 2),
777 	0x00000000,
778 	(0x0e00 << 16) | (0x30a04 >> 2),
779 	0x00000000,
780 	(0x0600 << 16) | (0x30a10 >> 2),
781 	0x00000000,
782 	(0x0600 << 16) | (0x30a14 >> 2),
783 	0x00000000,
784 	(0x0600 << 16) | (0x30a18 >> 2),
785 	0x00000000,
786 	(0x0600 << 16) | (0x30a2c >> 2),
787 	0x00000000,
788 	(0x0e00 << 16) | (0xc700 >> 2),
789 	0x00000000,
790 	(0x0e00 << 16) | (0xc704 >> 2),
791 	0x00000000,
792 	(0x0e00 << 16) | (0xc708 >> 2),
793 	0x00000000,
794 	(0x0e00 << 16) | (0xc768 >> 2),
795 	0x00000000,
796 	(0x0400 << 16) | (0xc770 >> 2),
797 	0x00000000,
798 	(0x0400 << 16) | (0xc774 >> 2),
799 	0x00000000,
800 	(0x0400 << 16) | (0xc798 >> 2),
801 	0x00000000,
802 	(0x0400 << 16) | (0xc79c >> 2),
803 	0x00000000,
804 	(0x0e00 << 16) | (0x9100 >> 2),
805 	0x00000000,
806 	(0x0e00 << 16) | (0x3c010 >> 2),
807 	0x00000000,
808 	(0x0e00 << 16) | (0x8c00 >> 2),
809 	0x00000000,
810 	(0x0e00 << 16) | (0x8c04 >> 2),
811 	0x00000000,
812 	(0x0e00 << 16) | (0x8c20 >> 2),
813 	0x00000000,
814 	(0x0e00 << 16) | (0x8c38 >> 2),
815 	0x00000000,
816 	(0x0e00 << 16) | (0x8c3c >> 2),
817 	0x00000000,
818 	(0x0e00 << 16) | (0xae00 >> 2),
819 	0x00000000,
820 	(0x0e00 << 16) | (0x9604 >> 2),
821 	0x00000000,
822 	(0x0e00 << 16) | (0xac08 >> 2),
823 	0x00000000,
824 	(0x0e00 << 16) | (0xac0c >> 2),
825 	0x00000000,
826 	(0x0e00 << 16) | (0xac10 >> 2),
827 	0x00000000,
828 	(0x0e00 << 16) | (0xac14 >> 2),
829 	0x00000000,
830 	(0x0e00 << 16) | (0xac58 >> 2),
831 	0x00000000,
832 	(0x0e00 << 16) | (0xac68 >> 2),
833 	0x00000000,
834 	(0x0e00 << 16) | (0xac6c >> 2),
835 	0x00000000,
836 	(0x0e00 << 16) | (0xac70 >> 2),
837 	0x00000000,
838 	(0x0e00 << 16) | (0xac74 >> 2),
839 	0x00000000,
840 	(0x0e00 << 16) | (0xac78 >> 2),
841 	0x00000000,
842 	(0x0e00 << 16) | (0xac7c >> 2),
843 	0x00000000,
844 	(0x0e00 << 16) | (0xac80 >> 2),
845 	0x00000000,
846 	(0x0e00 << 16) | (0xac84 >> 2),
847 	0x00000000,
848 	(0x0e00 << 16) | (0xac88 >> 2),
849 	0x00000000,
850 	(0x0e00 << 16) | (0xac8c >> 2),
851 	0x00000000,
852 	(0x0e00 << 16) | (0x970c >> 2),
853 	0x00000000,
854 	(0x0e00 << 16) | (0x9714 >> 2),
855 	0x00000000,
856 	(0x0e00 << 16) | (0x9718 >> 2),
857 	0x00000000,
858 	(0x0e00 << 16) | (0x971c >> 2),
859 	0x00000000,
860 	(0x0e00 << 16) | (0x31068 >> 2),
861 	0x00000000,
862 	(0x4e00 << 16) | (0x31068 >> 2),
863 	0x00000000,
864 	(0x5e00 << 16) | (0x31068 >> 2),
865 	0x00000000,
866 	(0x6e00 << 16) | (0x31068 >> 2),
867 	0x00000000,
868 	(0x7e00 << 16) | (0x31068 >> 2),
869 	0x00000000,
870 	(0x0e00 << 16) | (0xcd10 >> 2),
871 	0x00000000,
872 	(0x0e00 << 16) | (0xcd14 >> 2),
873 	0x00000000,
874 	(0x0e00 << 16) | (0x88b0 >> 2),
875 	0x00000000,
876 	(0x0e00 << 16) | (0x88b4 >> 2),
877 	0x00000000,
878 	(0x0e00 << 16) | (0x88b8 >> 2),
879 	0x00000000,
880 	(0x0e00 << 16) | (0x88bc >> 2),
881 	0x00000000,
882 	(0x0400 << 16) | (0x89c0 >> 2),
883 	0x00000000,
884 	(0x0e00 << 16) | (0x88c4 >> 2),
885 	0x00000000,
886 	(0x0e00 << 16) | (0x88c8 >> 2),
887 	0x00000000,
888 	(0x0e00 << 16) | (0x88d0 >> 2),
889 	0x00000000,
890 	(0x0e00 << 16) | (0x88d4 >> 2),
891 	0x00000000,
892 	(0x0e00 << 16) | (0x88d8 >> 2),
893 	0x00000000,
894 	(0x0e00 << 16) | (0x8980 >> 2),
895 	0x00000000,
896 	(0x0e00 << 16) | (0x30938 >> 2),
897 	0x00000000,
898 	(0x0e00 << 16) | (0x3093c >> 2),
899 	0x00000000,
900 	(0x0e00 << 16) | (0x30940 >> 2),
901 	0x00000000,
902 	(0x0e00 << 16) | (0x89a0 >> 2),
903 	0x00000000,
904 	(0x0e00 << 16) | (0x30900 >> 2),
905 	0x00000000,
906 	(0x0e00 << 16) | (0x30904 >> 2),
907 	0x00000000,
908 	(0x0e00 << 16) | (0x89b4 >> 2),
909 	0x00000000,
910 	(0x0e00 << 16) | (0x3e1fc >> 2),
911 	0x00000000,
912 	(0x0e00 << 16) | (0x3c210 >> 2),
913 	0x00000000,
914 	(0x0e00 << 16) | (0x3c214 >> 2),
915 	0x00000000,
916 	(0x0e00 << 16) | (0x3c218 >> 2),
917 	0x00000000,
918 	(0x0e00 << 16) | (0x8904 >> 2),
919 	0x00000000,
920 	0x5,
921 	(0x0e00 << 16) | (0x8c28 >> 2),
922 	(0x0e00 << 16) | (0x8c2c >> 2),
923 	(0x0e00 << 16) | (0x8c30 >> 2),
924 	(0x0e00 << 16) | (0x8c34 >> 2),
925 	(0x0e00 << 16) | (0x9600 >> 2),
926 };
927 
928 static const u32 bonaire_golden_spm_registers[] =
929 {
930 	0x30800, 0xe0ffffff, 0xe0000000
931 };
932 
933 static const u32 bonaire_golden_common_registers[] =
934 {
935 	0xc770, 0xffffffff, 0x00000800,
936 	0xc774, 0xffffffff, 0x00000800,
937 	0xc798, 0xffffffff, 0x00007fbf,
938 	0xc79c, 0xffffffff, 0x00007faf
939 };
940 
941 static const u32 bonaire_golden_registers[] =
942 {
943 	0x3354, 0x00000333, 0x00000333,
944 	0x3350, 0x000c0fc0, 0x00040200,
945 	0x9a10, 0x00010000, 0x00058208,
946 	0x3c000, 0xffff1fff, 0x00140000,
947 	0x3c200, 0xfdfc0fff, 0x00000100,
948 	0x3c234, 0x40000000, 0x40000200,
949 	0x9830, 0xffffffff, 0x00000000,
950 	0x9834, 0xf00fffff, 0x00000400,
951 	0x9838, 0x0002021c, 0x00020200,
952 	0xc78, 0x00000080, 0x00000000,
953 	0x5bb0, 0x000000f0, 0x00000070,
954 	0x5bc0, 0xf0311fff, 0x80300000,
955 	0x98f8, 0x73773777, 0x12010001,
956 	0x350c, 0x00810000, 0x408af000,
957 	0x7030, 0x31000111, 0x00000011,
958 	0x2f48, 0x73773777, 0x12010001,
959 	0x220c, 0x00007fb6, 0x0021a1b1,
960 	0x2210, 0x00007fb6, 0x002021b1,
961 	0x2180, 0x00007fb6, 0x00002191,
962 	0x2218, 0x00007fb6, 0x002121b1,
963 	0x221c, 0x00007fb6, 0x002021b1,
964 	0x21dc, 0x00007fb6, 0x00002191,
965 	0x21e0, 0x00007fb6, 0x00002191,
966 	0x3628, 0x0000003f, 0x0000000a,
967 	0x362c, 0x0000003f, 0x0000000a,
968 	0x2ae4, 0x00073ffe, 0x000022a2,
969 	0x240c, 0x000007ff, 0x00000000,
970 	0x8a14, 0xf000003f, 0x00000007,
971 	0x8bf0, 0x00002001, 0x00000001,
972 	0x8b24, 0xffffffff, 0x00ffffff,
973 	0x30a04, 0x0000ff0f, 0x00000000,
974 	0x28a4c, 0x07ffffff, 0x06000000,
975 	0x4d8, 0x00000fff, 0x00000100,
976 	0x3e78, 0x00000001, 0x00000002,
977 	0x9100, 0x03000000, 0x0362c688,
978 	0x8c00, 0x000000ff, 0x00000001,
979 	0xe40, 0x00001fff, 0x00001fff,
980 	0x9060, 0x0000007f, 0x00000020,
981 	0x9508, 0x00010000, 0x00010000,
982 	0xac14, 0x000003ff, 0x000000f3,
983 	0xac0c, 0xffffffff, 0x00001032
984 };
985 
986 static const u32 bonaire_mgcg_cgcg_init[] =
987 {
988 	0xc420, 0xffffffff, 0xfffffffc,
989 	0x30800, 0xffffffff, 0xe0000000,
990 	0x3c2a0, 0xffffffff, 0x00000100,
991 	0x3c208, 0xffffffff, 0x00000100,
992 	0x3c2c0, 0xffffffff, 0xc0000100,
993 	0x3c2c8, 0xffffffff, 0xc0000100,
994 	0x3c2c4, 0xffffffff, 0xc0000100,
995 	0x55e4, 0xffffffff, 0x00600100,
996 	0x3c280, 0xffffffff, 0x00000100,
997 	0x3c214, 0xffffffff, 0x06000100,
998 	0x3c220, 0xffffffff, 0x00000100,
999 	0x3c218, 0xffffffff, 0x06000100,
1000 	0x3c204, 0xffffffff, 0x00000100,
1001 	0x3c2e0, 0xffffffff, 0x00000100,
1002 	0x3c224, 0xffffffff, 0x00000100,
1003 	0x3c200, 0xffffffff, 0x00000100,
1004 	0x3c230, 0xffffffff, 0x00000100,
1005 	0x3c234, 0xffffffff, 0x00000100,
1006 	0x3c250, 0xffffffff, 0x00000100,
1007 	0x3c254, 0xffffffff, 0x00000100,
1008 	0x3c258, 0xffffffff, 0x00000100,
1009 	0x3c25c, 0xffffffff, 0x00000100,
1010 	0x3c260, 0xffffffff, 0x00000100,
1011 	0x3c27c, 0xffffffff, 0x00000100,
1012 	0x3c278, 0xffffffff, 0x00000100,
1013 	0x3c210, 0xffffffff, 0x06000100,
1014 	0x3c290, 0xffffffff, 0x00000100,
1015 	0x3c274, 0xffffffff, 0x00000100,
1016 	0x3c2b4, 0xffffffff, 0x00000100,
1017 	0x3c2b0, 0xffffffff, 0x00000100,
1018 	0x3c270, 0xffffffff, 0x00000100,
1019 	0x30800, 0xffffffff, 0xe0000000,
1020 	0x3c020, 0xffffffff, 0x00010000,
1021 	0x3c024, 0xffffffff, 0x00030002,
1022 	0x3c028, 0xffffffff, 0x00040007,
1023 	0x3c02c, 0xffffffff, 0x00060005,
1024 	0x3c030, 0xffffffff, 0x00090008,
1025 	0x3c034, 0xffffffff, 0x00010000,
1026 	0x3c038, 0xffffffff, 0x00030002,
1027 	0x3c03c, 0xffffffff, 0x00040007,
1028 	0x3c040, 0xffffffff, 0x00060005,
1029 	0x3c044, 0xffffffff, 0x00090008,
1030 	0x3c048, 0xffffffff, 0x00010000,
1031 	0x3c04c, 0xffffffff, 0x00030002,
1032 	0x3c050, 0xffffffff, 0x00040007,
1033 	0x3c054, 0xffffffff, 0x00060005,
1034 	0x3c058, 0xffffffff, 0x00090008,
1035 	0x3c05c, 0xffffffff, 0x00010000,
1036 	0x3c060, 0xffffffff, 0x00030002,
1037 	0x3c064, 0xffffffff, 0x00040007,
1038 	0x3c068, 0xffffffff, 0x00060005,
1039 	0x3c06c, 0xffffffff, 0x00090008,
1040 	0x3c070, 0xffffffff, 0x00010000,
1041 	0x3c074, 0xffffffff, 0x00030002,
1042 	0x3c078, 0xffffffff, 0x00040007,
1043 	0x3c07c, 0xffffffff, 0x00060005,
1044 	0x3c080, 0xffffffff, 0x00090008,
1045 	0x3c084, 0xffffffff, 0x00010000,
1046 	0x3c088, 0xffffffff, 0x00030002,
1047 	0x3c08c, 0xffffffff, 0x00040007,
1048 	0x3c090, 0xffffffff, 0x00060005,
1049 	0x3c094, 0xffffffff, 0x00090008,
1050 	0x3c098, 0xffffffff, 0x00010000,
1051 	0x3c09c, 0xffffffff, 0x00030002,
1052 	0x3c0a0, 0xffffffff, 0x00040007,
1053 	0x3c0a4, 0xffffffff, 0x00060005,
1054 	0x3c0a8, 0xffffffff, 0x00090008,
1055 	0x3c000, 0xffffffff, 0x96e00200,
1056 	0x8708, 0xffffffff, 0x00900100,
1057 	0xc424, 0xffffffff, 0x0020003f,
1058 	0x38, 0xffffffff, 0x0140001c,
1059 	0x3c, 0x000f0000, 0x000f0000,
1060 	0x220, 0xffffffff, 0xC060000C,
1061 	0x224, 0xc0000fff, 0x00000100,
1062 	0xf90, 0xffffffff, 0x00000100,
1063 	0xf98, 0x00000101, 0x00000000,
1064 	0x20a8, 0xffffffff, 0x00000104,
1065 	0x55e4, 0xff000fff, 0x00000100,
1066 	0x30cc, 0xc0000fff, 0x00000104,
1067 	0xc1e4, 0x00000001, 0x00000001,
1068 	0xd00c, 0xff000ff0, 0x00000100,
1069 	0xd80c, 0xff000ff0, 0x00000100
1070 };
1071 
/* Spectre (Kaveri) SPM golden-register table: flat u32 triples consumed by
 * radeon_program_register_sequence() from cik_init_golden_registers();
 * presumably {offset, and_mask, or_value} — confirm against
 * radeon_program_register_sequence().
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1076 
/* Spectre (Kaveri) common golden-register table: u32 triples fed to
 * radeon_program_register_sequence(); presumably {offset, mask, value}.
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1084 
/* Spectre (Kaveri) golden-register table: u32 triples fed to
 * radeon_program_register_sequence(); presumably {offset, mask, value}.
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1113 
/* Spectre (Kaveri) MGCG/CGCG (clock gating) init sequence: u32 triples fed
 * to radeon_program_register_sequence(); presumably {offset, mask, value}.
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1204 
/* Kalindi (Kabini) SPM golden-register table: u32 triples fed to
 * radeon_program_register_sequence(); presumably {offset, mask, value}.
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1209 
/* Kalindi (Kabini) common golden-register table: u32 triples fed to
 * radeon_program_register_sequence(); presumably {offset, mask, value}.
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1217 
/* Kalindi (Kabini) golden-register table: u32 triples fed to
 * radeon_program_register_sequence(); presumably {offset, mask, value}.
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1251 
/* Kalindi (Kabini) MGCG/CGCG (clock gating) init sequence: u32 triples fed
 * to radeon_program_register_sequence(); presumably {offset, mask, value}.
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1310 
/* Hawaii SPM golden-register table: u32 triples fed to
 * radeon_program_register_sequence(); presumably {offset, mask, value}.
 */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1315 
/* Hawaii common golden-register table: u32 triples fed to
 * radeon_program_register_sequence(); presumably {offset, mask, value}.
 */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1324 
/* Hawaii golden-register table: u32 triples fed to
 * radeon_program_register_sequence(); presumably {offset, mask, value}.
 */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1364 
/* Hawaii MGCG/CGCG (clock gating) init sequence: u32 triples fed to
 * radeon_program_register_sequence(); presumably {offset, mask, value}.
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1475 
1476 static void cik_init_golden_registers(struct radeon_device *rdev)
1477 {
1478 	switch (rdev->family) {
1479 	case CHIP_BONAIRE:
1480 		radeon_program_register_sequence(rdev,
1481 						 bonaire_mgcg_cgcg_init,
1482 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1483 		radeon_program_register_sequence(rdev,
1484 						 bonaire_golden_registers,
1485 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1486 		radeon_program_register_sequence(rdev,
1487 						 bonaire_golden_common_registers,
1488 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1489 		radeon_program_register_sequence(rdev,
1490 						 bonaire_golden_spm_registers,
1491 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1492 		break;
1493 	case CHIP_KABINI:
1494 		radeon_program_register_sequence(rdev,
1495 						 kalindi_mgcg_cgcg_init,
1496 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1497 		radeon_program_register_sequence(rdev,
1498 						 kalindi_golden_registers,
1499 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1500 		radeon_program_register_sequence(rdev,
1501 						 kalindi_golden_common_registers,
1502 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1503 		radeon_program_register_sequence(rdev,
1504 						 kalindi_golden_spm_registers,
1505 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1506 		break;
1507 	case CHIP_KAVERI:
1508 		radeon_program_register_sequence(rdev,
1509 						 spectre_mgcg_cgcg_init,
1510 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1511 		radeon_program_register_sequence(rdev,
1512 						 spectre_golden_registers,
1513 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1514 		radeon_program_register_sequence(rdev,
1515 						 spectre_golden_common_registers,
1516 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1517 		radeon_program_register_sequence(rdev,
1518 						 spectre_golden_spm_registers,
1519 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1520 		break;
1521 	case CHIP_HAWAII:
1522 		radeon_program_register_sequence(rdev,
1523 						 hawaii_mgcg_cgcg_init,
1524 						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1525 		radeon_program_register_sequence(rdev,
1526 						 hawaii_golden_registers,
1527 						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1528 		radeon_program_register_sequence(rdev,
1529 						 hawaii_golden_common_registers,
1530 						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1531 		radeon_program_register_sequence(rdev,
1532 						 hawaii_golden_spm_registers,
1533 						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1534 		break;
1535 	default:
1536 		break;
1537 	}
1538 }
1539 
1540 /**
1541  * cik_get_xclk - get the xclk
1542  *
1543  * @rdev: radeon_device pointer
1544  *
1545  * Returns the reference clock used by the gfx engine
1546  * (CIK).
1547  */
1548 u32 cik_get_xclk(struct radeon_device *rdev)
1549 {
1550         u32 reference_clock = rdev->clock.spll.reference_freq;
1551 
1552 	if (rdev->flags & RADEON_IS_IGP) {
1553 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1554 			return reference_clock / 2;
1555 	} else {
1556 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1557 			return reference_clock / 4;
1558 	}
1559 	return reference_clock;
1560 }
1561 
1562 /**
1563  * cik_mm_rdoorbell - read a doorbell dword
1564  *
1565  * @rdev: radeon_device pointer
1566  * @index: doorbell index
1567  *
1568  * Returns the value in the doorbell aperture at the
1569  * requested doorbell index (CIK).
1570  */
1571 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1572 {
1573 	if (index < rdev->doorbell.num_doorbells) {
1574 		return readl(rdev->doorbell.ptr + index);
1575 	} else {
1576 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1577 		return 0;
1578 	}
1579 }
1580 
1581 /**
1582  * cik_mm_wdoorbell - write a doorbell dword
1583  *
1584  * @rdev: radeon_device pointer
1585  * @index: doorbell index
1586  * @v: value to write
1587  *
1588  * Writes @v to the doorbell aperture at the
1589  * requested doorbell index (CIK).
1590  */
1591 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1592 {
1593 	if (index < rdev->doorbell.num_doorbells) {
1594 		writel(v, rdev->doorbell.ptr + index);
1595 	} else {
1596 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1597 	}
1598 }
1599 
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC sequencer IO debug settings: {index, data} pairs written to
 * MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA by ci_mc_load_microcode().
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1641 
#define HAWAII_IO_MC_REGS_SIZE 22

/* Hawaii MC sequencer IO debug settings: {index, data} pairs written to
 * MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA by ci_mc_load_microcode().
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1669 
1670 
1671 /**
1672  * cik_srbm_select - select specific register instances
1673  *
1674  * @rdev: radeon_device pointer
1675  * @me: selected ME (micro engine)
1676  * @pipe: pipe
1677  * @queue: queue
1678  * @vmid: VMID
1679  *
1680  * Switches the currently active registers instances.  Some
1681  * registers are instanced per VMID, others are instanced per
1682  * me/pipe/queue combination.
1683  */
1684 static void cik_srbm_select(struct radeon_device *rdev,
1685 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1686 {
1687 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1688 			     MEID(me & 0x3) |
1689 			     VMID(vmid & 0xf) |
1690 			     QUEUEID(queue & 0x7));
1691 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1692 }
1693 
1694 /* ucode loading */
1695 /**
1696  * ci_mc_load_microcode - load MC ucode into the hw
1697  *
1698  * @rdev: radeon_device pointer
1699  *
1700  * Load the GDDR MC ucode into the hw (CIK).
1701  * Returns 0 on success, error on failure.
1702  */
1703 int ci_mc_load_microcode(struct radeon_device *rdev)
1704 {
1705 	const __be32 *fw_data;
1706 	u32 running, blackout = 0;
1707 	u32 *io_mc_regs;
1708 	int i, regs_size, ucode_size;
1709 
1710 	if (!rdev->mc_fw)
1711 		return -EINVAL;
1712 
1713 	ucode_size = rdev->mc_fw->size / 4;
1714 
1715 	switch (rdev->family) {
1716 	case CHIP_BONAIRE:
1717 		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1718 		regs_size = BONAIRE_IO_MC_REGS_SIZE;
1719 		break;
1720 	case CHIP_HAWAII:
1721 		io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1722 		regs_size = HAWAII_IO_MC_REGS_SIZE;
1723 		break;
1724 	default:
1725 		return -EINVAL;
1726 	}
1727 
1728 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1729 
1730 	if (running == 0) {
1731 		if (running) {
1732 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1733 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1734 		}
1735 
1736 		/* reset the engine and set to writable */
1737 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1738 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1739 
1740 		/* load mc io regs */
1741 		for (i = 0; i < regs_size; i++) {
1742 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1743 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1744 		}
1745 		/* load the MC ucode */
1746 		fw_data = (const __be32 *)rdev->mc_fw->data;
1747 		for (i = 0; i < ucode_size; i++)
1748 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1749 
1750 		/* put the engine back into the active state */
1751 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1752 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1753 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1754 
1755 		/* wait for training to complete */
1756 		for (i = 0; i < rdev->usec_timeout; i++) {
1757 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1758 				break;
1759 			udelay(1);
1760 		}
1761 		for (i = 0; i < rdev->usec_timeout; i++) {
1762 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1763 				break;
1764 			udelay(1);
1765 		}
1766 
1767 		if (running)
1768 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1769 	}
1770 
1771 	return 0;
1772 }
1773 
1774 /**
1775  * cik_init_microcode - load ucode images from disk
1776  *
1777  * @rdev: radeon_device pointer
1778  *
1779  * Use the firmware interface to load the ucode images into
1780  * the driver (not loaded into hw).
1781  * Returns 0 on success, error on failure.
1782  */
1783 static int cik_init_microcode(struct radeon_device *rdev)
1784 {
1785 	const char *chip_name;
1786 	size_t pfp_req_size, me_req_size, ce_req_size,
1787 		mec_req_size, rlc_req_size, mc_req_size = 0,
1788 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1789 	char fw_name[30];
1790 	int err;
1791 
1792 	DRM_DEBUG("\n");
1793 
1794 	switch (rdev->family) {
1795 	case CHIP_BONAIRE:
1796 		chip_name = "BONAIRE";
1797 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1798 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1799 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1800 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1801 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1802 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1803 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1804 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1805 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1806 		break;
1807 	case CHIP_HAWAII:
1808 		chip_name = "HAWAII";
1809 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1810 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1811 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1812 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1813 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1814 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1815 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1816 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1817 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1818 		break;
1819 	case CHIP_KAVERI:
1820 		chip_name = "KAVERI";
1821 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1822 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1823 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1824 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1825 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1826 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1827 		break;
1828 	case CHIP_KABINI:
1829 		chip_name = "KABINI";
1830 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1831 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1832 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1833 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1834 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1835 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1836 		break;
1837 	default: BUG();
1838 	}
1839 
1840 	DRM_INFO("Loading %s Microcode\n", chip_name);
1841 
1842 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1843 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1844 	if (err)
1845 		goto out;
1846 	if (rdev->pfp_fw->size != pfp_req_size) {
1847 		printk(KERN_ERR
1848 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1849 		       rdev->pfp_fw->size, fw_name);
1850 		err = -EINVAL;
1851 		goto out;
1852 	}
1853 
1854 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1855 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1856 	if (err)
1857 		goto out;
1858 	if (rdev->me_fw->size != me_req_size) {
1859 		printk(KERN_ERR
1860 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1861 		       rdev->me_fw->size, fw_name);
1862 		err = -EINVAL;
1863 	}
1864 
1865 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1866 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1867 	if (err)
1868 		goto out;
1869 	if (rdev->ce_fw->size != ce_req_size) {
1870 		printk(KERN_ERR
1871 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1872 		       rdev->ce_fw->size, fw_name);
1873 		err = -EINVAL;
1874 	}
1875 
1876 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1877 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1878 	if (err)
1879 		goto out;
1880 	if (rdev->mec_fw->size != mec_req_size) {
1881 		printk(KERN_ERR
1882 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1883 		       rdev->mec_fw->size, fw_name);
1884 		err = -EINVAL;
1885 	}
1886 
1887 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1888 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1889 	if (err)
1890 		goto out;
1891 	if (rdev->rlc_fw->size != rlc_req_size) {
1892 		printk(KERN_ERR
1893 		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1894 		       rdev->rlc_fw->size, fw_name);
1895 		err = -EINVAL;
1896 	}
1897 
1898 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1899 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1900 	if (err)
1901 		goto out;
1902 	if (rdev->sdma_fw->size != sdma_req_size) {
1903 		printk(KERN_ERR
1904 		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1905 		       rdev->sdma_fw->size, fw_name);
1906 		err = -EINVAL;
1907 	}
1908 
1909 	/* No SMC, MC ucode on APUs */
1910 	if (!(rdev->flags & RADEON_IS_IGP)) {
1911 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1912 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1913 		if (err) {
1914 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1915 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1916 			if (err)
1917 				goto out;
1918 		}
1919 		if ((rdev->mc_fw->size != mc_req_size) &&
1920 		    (rdev->mc_fw->size != mc2_req_size)){
1921 			printk(KERN_ERR
1922 			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1923 			       rdev->mc_fw->size, fw_name);
1924 			err = -EINVAL;
1925 		}
1926 		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1927 
1928 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1929 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1930 		if (err) {
1931 			printk(KERN_ERR
1932 			       "smc: error loading firmware \"%s\"\n",
1933 			       fw_name);
1934 			release_firmware(rdev->smc_fw);
1935 			rdev->smc_fw = NULL;
1936 			err = 0;
1937 		} else if (rdev->smc_fw->size != smc_req_size) {
1938 			printk(KERN_ERR
1939 			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1940 			       rdev->smc_fw->size, fw_name);
1941 			err = -EINVAL;
1942 		}
1943 	}
1944 
1945 out:
1946 	if (err) {
1947 		if (err != -EINVAL)
1948 			printk(KERN_ERR
1949 			       "cik_cp: Failed to load firmware \"%s\"\n",
1950 			       fw_name);
1951 		release_firmware(rdev->pfp_fw);
1952 		rdev->pfp_fw = NULL;
1953 		release_firmware(rdev->me_fw);
1954 		rdev->me_fw = NULL;
1955 		release_firmware(rdev->ce_fw);
1956 		rdev->ce_fw = NULL;
1957 		release_firmware(rdev->rlc_fw);
1958 		rdev->rlc_fw = NULL;
1959 		release_firmware(rdev->mc_fw);
1960 		rdev->mc_fw = NULL;
1961 		release_firmware(rdev->smc_fw);
1962 		rdev->smc_fw = NULL;
1963 	}
1964 	return err;
1965 }
1966 
1967 /*
1968  * Core functions
1969  */
1970 /**
1971  * cik_tiling_mode_table_init - init the hw tiling table
1972  *
1973  * @rdev: radeon_device pointer
1974  *
1975  * Starting with SI, the tiling setup is done globally in a
1976  * set of 32 tiling modes.  Rather than selecting each set of
1977  * parameters per surface as on older asics, we just select
1978  * which index in the tiling table we want to use, and the
1979  * surface uses those parameters (CIK).
1980  */
1981 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1982 {
1983 	const u32 num_tile_mode_states = 32;
1984 	const u32 num_secondary_tile_mode_states = 16;
1985 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1986 	u32 num_pipe_configs;
1987 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
1988 		rdev->config.cik.max_shader_engines;
1989 
1990 	switch (rdev->config.cik.mem_row_size_in_kb) {
1991 	case 1:
1992 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1993 		break;
1994 	case 2:
1995 	default:
1996 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1997 		break;
1998 	case 4:
1999 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2000 		break;
2001 	}
2002 
2003 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2004 	if (num_pipe_configs > 8)
2005 		num_pipe_configs = 16;
2006 
2007 	if (num_pipe_configs == 16) {
2008 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2009 			switch (reg_offset) {
2010 			case 0:
2011 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2012 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2013 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2014 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2015 				break;
2016 			case 1:
2017 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2018 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2019 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2020 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2021 				break;
2022 			case 2:
2023 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2024 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2025 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2026 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2027 				break;
2028 			case 3:
2029 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2030 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2031 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2032 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2033 				break;
2034 			case 4:
2035 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2036 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2037 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2038 						 TILE_SPLIT(split_equal_to_row_size));
2039 				break;
2040 			case 5:
2041 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2042 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2043 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2044 				break;
2045 			case 6:
2046 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2047 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2048 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2049 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2050 				break;
2051 			case 7:
2052 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2053 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2054 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2055 						 TILE_SPLIT(split_equal_to_row_size));
2056 				break;
2057 			case 8:
2058 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2059 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2060 				break;
2061 			case 9:
2062 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2063 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2064 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2065 				break;
2066 			case 10:
2067 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2068 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2069 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2070 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2071 				break;
2072 			case 11:
2073 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2074 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2075 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2076 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2077 				break;
2078 			case 12:
2079 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2080 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2081 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2082 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2083 				break;
2084 			case 13:
2085 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2086 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2087 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2088 				break;
2089 			case 14:
2090 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2091 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2092 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2093 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2094 				break;
2095 			case 16:
2096 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2097 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2098 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2099 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2100 				break;
2101 			case 17:
2102 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2103 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2104 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2105 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2106 				break;
2107 			case 27:
2108 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2109 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2110 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2111 				break;
2112 			case 28:
2113 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2114 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2115 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2116 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2117 				break;
2118 			case 29:
2119 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2120 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2121 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2122 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2123 				break;
2124 			case 30:
2125 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2126 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2127 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2128 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2129 				break;
2130 			default:
2131 				gb_tile_moden = 0;
2132 				break;
2133 			}
2134 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2135 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2136 		}
2137 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2138 			switch (reg_offset) {
2139 			case 0:
2140 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2141 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2142 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2143 						 NUM_BANKS(ADDR_SURF_16_BANK));
2144 				break;
2145 			case 1:
2146 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2147 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2148 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2149 						 NUM_BANKS(ADDR_SURF_16_BANK));
2150 				break;
2151 			case 2:
2152 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2153 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2154 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2155 						 NUM_BANKS(ADDR_SURF_16_BANK));
2156 				break;
2157 			case 3:
2158 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2159 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2160 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2161 						 NUM_BANKS(ADDR_SURF_16_BANK));
2162 				break;
2163 			case 4:
2164 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2165 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2166 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2167 						 NUM_BANKS(ADDR_SURF_8_BANK));
2168 				break;
2169 			case 5:
2170 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2171 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2172 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2173 						 NUM_BANKS(ADDR_SURF_4_BANK));
2174 				break;
2175 			case 6:
2176 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2177 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2178 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2179 						 NUM_BANKS(ADDR_SURF_2_BANK));
2180 				break;
2181 			case 8:
2182 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2183 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2184 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2185 						 NUM_BANKS(ADDR_SURF_16_BANK));
2186 				break;
2187 			case 9:
2188 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2189 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2190 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2191 						 NUM_BANKS(ADDR_SURF_16_BANK));
2192 				break;
2193 			case 10:
2194 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2195 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2196 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2197 						 NUM_BANKS(ADDR_SURF_16_BANK));
2198 				break;
2199 			case 11:
2200 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2201 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2202 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2203 						 NUM_BANKS(ADDR_SURF_8_BANK));
2204 				break;
2205 			case 12:
2206 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2207 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2208 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2209 						 NUM_BANKS(ADDR_SURF_4_BANK));
2210 				break;
2211 			case 13:
2212 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2213 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2214 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2215 						 NUM_BANKS(ADDR_SURF_2_BANK));
2216 				break;
2217 			case 14:
2218 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2219 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2220 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2221 						 NUM_BANKS(ADDR_SURF_2_BANK));
2222 				break;
2223 			default:
2224 				gb_tile_moden = 0;
2225 				break;
2226 			}
2227 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2228 		}
2229 	} else if (num_pipe_configs == 8) {
2230 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2231 			switch (reg_offset) {
2232 			case 0:
2233 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2234 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2235 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2236 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2237 				break;
2238 			case 1:
2239 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2240 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2241 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2242 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2243 				break;
2244 			case 2:
2245 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2246 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2247 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2248 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2249 				break;
2250 			case 3:
2251 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2252 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2253 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2254 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2255 				break;
2256 			case 4:
2257 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2258 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2259 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2260 						 TILE_SPLIT(split_equal_to_row_size));
2261 				break;
2262 			case 5:
2263 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2264 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2265 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2266 				break;
2267 			case 6:
2268 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2269 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2270 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2271 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2272 				break;
2273 			case 7:
2274 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2275 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2276 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2277 						 TILE_SPLIT(split_equal_to_row_size));
2278 				break;
2279 			case 8:
2280 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2281 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2282 				break;
2283 			case 9:
2284 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2285 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2286 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2287 				break;
2288 			case 10:
2289 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2290 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2291 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2292 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2293 				break;
2294 			case 11:
2295 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2296 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2297 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2298 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2299 				break;
2300 			case 12:
2301 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2302 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2303 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2304 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2305 				break;
2306 			case 13:
2307 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2308 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2309 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2310 				break;
2311 			case 14:
2312 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2313 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2314 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2315 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2316 				break;
2317 			case 16:
2318 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2319 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2320 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2321 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2322 				break;
2323 			case 17:
2324 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2325 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2326 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2327 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2328 				break;
2329 			case 27:
2330 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2331 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2332 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2333 				break;
2334 			case 28:
2335 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2336 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2337 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2338 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2339 				break;
2340 			case 29:
2341 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2342 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2343 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2344 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2345 				break;
2346 			case 30:
2347 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2348 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2349 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2350 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2351 				break;
2352 			default:
2353 				gb_tile_moden = 0;
2354 				break;
2355 			}
2356 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2357 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2358 		}
2359 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2360 			switch (reg_offset) {
2361 			case 0:
2362 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2363 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2364 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2365 						 NUM_BANKS(ADDR_SURF_16_BANK));
2366 				break;
2367 			case 1:
2368 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2369 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2370 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2371 						 NUM_BANKS(ADDR_SURF_16_BANK));
2372 				break;
2373 			case 2:
2374 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2375 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2376 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2377 						 NUM_BANKS(ADDR_SURF_16_BANK));
2378 				break;
2379 			case 3:
2380 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2381 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2382 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2383 						 NUM_BANKS(ADDR_SURF_16_BANK));
2384 				break;
2385 			case 4:
2386 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2387 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2388 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2389 						 NUM_BANKS(ADDR_SURF_8_BANK));
2390 				break;
2391 			case 5:
2392 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2393 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2394 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2395 						 NUM_BANKS(ADDR_SURF_4_BANK));
2396 				break;
2397 			case 6:
2398 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2399 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2400 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2401 						 NUM_BANKS(ADDR_SURF_2_BANK));
2402 				break;
2403 			case 8:
2404 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2405 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2406 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2407 						 NUM_BANKS(ADDR_SURF_16_BANK));
2408 				break;
2409 			case 9:
2410 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2411 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2412 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2413 						 NUM_BANKS(ADDR_SURF_16_BANK));
2414 				break;
2415 			case 10:
2416 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2417 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2418 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2419 						 NUM_BANKS(ADDR_SURF_16_BANK));
2420 				break;
2421 			case 11:
2422 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2423 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2424 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2425 						 NUM_BANKS(ADDR_SURF_16_BANK));
2426 				break;
2427 			case 12:
2428 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2429 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2430 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2431 						 NUM_BANKS(ADDR_SURF_8_BANK));
2432 				break;
2433 			case 13:
2434 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2435 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2436 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2437 						 NUM_BANKS(ADDR_SURF_4_BANK));
2438 				break;
2439 			case 14:
2440 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2441 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2442 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2443 						 NUM_BANKS(ADDR_SURF_2_BANK));
2444 				break;
2445 			default:
2446 				gb_tile_moden = 0;
2447 				break;
2448 			}
2449 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2450 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2451 		}
2452 	} else if (num_pipe_configs == 4) {
2453 		if (num_rbs == 4) {
2454 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2455 				switch (reg_offset) {
2456 				case 0:
2457 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2458 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2459 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2460 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2461 					break;
2462 				case 1:
2463 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2464 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2465 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2466 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2467 					break;
2468 				case 2:
2469 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2470 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2471 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2472 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2473 					break;
2474 				case 3:
2475 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2476 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2477 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2478 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2479 					break;
2480 				case 4:
2481 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2482 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2483 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2484 							 TILE_SPLIT(split_equal_to_row_size));
2485 					break;
2486 				case 5:
2487 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2488 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2489 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2490 					break;
2491 				case 6:
2492 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2493 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2494 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2495 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2496 					break;
2497 				case 7:
2498 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2499 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2500 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2501 							 TILE_SPLIT(split_equal_to_row_size));
2502 					break;
2503 				case 8:
2504 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2505 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2506 					break;
2507 				case 9:
2508 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2509 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2510 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2511 					break;
2512 				case 10:
2513 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2514 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2515 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2516 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2517 					break;
2518 				case 11:
2519 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2520 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2521 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2522 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2523 					break;
2524 				case 12:
2525 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2526 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2527 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2528 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2529 					break;
2530 				case 13:
2531 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2532 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2533 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2534 					break;
2535 				case 14:
2536 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2538 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2539 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2540 					break;
2541 				case 16:
2542 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2543 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2544 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2545 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2546 					break;
2547 				case 17:
2548 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2549 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2550 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2551 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2552 					break;
2553 				case 27:
2554 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2555 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2556 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2557 					break;
2558 				case 28:
2559 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2560 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2561 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2562 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2563 					break;
2564 				case 29:
2565 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2566 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2567 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2568 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2569 					break;
2570 				case 30:
2571 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2572 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2573 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2574 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2575 					break;
2576 				default:
2577 					gb_tile_moden = 0;
2578 					break;
2579 				}
2580 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2581 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2582 			}
2583 		} else if (num_rbs < 4) {
2584 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2585 				switch (reg_offset) {
2586 				case 0:
2587 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2588 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2589 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2590 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2591 					break;
2592 				case 1:
2593 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2594 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2595 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2596 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2597 					break;
2598 				case 2:
2599 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2600 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2601 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2602 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2603 					break;
2604 				case 3:
2605 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2607 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2608 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2609 					break;
2610 				case 4:
2611 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2612 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2613 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2614 							 TILE_SPLIT(split_equal_to_row_size));
2615 					break;
2616 				case 5:
2617 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2618 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2619 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2620 					break;
2621 				case 6:
2622 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2623 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2624 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2625 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2626 					break;
2627 				case 7:
2628 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2629 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2630 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2631 							 TILE_SPLIT(split_equal_to_row_size));
2632 					break;
2633 				case 8:
2634 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2635 						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2636 					break;
2637 				case 9:
2638 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2639 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2640 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2641 					break;
2642 				case 10:
2643 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2644 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2645 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2646 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2647 					break;
2648 				case 11:
2649 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2650 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2651 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2652 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2653 					break;
2654 				case 12:
2655 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2656 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2657 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2658 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2659 					break;
2660 				case 13:
2661 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2662 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2663 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2664 					break;
2665 				case 14:
2666 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2667 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2668 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2669 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2670 					break;
2671 				case 16:
2672 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2673 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2674 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2675 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2676 					break;
2677 				case 17:
2678 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2679 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2680 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2681 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2682 					break;
2683 				case 27:
2684 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2685 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2686 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2687 					break;
2688 				case 28:
2689 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2690 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2691 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2692 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2693 					break;
2694 				case 29:
2695 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2696 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2697 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2698 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2699 					break;
2700 				case 30:
2701 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2702 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2703 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2704 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2705 					break;
2706 				default:
2707 					gb_tile_moden = 0;
2708 					break;
2709 				}
2710 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2711 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2712 			}
2713 		}
2714 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2715 			switch (reg_offset) {
2716 			case 0:
2717 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2718 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2719 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2720 						 NUM_BANKS(ADDR_SURF_16_BANK));
2721 				break;
2722 			case 1:
2723 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2724 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2725 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2726 						 NUM_BANKS(ADDR_SURF_16_BANK));
2727 				break;
2728 			case 2:
2729 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2730 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2731 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2732 						 NUM_BANKS(ADDR_SURF_16_BANK));
2733 				break;
2734 			case 3:
2735 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2736 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2737 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2738 						 NUM_BANKS(ADDR_SURF_16_BANK));
2739 				break;
2740 			case 4:
2741 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2742 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2743 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2744 						 NUM_BANKS(ADDR_SURF_16_BANK));
2745 				break;
2746 			case 5:
2747 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2748 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2749 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2750 						 NUM_BANKS(ADDR_SURF_8_BANK));
2751 				break;
2752 			case 6:
2753 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2754 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2755 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2756 						 NUM_BANKS(ADDR_SURF_4_BANK));
2757 				break;
2758 			case 8:
2759 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2760 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2761 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2762 						 NUM_BANKS(ADDR_SURF_16_BANK));
2763 				break;
2764 			case 9:
2765 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2766 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2767 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2768 						 NUM_BANKS(ADDR_SURF_16_BANK));
2769 				break;
2770 			case 10:
2771 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2772 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2773 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2774 						 NUM_BANKS(ADDR_SURF_16_BANK));
2775 				break;
2776 			case 11:
2777 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2778 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2779 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2780 						 NUM_BANKS(ADDR_SURF_16_BANK));
2781 				break;
2782 			case 12:
2783 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2784 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2785 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2786 						 NUM_BANKS(ADDR_SURF_16_BANK));
2787 				break;
2788 			case 13:
2789 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2790 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2791 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2792 						 NUM_BANKS(ADDR_SURF_8_BANK));
2793 				break;
2794 			case 14:
2795 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2796 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2797 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2798 						 NUM_BANKS(ADDR_SURF_4_BANK));
2799 				break;
2800 			default:
2801 				gb_tile_moden = 0;
2802 				break;
2803 			}
2804 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2805 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2806 		}
2807 	} else if (num_pipe_configs == 2) {
2808 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2809 			switch (reg_offset) {
2810 			case 0:
2811 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2812 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2813 						 PIPE_CONFIG(ADDR_SURF_P2) |
2814 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2815 				break;
2816 			case 1:
2817 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2818 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2819 						 PIPE_CONFIG(ADDR_SURF_P2) |
2820 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2821 				break;
2822 			case 2:
2823 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2824 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2825 						 PIPE_CONFIG(ADDR_SURF_P2) |
2826 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2827 				break;
2828 			case 3:
2829 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2830 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2831 						 PIPE_CONFIG(ADDR_SURF_P2) |
2832 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2833 				break;
2834 			case 4:
2835 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2836 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2837 						 PIPE_CONFIG(ADDR_SURF_P2) |
2838 						 TILE_SPLIT(split_equal_to_row_size));
2839 				break;
2840 			case 5:
2841 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2842 						 PIPE_CONFIG(ADDR_SURF_P2) |
2843 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2844 				break;
2845 			case 6:
2846 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2847 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2848 						 PIPE_CONFIG(ADDR_SURF_P2) |
2849 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2850 				break;
2851 			case 7:
2852 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2853 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2854 						 PIPE_CONFIG(ADDR_SURF_P2) |
2855 						 TILE_SPLIT(split_equal_to_row_size));
2856 				break;
2857 			case 8:
2858 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2859 						PIPE_CONFIG(ADDR_SURF_P2);
2860 				break;
2861 			case 9:
2862 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2863 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2864 						 PIPE_CONFIG(ADDR_SURF_P2));
2865 				break;
2866 			case 10:
2867 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2868 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2869 						 PIPE_CONFIG(ADDR_SURF_P2) |
2870 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2871 				break;
2872 			case 11:
2873 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2874 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2875 						 PIPE_CONFIG(ADDR_SURF_P2) |
2876 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2877 				break;
2878 			case 12:
2879 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2880 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2881 						 PIPE_CONFIG(ADDR_SURF_P2) |
2882 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2883 				break;
2884 			case 13:
2885 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2886 						 PIPE_CONFIG(ADDR_SURF_P2) |
2887 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2888 				break;
2889 			case 14:
2890 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2891 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2892 						 PIPE_CONFIG(ADDR_SURF_P2) |
2893 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2894 				break;
2895 			case 16:
2896 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2897 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2898 						 PIPE_CONFIG(ADDR_SURF_P2) |
2899 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2900 				break;
2901 			case 17:
2902 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2903 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2904 						 PIPE_CONFIG(ADDR_SURF_P2) |
2905 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2906 				break;
2907 			case 27:
2908 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2909 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2910 						 PIPE_CONFIG(ADDR_SURF_P2));
2911 				break;
2912 			case 28:
2913 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2914 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2915 						 PIPE_CONFIG(ADDR_SURF_P2) |
2916 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2917 				break;
2918 			case 29:
2919 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2920 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2921 						 PIPE_CONFIG(ADDR_SURF_P2) |
2922 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2923 				break;
2924 			case 30:
2925 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2926 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2927 						 PIPE_CONFIG(ADDR_SURF_P2) |
2928 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2929 				break;
2930 			default:
2931 				gb_tile_moden = 0;
2932 				break;
2933 			}
2934 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2935 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2936 		}
2937 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2938 			switch (reg_offset) {
2939 			case 0:
2940 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2941 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2942 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2943 						 NUM_BANKS(ADDR_SURF_16_BANK));
2944 				break;
2945 			case 1:
2946 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2947 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2948 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2949 						 NUM_BANKS(ADDR_SURF_16_BANK));
2950 				break;
2951 			case 2:
2952 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2953 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2954 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2955 						 NUM_BANKS(ADDR_SURF_16_BANK));
2956 				break;
2957 			case 3:
2958 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2959 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2960 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2961 						 NUM_BANKS(ADDR_SURF_16_BANK));
2962 				break;
2963 			case 4:
2964 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2965 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2966 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2967 						 NUM_BANKS(ADDR_SURF_16_BANK));
2968 				break;
2969 			case 5:
2970 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2971 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2972 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2973 						 NUM_BANKS(ADDR_SURF_16_BANK));
2974 				break;
2975 			case 6:
2976 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2977 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2978 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2979 						 NUM_BANKS(ADDR_SURF_8_BANK));
2980 				break;
2981 			case 8:
2982 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2983 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2984 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2985 						 NUM_BANKS(ADDR_SURF_16_BANK));
2986 				break;
2987 			case 9:
2988 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2989 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2990 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2991 						 NUM_BANKS(ADDR_SURF_16_BANK));
2992 				break;
2993 			case 10:
2994 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2995 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2996 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2997 						 NUM_BANKS(ADDR_SURF_16_BANK));
2998 				break;
2999 			case 11:
3000 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3001 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3002 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3003 						 NUM_BANKS(ADDR_SURF_16_BANK));
3004 				break;
3005 			case 12:
3006 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3007 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3008 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3009 						 NUM_BANKS(ADDR_SURF_16_BANK));
3010 				break;
3011 			case 13:
3012 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3013 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3014 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3015 						 NUM_BANKS(ADDR_SURF_16_BANK));
3016 				break;
3017 			case 14:
3018 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3019 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3020 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3021 						 NUM_BANKS(ADDR_SURF_8_BANK));
3022 				break;
3023 			default:
3024 				gb_tile_moden = 0;
3025 				break;
3026 			}
3027 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3028 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3029 		}
3030 	} else
3031 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3032 }
3033 
3034 /**
3035  * cik_select_se_sh - select which SE, SH to address
3036  *
3037  * @rdev: radeon_device pointer
3038  * @se_num: shader engine to address
3039  * @sh_num: sh block to address
3040  *
3041  * Select which SE, SH combinations to address. Certain
3042  * registers are instanced per SE or SH.  0xffffffff means
3043  * broadcast to all SEs or SHs (CIK).
3044  */
3045 static void cik_select_se_sh(struct radeon_device *rdev,
3046 			     u32 se_num, u32 sh_num)
3047 {
3048 	u32 data = INSTANCE_BROADCAST_WRITES;
3049 
3050 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3051 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3052 	else if (se_num == 0xffffffff)
3053 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3054 	else if (sh_num == 0xffffffff)
3055 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3056 	else
3057 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3058 	WREG32(GRBM_GFX_INDEX, data);
3059 }
3060 
3061 /**
3062  * cik_create_bitmask - create a bitmask
3063  *
3064  * @bit_width: length of the mask
3065  *
3066  * create a variable length bit mask (CIK).
3067  * Returns the bitmask.
3068  */
3069 static u32 cik_create_bitmask(u32 bit_width)
3070 {
3071 	u32 i, mask = 0;
3072 
3073 	for (i = 0; i < bit_width; i++) {
3074 		mask <<= 1;
3075 		mask |= 1;
3076 	}
3077 	return mask;
3078 }
3079 
3080 /**
3081  * cik_get_rb_disabled - computes the mask of disabled RBs
3082  *
3083  * @rdev: radeon_device pointer
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
3087  *
3088  * Calculates the bitmask of disabled RBs (CIK).
3089  * Returns the disabled RB bitmask.
3090  */
3091 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3092 			      u32 max_rb_num_per_se,
3093 			      u32 sh_per_se)
3094 {
3095 	u32 data, mask;
3096 
3097 	data = RREG32(CC_RB_BACKEND_DISABLE);
3098 	if (data & 1)
3099 		data &= BACKEND_DISABLE_MASK;
3100 	else
3101 		data = 0;
3102 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3103 
3104 	data >>= BACKEND_DISABLE_SHIFT;
3105 
3106 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3107 
3108 	return data & mask;
3109 }
3110 
3111 /**
3112  * cik_setup_rb - setup the RBs on the asic
3113  *
3114  * @rdev: radeon_device pointer
3115  * @se_num: number of SEs (shader engines) for the asic
3116  * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3118  *
3119  * Configures per-SE/SH RB registers (CIK).
3120  */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Walk every SE/SH and collect the per-SH disabled-RB bits into one
	 * global bitmap; Hawaii packs a wider per-SH field than other CIK
	 * parts, hence the two bitmap widths.
	 */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			if (rdev->family == CHIP_HAWAII)
				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
			else
				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* Restore broadcast so subsequent register writes hit all SEs/SHs. */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* Invert the disabled bitmap into an enabled bitmap. */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* Exported so other code (e.g. userspace queries) can see which
	 * backends are active. */
	rdev->config.cik.backend_enable_mask = enabled_rbs;

	/* Program PA_SC_RASTER_CONFIG per SE, consuming two enabled-RB bits
	 * per SH to pick the matching RB map. */
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 0:
				/* no RBs enabled in this pair */
				if (j == 0)
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
				else
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
				break;
			case 1:
				/* only the first RB of the pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				/* only the second RB of the pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				/* both RBs enabled */
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	/* Leave the GRBM index in broadcast mode. */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
3179 
3180 /**
3181  * cik_gpu_init - setup the 3D engine
3182  *
3183  * @rdev: radeon_device pointer
3184  *
3185  * Configures the 3D engine and tiling configuration
3186  * registers so that the 3D engine is usable.
3187  */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* Per-family limits (shader engines, pipes, CUs, RBs, fifo sizes)
	 * and the "golden" GB_ADDR_CONFIG value for each asic. */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAWAII:
		rdev->config.cik.max_shader_engines = 4;
		rdev->config.cik.max_tile_pipes = 16;
		rdev->config.cik.max_cu_per_sh = 11;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 4;
		rdev->config.cik.max_texture_channel_caches = 16;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 4;
		/* Different Kaveri SKUs (by PCI device id) expose different
		 * CU and render-backend counts. */
		if ((rdev->pdev->device == 0x1304) ||
		    (rdev->pdev->device == 0x1305) ||
		    (rdev->pdev->device == 0x130C) ||
		    (rdev->pdev->device == 0x130F) ||
		    (rdev->pdev->device == 0x1310) ||
		    (rdev->pdev->device == 0x1311) ||
		    (rdev->pdev->device == 0x131C)) {
			rdev->config.cik.max_cu_per_sh = 8;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1309) ||
			   (rdev->pdev->device == 0x130A) ||
			   (rdev->pdev->device == 0x130D) ||
			   (rdev->pdev->device == 0x1313) ||
			   (rdev->pdev->device == 0x131D)) {
			rdev->config.cik.max_cu_per_sh = 6;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1306) ||
			   (rdev->pdev->device == 0x1307) ||
			   (rdev->pdev->device == 0x130B) ||
			   (rdev->pdev->device == 0x130E) ||
			   (rdev->pdev->device == 0x1315) ||
			   (rdev->pdev->device == 0x131B)) {
			rdev->config.cik.max_cu_per_sh = 4;
			rdev->config.cik.max_backends_per_se = 1;
		} else {
			rdev->config.cik.max_cu_per_sh = 3;
			rdev->config.cik.max_backends_per_se = 1;
		}
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KABINI:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	/* Zero a bank of 32 register groups, 5 regs each, 0x18 bytes apart. */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read but never used below. */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	/* Derive DRAM row size (in KB) from the MC column count field,
	 * capped at 4KB. */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	rdev->config.cik.tile_config |=
		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* Mirror the address config to every block that needs it (HDP,
	 * display DMIF, both SDMA engines, UVD). */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	/* Read-modify-write the DB debug registers, preserving unrelated bits. */
	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	/* Program the scan-converter fifo sizes chosen per family above. */
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* Write-back of the current value; presumably latches the register —
	 * TODO confirm against the register spec. */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

	/* Give the hardware time to settle after the config writes. */
	udelay(50);
}
3443 
3444 /*
3445  * GPU scratch registers helpers function.
3446  */
3447 /**
3448  * cik_scratch_init - setup driver info for CP scratch regs
3449  *
3450  * @rdev: radeon_device pointer
3451  *
3452  * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
3454  * is not used by default on newer asics (r6xx+).  On newer asics,
3455  * memory buffers are used for fences rather than scratch regs.
3456  */
3457 static void cik_scratch_init(struct radeon_device *rdev)
3458 {
3459 	int i;
3460 
3461 	rdev->scratch.num_reg = 7;
3462 	rdev->scratch.reg_base = SCRATCH_REG0;
3463 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3464 		rdev->scratch.free[i] = true;
3465 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3466 	}
3467 }
3468 
3469 /**
3470  * cik_ring_test - basic gfx ring test
3471  *
3472  * @rdev: radeon_device pointer
3473  * @ring: radeon_ring structure holding ring information
3474  *
3475  * Allocate a scratch register and write to it using the gfx ring (CIK).
3476  * Provides a basic gfx ring test to verify that the ring is working.
3477  * Used by cik_cp_gfx_resume();
3478  * Returns 0 on success, error on failure.
3479  */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* Seed the scratch reg so the ring's later write is detectable. */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* Ask the CP to write 0xDEADBEEF into the scratch register. */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);

	/* Poll until the value lands or the timeout (in usecs) expires. */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}
3520 
3521 /**
3522  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3523  *
3524  * @rdev: radeon_device pointer
3525  * @ridx: radeon ring index
3526  *
3527  * Emits an hdp flush on the cp.
3528  */
static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
				       int ridx)
{
	struct radeon_ring *ring = &rdev->ring[ridx];
	u32 ref_and_mask;

	/* Pick the GPU_HDP_FLUSH semaphore bit for this ring: the gfx ring
	 * uses CP0; compute rings use CP2 (ME 0) or CP6 (ME 1) shifted by
	 * the pipe number.  Unknown MEs emit nothing. */
	switch (ring->idx) {
	case CAYMAN_RING_TYPE_CP1_INDEX:
	case CAYMAN_RING_TYPE_CP2_INDEX:
	default:
		switch (ring->me) {
		case 0:
			ref_and_mask = CP2 << ring->pipe;
			break;
		case 1:
			ref_and_mask = CP6 << ring->pipe;
			break;
		default:
			return;
		}
		break;
	case RADEON_RING_TYPE_GFX_INDEX:
		ref_and_mask = CP0;
		break;
	}

	/* WAIT_REG_MEM: request the HDP flush, then poll the done register
	 * until the masked bits equal the reference value. */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, 0x20); /* poll interval */
}
3565 
3566 /**
3567  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3568  *
3569  * @rdev: radeon_device pointer
3570  * @fence: radeon fence object
3571  *
 * Emits a fence sequence number on the gfx ring and flushes
3573  * GPU caches.
3574  */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address of this ring's fence slot. */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* Fence address must be 4-byte aligned; only the low 16 bits of the
	 * upper dword are carried with the data/int selects. */
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
}
3594 
3595 /**
3596  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3597  *
3598  * @rdev: radeon_device pointer
3599  * @fence: radeon fence object
3600  *
 * Emits a fence sequence number on the compute ring and flushes
3602  * GPU caches.
3603  */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address of this ring's fence slot. */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* Unlike the gfx EOP packet, RELEASE_MEM takes the data/int select
	 * dword first, then the full 64-bit address split across two dwords. */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
}
3624 
/**
 * cik_semaphore_ring_emit - emit a semaphore packet on a ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring to emit on
 * @semaphore: semaphore holding the GPU address to signal or wait on
 * @emit_wait: true to emit a wait, false to emit a signal
 *
 * Emits a MEM_SEMAPHORE packet on the given ring (CIK).
 * Always returns true (the packet is always emitted).
 */
bool cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, addr & 0xffffffff);
	/* Only the low 16 bits of the upper dword fit beside the select bits. */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);

	return true;
}
3639 
3640 /**
3641  * cik_copy_cpdma - copy pages using the CP DMA engine
3642  *
3643  * @rdev: radeon_device pointer
3644  * @src_offset: src GPU address
3645  * @dst_offset: dst GPU address
3646  * @num_gpu_pages: number of GPU pages to xfer
3647  * @fence: radeon fence object
3648  *
3649  * Copy GPU paging using the CP DMA engine (CIK+).
3650  * Used by the radeon ttm implementation to move pages if
3651  * registered as the asic copy callback.
3652  */
3653 int cik_copy_cpdma(struct radeon_device *rdev,
3654 		   uint64_t src_offset, uint64_t dst_offset,
3655 		   unsigned num_gpu_pages,
3656 		   struct radeon_fence **fence)
3657 {
3658 	struct radeon_semaphore *sem = NULL;
3659 	int ring_index = rdev->asic->copy.blit_ring_index;
3660 	struct radeon_ring *ring = &rdev->ring[ring_index];
3661 	u32 size_in_bytes, cur_size_in_bytes, control;
3662 	int i, num_loops;
3663 	int r = 0;
3664 
3665 	r = radeon_semaphore_create(rdev, &sem);
3666 	if (r) {
3667 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3668 		return r;
3669 	}
3670 
3671 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3672 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3673 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3674 	if (r) {
3675 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3676 		radeon_semaphore_free(rdev, &sem, NULL);
3677 		return r;
3678 	}
3679 
3680 	radeon_semaphore_sync_to(sem, *fence);
3681 	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
3682 
3683 	for (i = 0; i < num_loops; i++) {
3684 		cur_size_in_bytes = size_in_bytes;
3685 		if (cur_size_in_bytes > 0x1fffff)
3686 			cur_size_in_bytes = 0x1fffff;
3687 		size_in_bytes -= cur_size_in_bytes;
3688 		control = 0;
3689 		if (size_in_bytes == 0)
3690 			control |= PACKET3_DMA_DATA_CP_SYNC;
3691 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3692 		radeon_ring_write(ring, control);
3693 		radeon_ring_write(ring, lower_32_bits(src_offset));
3694 		radeon_ring_write(ring, upper_32_bits(src_offset));
3695 		radeon_ring_write(ring, lower_32_bits(dst_offset));
3696 		radeon_ring_write(ring, upper_32_bits(dst_offset));
3697 		radeon_ring_write(ring, cur_size_in_bytes);
3698 		src_offset += cur_size_in_bytes;
3699 		dst_offset += cur_size_in_bytes;
3700 	}
3701 
3702 	r = radeon_fence_emit(rdev, fence, ring->idx);
3703 	if (r) {
3704 		radeon_ring_unlock_undo(rdev, ring);
3705 		return r;
3706 	}
3707 
3708 	radeon_ring_unlock_commit(rdev, ring);
3709 	radeon_semaphore_free(rdev, &sem, *fence);
3710 
3711 	return r;
3712 }
3713 
3714 /*
3715  * IB stuff
3716  */
3717 /**
3718  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3719  *
3720  * @rdev: radeon_device pointer
3721  * @ib: radeon indirect buffer object
3722  *
3723  * Emits an DE (drawing engine) or CE (constant engine) IB
3724  * on the gfx ring.  IBs are usually generated by userspace
3725  * acceleration drivers and submitted to the kernel for
3726  * sheduling on the ring.  This function schedules the IB
3727  * on the gfx ring for execution by the GPU.
3728  */
3729 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3730 {
3731 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3732 	u32 header, control = INDIRECT_BUFFER_VALID;
3733 
3734 	if (ib->is_const_ib) {
3735 		/* set switch buffer packet before const IB */
3736 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3737 		radeon_ring_write(ring, 0);
3738 
3739 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3740 	} else {
3741 		u32 next_rptr;
3742 		if (ring->rptr_save_reg) {
3743 			next_rptr = ring->wptr + 3 + 4;
3744 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3745 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3746 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3747 			radeon_ring_write(ring, next_rptr);
3748 		} else if (rdev->wb.enabled) {
3749 			next_rptr = ring->wptr + 5 + 4;
3750 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3751 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3752 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3753 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3754 			radeon_ring_write(ring, next_rptr);
3755 		}
3756 
3757 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3758 	}
3759 
3760 	control |= ib->length_dw |
3761 		(ib->vm ? (ib->vm->id << 24) : 0);
3762 
3763 	radeon_ring_write(ring, header);
3764 	radeon_ring_write(ring,
3765 #ifdef __BIG_ENDIAN
3766 			  (2 << 0) |
3767 #endif
3768 			  (ib->gpu_addr & 0xFFFFFFFC));
3769 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3770 	radeon_ring_write(ring, control);
3771 }
3772 
3773 /**
3774  * cik_ib_test - basic gfx ring IB test
3775  *
3776  * @rdev: radeon_device pointer
3777  * @ring: radeon_ring structure holding ring information
3778  *
3779  * Allocate an IB and execute it on the gfx ring (CIK).
3780  * Provides a basic gfx ring test to verify that IBs are working.
3781  * Returns 0 on success, error on failure.
3782  */
3783 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3784 {
3785 	struct radeon_ib ib;
3786 	uint32_t scratch;
3787 	uint32_t tmp = 0;
3788 	unsigned i;
3789 	int r;
3790 
3791 	r = radeon_scratch_get(rdev, &scratch);
3792 	if (r) {
3793 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3794 		return r;
3795 	}
3796 	WREG32(scratch, 0xCAFEDEAD);
3797 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3798 	if (r) {
3799 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3800 		radeon_scratch_free(rdev, scratch);
3801 		return r;
3802 	}
3803 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3804 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3805 	ib.ptr[2] = 0xDEADBEEF;
3806 	ib.length_dw = 3;
3807 	r = radeon_ib_schedule(rdev, &ib, NULL);
3808 	if (r) {
3809 		radeon_scratch_free(rdev, scratch);
3810 		radeon_ib_free(rdev, &ib);
3811 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3812 		return r;
3813 	}
3814 	r = radeon_fence_wait(ib.fence, false);
3815 	if (r) {
3816 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3817 		radeon_scratch_free(rdev, scratch);
3818 		radeon_ib_free(rdev, &ib);
3819 		return r;
3820 	}
3821 	for (i = 0; i < rdev->usec_timeout; i++) {
3822 		tmp = RREG32(scratch);
3823 		if (tmp == 0xDEADBEEF)
3824 			break;
3825 		DRM_UDELAY(1);
3826 	}
3827 	if (i < rdev->usec_timeout) {
3828 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3829 	} else {
3830 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3831 			  scratch, tmp);
3832 		r = -EINVAL;
3833 	}
3834 	radeon_scratch_free(rdev, scratch);
3835 	radeon_ib_free(rdev, &ib);
3836 	return r;
3837 }
3838 
3839 /*
3840  * CP.
 * On CIK, gfx and compute now have independent command processors.
3842  *
3843  * GFX
3844  * Gfx consists of a single ring and can process both gfx jobs and
3845  * compute jobs.  The gfx CP consists of three microengines (ME):
3846  * PFP - Pre-Fetch Parser
3847  * ME - Micro Engine
3848  * CE - Constant Engine
3849  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3851  * used by the DE so that they can be loaded into cache in parallel
3852  * while the DE is processing state update packets.
3853  *
3854  * Compute
3855  * The compute CP consists of two microengines (ME):
3856  * MEC1 - Compute MicroEngine 1
3857  * MEC2 - Compute MicroEngine 2
3858  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3859  * The queues are exposed to userspace and are programmed directly
3860  * by the compute runtime.
3861  */
3862 /**
3863  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3864  *
3865  * @rdev: radeon_device pointer
3866  * @enable: enable or disable the MEs
3867  *
3868  * Halts or unhalts the gfx MEs.
3869  */
3870 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3871 {
3872 	if (enable)
3873 		WREG32(CP_ME_CNTL, 0);
3874 	else {
3875 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3876 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3877 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3878 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3879 	}
3880 	udelay(50);
3881 }
3882 
3883 /**
3884  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3885  *
3886  * @rdev: radeon_device pointer
3887  *
3888  * Loads the gfx PFP, ME, and CE ucode.
3889  * Returns 0 for success, -EINVAL if the ucode is not available.
3890  */
3891 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3892 {
3893 	const __be32 *fw_data;
3894 	int i;
3895 
3896 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3897 		return -EINVAL;
3898 
3899 	cik_cp_gfx_enable(rdev, false);
3900 
3901 	/* PFP */
3902 	fw_data = (const __be32 *)rdev->pfp_fw->data;
3903 	WREG32(CP_PFP_UCODE_ADDR, 0);
3904 	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3905 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3906 	WREG32(CP_PFP_UCODE_ADDR, 0);
3907 
3908 	/* CE */
3909 	fw_data = (const __be32 *)rdev->ce_fw->data;
3910 	WREG32(CP_CE_UCODE_ADDR, 0);
3911 	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3912 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3913 	WREG32(CP_CE_UCODE_ADDR, 0);
3914 
3915 	/* ME */
3916 	fw_data = (const __be32 *)rdev->me_fw->data;
3917 	WREG32(CP_ME_RAM_WADDR, 0);
3918 	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3919 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3920 	WREG32(CP_ME_RAM_WADDR, 0);
3921 
3922 	WREG32(CP_PFP_UCODE_ADDR, 0);
3923 	WREG32(CP_CE_UCODE_ADDR, 0);
3924 	WREG32(CP_ME_RAM_WADDR, 0);
3925 	WREG32(CP_ME_RAM_RADDR, 0);
3926 	return 0;
3927 }
3928 
3929 /**
3930  * cik_cp_gfx_start - start the gfx ring
3931  *
3932  * @rdev: radeon_device pointer
3933  *
3934  * Enables the ring and loads the clear state context and other
3935  * packets required to init the ring.
3936  * Returns 0 for success, error for failure.
3937  */
3938 static int cik_cp_gfx_start(struct radeon_device *rdev)
3939 {
3940 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3941 	int r, i;
3942 
3943 	/* init the CP */
3944 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3945 	WREG32(CP_ENDIAN_SWAP, 0);
3946 	WREG32(CP_DEVICE_ID, 1);
3947 
3948 	cik_cp_gfx_enable(rdev, true);
3949 
3950 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3951 	if (r) {
3952 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3953 		return r;
3954 	}
3955 
3956 	/* init the CE partitions.  CE only used for gfx on CIK */
3957 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3958 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3959 	radeon_ring_write(ring, 0xc000);
3960 	radeon_ring_write(ring, 0xc000);
3961 
3962 	/* setup clear context state */
3963 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3964 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3965 
3966 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3967 	radeon_ring_write(ring, 0x80000000);
3968 	radeon_ring_write(ring, 0x80000000);
3969 
3970 	for (i = 0; i < cik_default_size; i++)
3971 		radeon_ring_write(ring, cik_default_state[i]);
3972 
3973 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3974 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3975 
3976 	/* set clear context state */
3977 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3978 	radeon_ring_write(ring, 0);
3979 
3980 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3981 	radeon_ring_write(ring, 0x00000316);
3982 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3983 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3984 
3985 	radeon_ring_unlock_commit(rdev, ring);
3986 
3987 	return 0;
3988 }
3989 
3990 /**
3991  * cik_cp_gfx_fini - stop the gfx ring
3992  *
3993  * @rdev: radeon_device pointer
3994  *
3995  * Stop the gfx ring and tear down the driver ring
3996  * info.
3997  */
3998 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3999 {
4000 	cik_cp_gfx_enable(rdev, false);
4001 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4002 }
4003 
4004 /**
4005  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4006  *
4007  * @rdev: radeon_device pointer
4008  *
4009  * Program the location and size of the gfx ring buffer
4010  * and test it to make sure it's working.
4011  * Returns 0 for success, error for failure.
4012  */
4013 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4014 {
4015 	struct radeon_ring *ring;
4016 	u32 tmp;
4017 	u32 rb_bufsz;
4018 	u64 rb_addr;
4019 	int r;
4020 
4021 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4022 	if (rdev->family != CHIP_HAWAII)
4023 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4024 
4025 	/* Set the write pointer delay */
4026 	WREG32(CP_RB_WPTR_DELAY, 0);
4027 
4028 	/* set the RB to use vmid 0 */
4029 	WREG32(CP_RB_VMID, 0);
4030 
4031 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4032 
4033 	/* ring 0 - compute and gfx */
4034 	/* Set ring buffer size */
4035 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4036 	rb_bufsz = order_base_2(ring->ring_size / 8);
4037 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4038 #ifdef __BIG_ENDIAN
4039 	tmp |= BUF_SWAP_32BIT;
4040 #endif
4041 	WREG32(CP_RB0_CNTL, tmp);
4042 
4043 	/* Initialize the ring buffer's read and write pointers */
4044 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4045 	ring->wptr = 0;
4046 	WREG32(CP_RB0_WPTR, ring->wptr);
4047 
4048 	/* set the wb address wether it's enabled or not */
4049 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4050 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4051 
4052 	/* scratch register shadowing is no longer supported */
4053 	WREG32(SCRATCH_UMSK, 0);
4054 
4055 	if (!rdev->wb.enabled)
4056 		tmp |= RB_NO_UPDATE;
4057 
4058 	mdelay(1);
4059 	WREG32(CP_RB0_CNTL, tmp);
4060 
4061 	rb_addr = ring->gpu_addr >> 8;
4062 	WREG32(CP_RB0_BASE, rb_addr);
4063 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4064 
4065 	/* start the ring */
4066 	cik_cp_gfx_start(rdev);
4067 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4068 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4069 	if (r) {
4070 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4071 		return r;
4072 	}
4073 
4074 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4075 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4076 
4077 	return 0;
4078 }
4079 
4080 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4081 		     struct radeon_ring *ring)
4082 {
4083 	u32 rptr;
4084 
4085 	if (rdev->wb.enabled)
4086 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4087 	else
4088 		rptr = RREG32(CP_RB0_RPTR);
4089 
4090 	return rptr;
4091 }
4092 
4093 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4094 		     struct radeon_ring *ring)
4095 {
4096 	u32 wptr;
4097 
4098 	wptr = RREG32(CP_RB0_WPTR);
4099 
4100 	return wptr;
4101 }
4102 
/* Commit the ring's write pointer to the gfx RB register. */
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* read back, presumably to post the write — NOTE(review): confirm */
	(void)RREG32(CP_RB0_WPTR);
}
4109 
4110 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4111 			 struct radeon_ring *ring)
4112 {
4113 	u32 rptr;
4114 
4115 	if (rdev->wb.enabled) {
4116 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4117 	} else {
4118 		mutex_lock(&rdev->srbm_mutex);
4119 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4120 		rptr = RREG32(CP_HQD_PQ_RPTR);
4121 		cik_srbm_select(rdev, 0, 0, 0, 0);
4122 		mutex_unlock(&rdev->srbm_mutex);
4123 	}
4124 
4125 	return rptr;
4126 }
4127 
4128 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4129 			 struct radeon_ring *ring)
4130 {
4131 	u32 wptr;
4132 
4133 	if (rdev->wb.enabled) {
4134 		/* XXX check if swapping is necessary on BE */
4135 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4136 	} else {
4137 		mutex_lock(&rdev->srbm_mutex);
4138 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4139 		wptr = RREG32(CP_HQD_PQ_WPTR);
4140 		cik_srbm_select(rdev, 0, 0, 0, 0);
4141 		mutex_unlock(&rdev->srbm_mutex);
4142 	}
4143 
4144 	return wptr;
4145 }
4146 
/* Publish the new write pointer via the writeback slot, then ring the
 * queue's doorbell so the CP picks it up.
 */
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4154 
4155 /**
4156  * cik_cp_compute_enable - enable/disable the compute CP MEs
4157  *
4158  * @rdev: radeon_device pointer
4159  * @enable: enable or disable the MEs
4160  *
4161  * Halts or unhalts the compute MEs.
4162  */
4163 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4164 {
4165 	if (enable)
4166 		WREG32(CP_MEC_CNTL, 0);
4167 	else {
4168 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4169 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4170 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4171 	}
4172 	udelay(50);
4173 }
4174 
4175 /**
4176  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4177  *
4178  * @rdev: radeon_device pointer
4179  *
4180  * Loads the compute MEC1&2 ucode.
4181  * Returns 0 for success, -EINVAL if the ucode is not available.
4182  */
4183 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4184 {
4185 	const __be32 *fw_data;
4186 	int i;
4187 
4188 	if (!rdev->mec_fw)
4189 		return -EINVAL;
4190 
4191 	cik_cp_compute_enable(rdev, false);
4192 
4193 	/* MEC1 */
4194 	fw_data = (const __be32 *)rdev->mec_fw->data;
4195 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4196 	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4197 		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4198 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4199 
4200 	if (rdev->family == CHIP_KAVERI) {
4201 		/* MEC2 */
4202 		fw_data = (const __be32 *)rdev->mec_fw->data;
4203 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4204 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4205 			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4206 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4207 	}
4208 
4209 	return 0;
4210 }
4211 
4212 /**
4213  * cik_cp_compute_start - start the compute queues
4214  *
4215  * @rdev: radeon_device pointer
4216  *
4217  * Enable the compute queues.
4218  * Returns 0 for success, error for failure.
4219  */
4220 static int cik_cp_compute_start(struct radeon_device *rdev)
4221 {
4222 	cik_cp_compute_enable(rdev, true);
4223 
4224 	return 0;
4225 }
4226 
4227 /**
4228  * cik_cp_compute_fini - stop the compute queues
4229  *
4230  * @rdev: radeon_device pointer
4231  *
4232  * Stop the compute queues and tear down the driver queue
4233  * info.
4234  */
4235 static void cik_cp_compute_fini(struct radeon_device *rdev)
4236 {
4237 	int i, idx, r;
4238 
4239 	cik_cp_compute_enable(rdev, false);
4240 
4241 	for (i = 0; i < 2; i++) {
4242 		if (i == 0)
4243 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4244 		else
4245 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4246 
4247 		if (rdev->ring[idx].mqd_obj) {
4248 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4249 			if (unlikely(r != 0))
4250 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4251 
4252 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4253 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4254 
4255 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4256 			rdev->ring[idx].mqd_obj = NULL;
4257 		}
4258 	}
4259 }
4260 
4261 static void cik_mec_fini(struct radeon_device *rdev)
4262 {
4263 	int r;
4264 
4265 	if (rdev->mec.hpd_eop_obj) {
4266 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4267 		if (unlikely(r != 0))
4268 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4269 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4270 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4271 
4272 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4273 		rdev->mec.hpd_eop_obj = NULL;
4274 	}
4275 }
4276 
4277 #define MEC_HPD_SIZE 2048
4278 
/* Allocate, pin, and zero the HPD EOP buffer used by the compute MECs,
 * and record how many MECs/pipes/queues this chip exposes.
 * Returns 0 for success, error for failure.
 */
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 */
	if (rdev->family == CHIP_KAVERI)
		rdev->mec.num_mec = 2;
	else
		rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 4;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	/* one 2*MEC_HPD_SIZE slot per pipe */
	if (rdev->mec.hpd_eop_obj == NULL) {
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	/* pin it in GTT and map it so it can be cleared */
	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
4334 
/* CPU-side shadow of the per-queue CP state; each field is named after
 * the CP_HQD_*/CP_MQD_* register it mirrors (see cik_cp_compute_resume,
 * which fills these and writes them to the hardware).
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4373 
/* Memory Queue Descriptor (MQD) layout for Bonaire-class compute queues.
 * One of these lives in each compute ring's mqd_obj buffer; the embedded
 * queue_state block shadows the HQD registers (see cik_cp_compute_resume).
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4401 
4402 /**
4403  * cik_cp_compute_resume - setup the compute queue registers
4404  *
4405  * @rdev: radeon_device pointer
4406  *
4407  * Program the compute queues and test them to make sure they
4408  * are working.
4409  * Returns 0 for success, error for failure.
4410  */
4411 static int cik_cp_compute_resume(struct radeon_device *rdev)
4412 {
4413 	int r, i, idx;
4414 	u32 tmp;
4415 	bool use_doorbell = true;
4416 	u64 hqd_gpu_addr;
4417 	u64 mqd_gpu_addr;
4418 	u64 eop_gpu_addr;
4419 	u64 wb_gpu_addr;
4420 	u32 *buf;
4421 	struct bonaire_mqd *mqd;
4422 
4423 	r = cik_cp_compute_start(rdev);
4424 	if (r)
4425 		return r;
4426 
4427 	/* fix up chicken bits */
4428 	tmp = RREG32(CP_CPF_DEBUG);
4429 	tmp |= (1 << 23);
4430 	WREG32(CP_CPF_DEBUG, tmp);
4431 
4432 	/* init the pipes */
4433 	mutex_lock(&rdev->srbm_mutex);
4434 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4435 		int me = (i < 4) ? 1 : 2;
4436 		int pipe = (i < 4) ? i : (i - 4);
4437 
4438 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4439 
4440 		cik_srbm_select(rdev, me, pipe, 0, 0);
4441 
4442 		/* write the EOP addr */
4443 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4444 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4445 
4446 		/* set the VMID assigned */
4447 		WREG32(CP_HPD_EOP_VMID, 0);
4448 
4449 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4450 		tmp = RREG32(CP_HPD_EOP_CONTROL);
4451 		tmp &= ~EOP_SIZE_MASK;
4452 		tmp |= order_base_2(MEC_HPD_SIZE / 8);
4453 		WREG32(CP_HPD_EOP_CONTROL, tmp);
4454 	}
4455 	cik_srbm_select(rdev, 0, 0, 0, 0);
4456 	mutex_unlock(&rdev->srbm_mutex);
4457 
4458 	/* init the queues.  Just two for now. */
4459 	for (i = 0; i < 2; i++) {
4460 		if (i == 0)
4461 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4462 		else
4463 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4464 
4465 		if (rdev->ring[idx].mqd_obj == NULL) {
4466 			r = radeon_bo_create(rdev,
4467 					     sizeof(struct bonaire_mqd),
4468 					     PAGE_SIZE, true,
4469 					     RADEON_GEM_DOMAIN_GTT, NULL,
4470 					     &rdev->ring[idx].mqd_obj);
4471 			if (r) {
4472 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4473 				return r;
4474 			}
4475 		}
4476 
4477 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4478 		if (unlikely(r != 0)) {
4479 			cik_cp_compute_fini(rdev);
4480 			return r;
4481 		}
4482 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4483 				  &mqd_gpu_addr);
4484 		if (r) {
4485 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4486 			cik_cp_compute_fini(rdev);
4487 			return r;
4488 		}
4489 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4490 		if (r) {
4491 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4492 			cik_cp_compute_fini(rdev);
4493 			return r;
4494 		}
4495 
4496 		/* init the mqd struct */
4497 		memset(buf, 0, sizeof(struct bonaire_mqd));
4498 
4499 		mqd = (struct bonaire_mqd *)buf;
4500 		mqd->header = 0xC0310800;
4501 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4502 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4503 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4504 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4505 
4506 		mutex_lock(&rdev->srbm_mutex);
4507 		cik_srbm_select(rdev, rdev->ring[idx].me,
4508 				rdev->ring[idx].pipe,
4509 				rdev->ring[idx].queue, 0);
4510 
4511 		/* disable wptr polling */
4512 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4513 		tmp &= ~WPTR_POLL_EN;
4514 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4515 
4516 		/* enable doorbell? */
4517 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4518 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4519 		if (use_doorbell)
4520 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4521 		else
4522 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4523 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4524 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4525 
4526 		/* disable the queue if it's active */
4527 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4528 		mqd->queue_state.cp_hqd_pq_rptr = 0;
4529 		mqd->queue_state.cp_hqd_pq_wptr= 0;
4530 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4531 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4532 			for (i = 0; i < rdev->usec_timeout; i++) {
4533 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4534 					break;
4535 				udelay(1);
4536 			}
4537 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4538 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4539 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4540 		}
4541 
4542 		/* set the pointer to the MQD */
4543 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4544 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4545 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4546 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4547 		/* set MQD vmid to 0 */
4548 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4549 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4550 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4551 
4552 		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4553 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4554 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4555 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4556 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4557 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4558 
4559 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4560 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4561 		mqd->queue_state.cp_hqd_pq_control &=
4562 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4563 
4564 		mqd->queue_state.cp_hqd_pq_control |=
4565 			order_base_2(rdev->ring[idx].ring_size / 8);
4566 		mqd->queue_state.cp_hqd_pq_control |=
4567 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4568 #ifdef __BIG_ENDIAN
4569 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4570 #endif
4571 		mqd->queue_state.cp_hqd_pq_control &=
4572 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4573 		mqd->queue_state.cp_hqd_pq_control |=
4574 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4575 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4576 
4577 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4578 		if (i == 0)
4579 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4580 		else
4581 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4582 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4583 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4584 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4585 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4586 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4587 
4588 		/* set the wb address wether it's enabled or not */
4589 		if (i == 0)
4590 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4591 		else
4592 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4593 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4594 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4595 			upper_32_bits(wb_gpu_addr) & 0xffff;
4596 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4597 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4598 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4599 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4600 
4601 		/* enable the doorbell if requested */
4602 		if (use_doorbell) {
4603 			mqd->queue_state.cp_hqd_pq_doorbell_control =
4604 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4605 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4606 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4607 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4608 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4609 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4610 				~(DOORBELL_SOURCE | DOORBELL_HIT);
4611 
4612 		} else {
4613 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4614 		}
4615 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4616 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4617 
4618 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4619 		rdev->ring[idx].wptr = 0;
4620 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4621 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4622 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4623 
4624 		/* set the vmid for the queue */
4625 		mqd->queue_state.cp_hqd_vmid = 0;
4626 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4627 
4628 		/* activate the queue */
4629 		mqd->queue_state.cp_hqd_active = 1;
4630 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4631 
4632 		cik_srbm_select(rdev, 0, 0, 0, 0);
4633 		mutex_unlock(&rdev->srbm_mutex);
4634 
4635 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4636 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4637 
4638 		rdev->ring[idx].ready = true;
4639 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4640 		if (r)
4641 			rdev->ring[idx].ready = false;
4642 	}
4643 
4644 	return 0;
4645 }
4646 
/**
 * cik_cp_enable - enable/disable both CP blocks
 *
 * @rdev: radeon_device pointer
 * @enable: true to enable, false to halt
 *
 * Convenience wrapper that enables or halts the gfx CP and the
 * compute CP (MEC) together.
 */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4652 
/**
 * cik_cp_load_microcode - load the CP microcode
 *
 * @rdev: radeon_device pointer
 *
 * Load the gfx CP microcode first; if that succeeds, load the
 * compute CP (MEC) microcode as well.
 * Returns 0 on success, negative error code on failure.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r)
		return r;

	return cik_cp_compute_load_microcode(rdev);
}
4666 
/**
 * cik_cp_fini - tear down both CP blocks
 *
 * @rdev: radeon_device pointer
 *
 * Convenience wrapper that tears down the gfx CP and the
 * compute CP (MEC).
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4672 
4673 static int cik_cp_resume(struct radeon_device *rdev)
4674 {
4675 	int r;
4676 
4677 	cik_enable_gui_idle_interrupt(rdev, false);
4678 
4679 	r = cik_cp_load_microcode(rdev);
4680 	if (r)
4681 		return r;
4682 
4683 	r = cik_cp_gfx_resume(rdev);
4684 	if (r)
4685 		return r;
4686 	r = cik_cp_compute_resume(rdev);
4687 	if (r)
4688 		return r;
4689 
4690 	cik_enable_gui_idle_interrupt(rdev, true);
4691 
4692 	return 0;
4693 }
4694 
/**
 * cik_print_gpu_status_regs - dump GPU status registers
 *
 * @rdev: radeon_device pointer
 *
 * Print the main GRBM/SRBM/SDMA/CP status registers to the kernel
 * log; used around GPU reset for post-mortem debugging.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	/* both SDMA engines share the register layout; only the
	 * per-engine offset differs */
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4734 
4735 /**
4736  * cik_gpu_check_soft_reset - check which blocks are busy
4737  *
4738  * @rdev: radeon_device pointer
4739  *
4740  * Check which blocks are busy and return the relevant reset
4741  * mask to be used by cik_gpu_soft_reset().
4742  * Returns a mask of the blocks to be reset.
4743  */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	/* any busy gfx pipeline block -> reset the gfx engine */
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	/* display hang detection is shared with evergreen */
	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
4815 
4816 /**
4817  * cik_gpu_soft_reset - soft reset GPU
4818  *
4819  * @rdev: radeon_device pointer
4820  * @reset_mask: mask of which blocks to reset
4821  *
4822  * Soft reset the blocks specified in @reset_mask.
4823  */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* dump state before the reset for post-mortem analysis */
	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* halt the SDMA engines that are about to be reset */
	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* MC soft reset only on dGPUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	/* assert, delay, then deassert the reset bits; the readbacks
	 * after each write presumably post the write to the hw —
	 * NOTE(review): confirm against other radeon reset paths */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	/* dump state again so before/after can be compared */
	cik_print_gpu_status_regs(rdev);
}
4946 
/* GMCON register state saved across a KV (IGP) pci config reset */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;	/* GMCON_RENG_EXECUTE */
	u32 gmcon_misc;		/* GMCON_MISC */
	u32 gmcon_misc3;	/* GMCON_MISC3 */
};
4952 
/**
 * kv_save_regs_for_reset - save GMCON registers before a pci config reset
 *
 * @rdev: radeon_device pointer
 * @save: structure to stash the current register values in
 *
 * Saves GMCON_RENG_EXECUTE/MISC/MISC3, then writes them back with the
 * RENG execute-on-power-up/reg-update and stutter bits cleared so the
 * GMCON engine stays quiescent across the reset.
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
4964 
4965 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
4966 				      struct kv_reset_save_regs *save)
4967 {
4968 	int i;
4969 
4970 	WREG32(GMCON_PGFSM_WRITE, 0);
4971 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
4972 
4973 	for (i = 0; i < 5; i++)
4974 		WREG32(GMCON_PGFSM_WRITE, 0);
4975 
4976 	WREG32(GMCON_PGFSM_WRITE, 0);
4977 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
4978 
4979 	for (i = 0; i < 5; i++)
4980 		WREG32(GMCON_PGFSM_WRITE, 0);
4981 
4982 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
4983 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
4984 
4985 	for (i = 0; i < 5; i++)
4986 		WREG32(GMCON_PGFSM_WRITE, 0);
4987 
4988 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
4989 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
4990 
4991 	for (i = 0; i < 5; i++)
4992 		WREG32(GMCON_PGFSM_WRITE, 0);
4993 
4994 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
4995 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
4996 
4997 	for (i = 0; i < 5; i++)
4998 		WREG32(GMCON_PGFSM_WRITE, 0);
4999 
5000 	WREG32(GMCON_PGFSM_WRITE, 0);
5001 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5002 
5003 	for (i = 0; i < 5; i++)
5004 		WREG32(GMCON_PGFSM_WRITE, 0);
5005 
5006 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5007 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5008 
5009 	for (i = 0; i < 5; i++)
5010 		WREG32(GMCON_PGFSM_WRITE, 0);
5011 
5012 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5013 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5014 
5015 	for (i = 0; i < 5; i++)
5016 		WREG32(GMCON_PGFSM_WRITE, 0);
5017 
5018 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5019 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5020 
5021 	for (i = 0; i < 5; i++)
5022 		WREG32(GMCON_PGFSM_WRITE, 0);
5023 
5024 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5025 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5026 
5027 	for (i = 0; i < 5; i++)
5028 		WREG32(GMCON_PGFSM_WRITE, 0);
5029 
5030 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5031 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5032 
5033 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5034 	WREG32(GMCON_MISC, save->gmcon_misc);
5035 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5036 }
5037 
/**
 * cik_gpu_pci_config_reset - reset the asic via the pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Quiesces the CP/MEC/SDMA engines and the RLC, stops memory access,
 * then performs a pci config reset and waits for the asic to come back
 * (CONFIG_MEMSIZE reads 0xffffffff while the asic is in reset).  On
 * IGPs (KV) the GMCON registers are saved/restored around the reset.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5100 
5101 /**
5102  * cik_asic_reset - soft reset GPU
5103  *
5104  * @rdev: radeon_device pointer
5105  *
5106  * Look up which blocks are hung and attempt
5107  * to reset them.
5108  * Returns 0 for success.
5109  */
int cik_asic_reset(struct radeon_device *rdev)
{
	u32 reset_mask;

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* tell the bios the engine is hung before we touch it */
	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	/* try soft reset */
	cik_gpu_soft_reset(rdev, reset_mask);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* try pci config reset */
	if (reset_mask && radeon_hard_reset)
		cik_gpu_pci_config_reset(rdev);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* clear the hung flag only if all blocks came back idle;
	 * note the return value is 0 even if blocks are still hung */
	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
5135 
5136 /**
5137  * cik_gfx_is_lockup - check if the 3D engine is locked up
5138  *
5139  * @rdev: radeon_device pointer
5140  * @ring: radeon_ring structure holding ring information
5141  *
5142  * Check if the 3D engine is locked up (CIK).
5143  * Returns true if the engine is locked, false if not.
5144  */
5145 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5146 {
5147 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5148 
5149 	if (!(reset_mask & (RADEON_RESET_GFX |
5150 			    RADEON_RESET_COMPUTE |
5151 			    RADEON_RESET_CP))) {
5152 		radeon_ring_lockup_update(rdev, ring);
5153 		return false;
5154 	}
5155 	return radeon_ring_test_lockup(rdev, ring);
5156 }
5157 
5158 /* MC */
5159 /**
5160  * cik_mc_program - program the GPU memory controller
5161  *
5162  * @rdev: radeon_device pointer
5163  *
5164  * Set the location of vram, gart, and AGP in the GPU's
5165  * physical address space (CIK).
5166  */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP: clear a bank of 32 register groups at
	 * 0x2c14..0x2c24, stride 0x18 */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: end in the upper 16 bits, start in the lower,
	 * both in 16MB (>> 24) units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP is unused on CIK; program an empty aperture */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5214 
5215 /**
5216  * cik_mc_init - initialize the memory controller driver params
5217  *
5218  * @rdev: radeon_device pointer
5219  *
5220  * Look up the amount of vram, vram width, and decide how to place
5221  * vram and gart within the GPU's physical address space (CIK).
5222  * Returns 0 for success.
5223  */
5224 static int cik_mc_init(struct radeon_device *rdev)
5225 {
5226 	u32 tmp;
5227 	int chansize, numchan;
5228 
5229 	/* Get VRAM informations */
5230 	rdev->mc.vram_is_ddr = true;
5231 	tmp = RREG32(MC_ARB_RAMCFG);
5232 	if (tmp & CHANSIZE_MASK) {
5233 		chansize = 64;
5234 	} else {
5235 		chansize = 32;
5236 	}
5237 	tmp = RREG32(MC_SHARED_CHMAP);
5238 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5239 	case 0:
5240 	default:
5241 		numchan = 1;
5242 		break;
5243 	case 1:
5244 		numchan = 2;
5245 		break;
5246 	case 2:
5247 		numchan = 4;
5248 		break;
5249 	case 3:
5250 		numchan = 8;
5251 		break;
5252 	case 4:
5253 		numchan = 3;
5254 		break;
5255 	case 5:
5256 		numchan = 6;
5257 		break;
5258 	case 6:
5259 		numchan = 10;
5260 		break;
5261 	case 7:
5262 		numchan = 12;
5263 		break;
5264 	case 8:
5265 		numchan = 16;
5266 		break;
5267 	}
5268 	rdev->mc.vram_width = numchan * chansize;
5269 	/* Could aper size report 0 ? */
5270 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5271 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5272 	/* size in MB on si */
5273 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5274 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5275 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5276 	si_vram_gtt_location(rdev, &rdev->mc);
5277 	radeon_update_bandwidth_info(rdev);
5278 
5279 	return 0;
5280 }
5281 
5282 /*
5283  * GART
5284  * VMID 0 is the physical GPU addresses as used by the kernel.
5285  * VMIDs 1-15 are used for userspace clients and are handled
5286  * by the radeon vm/hsa code.
5287  */
5288 /**
5289  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5290  *
5291  * @rdev: radeon_device pointer
5292  *
5293  * Flush the TLB for the VMID 0 page table (CIK).
5294  */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15 */
	/* only context 0 (the VMID 0 / GART page table) is invalidated here */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5303 
5304 /**
5305  * cik_pcie_gart_enable - gart enable
5306  *
5307  * @rdev: radeon_device pointer
5308  *
5309  * This sets up the TLBs, programs the page tables for VMID0,
5310  * sets up the hw for VMIDs 1-15 which are allocated on
5311  * demand, and sets up the global locations for the LDS, GDS,
5312  * and GPUVM for FSA64 clients (CIK).
5313  * Returns 0 for success, errors for failure.
5314  */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* setup context0: flat page table covering the GART aperture */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* unnamed registers, always zeroed here — no symbolic
	 * definitions available */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* FIXME start with 4G, once using 2 level pt switch to full
	 * vm size space
	 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* contexts 1-7 and 8-15 live in two separate register banks */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
5425 
5426 /**
5427  * cik_pcie_gart_disable - gart disable
5428  *
5429  * @rdev: radeon_device pointer
5430  *
5431  * This disables all VM page table (CIK).
5432  */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control: note ENABLE_L1_TLB is deliberately not set */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache: ENABLE_L2_CACHE is deliberately not set */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
5453 
5454 /**
5455  * cik_pcie_gart_fini - vm fini callback
5456  *
5457  * @rdev: radeon_device pointer
5458  *
5459  * Tears down the driver GART/VM setup (CIK).
5460  */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* disable the hw first, then release the table memory */
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5467 
5468 /* vm parser */
5469 /**
5470  * cik_ib_parse - vm ib_parse callback
5471  *
5472  * @rdev: radeon_device pointer
5473  * @ib: indirect buffer pointer
5474  *
5475  * CIK uses hw IB checking so this is a nop (CIK).
5476  */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* CIK relies on hw IB validation; nothing to do in sw */
	return 0;
}
5481 
5482 /*
5483  * vm
5484  * VMID 0 is the physical GPU addresses as used by the kernel.
5485  * VMIDs 1-15 are used for userspace clients and are handled
5486  * by the radeon vm/hsa code.
5487  */
5488 /**
5489  * cik_vm_init - cik vm init callback
5490  *
5491  * @rdev: radeon_device pointer
5492  *
5493  * Inits cik specific vm parameters (number of VMs, base of vram for
5494  * VMIDs 1-15) (CIK).
5495  * Returns 0 for success.
5496  */
5497 int cik_vm_init(struct radeon_device *rdev)
5498 {
5499 	/* number of VMs */
5500 	rdev->vm_manager.nvm = 16;
5501 	/* base offset of vram pages */
5502 	if (rdev->flags & RADEON_IS_IGP) {
5503 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5504 		tmp <<= 22;
5505 		rdev->vm_manager.vram_base_offset = tmp;
5506 	} else
5507 		rdev->vm_manager.vram_base_offset = 0;
5508 
5509 	return 0;
5510 }
5511 
5512 /**
5513  * cik_vm_fini - cik vm fini callback
5514  *
5515  * @rdev: radeon_device pointer
5516  *
5517  * Tear down any asic specific VM setup (CIK).
5518  */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* no asic specific VM state to tear down */
}
5522 
/**
 * cik_vm_decode_fault - print human readable fault info
 *
 * @rdev: radeon_device pointer
 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: faulting memory client id, four packed ASCII bytes
 *
 * Print human readable fault information (CIK).
 */
static void cik_vm_decode_fault(struct radeon_device *rdev,
				u32 status, u32 mc_client)
{
	u32 mc_id;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	/* unpack the four client-name characters (high byte first) */
	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };

	/* Hawaii uses a wider client id field in the status register */
	if (rdev->family == CHIP_HAWAII)
		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	else
		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;

	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_client, mc_id);
}
5551 
5552 /**
5553  * cik_vm_flush - cik vm flush using the CP
5554  *
5555  * @rdev: radeon_device pointer
5556  *
5557  * Update the page table base and flush the VM TLB
5558  * using the CP (CIK).
5559  */
void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write the page directory base for this VMID; contexts 0-7 and
	 * 8-15 live in two separate register banks */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs */
	/* select the target VMID via SRBM_GFX_CNTL first */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm->id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch SRBM back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, ridx);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* compute doesn't have PFP */
	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
5624 
5625 /*
5626  * RLC
5627  * The RLC is a multi-purpose microengine that handles a
5628  * variety of functions, the most important of which is
5629  * the interrupt controller.
5630  */
5631 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5632 					  bool enable)
5633 {
5634 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5635 
5636 	if (enable)
5637 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5638 	else
5639 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5640 	WREG32(CP_INT_CNTL_RING0, tmp);
5641 }
5642 
5643 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5644 {
5645 	u32 tmp;
5646 
5647 	tmp = RREG32(RLC_LB_CNTL);
5648 	if (enable)
5649 		tmp |= LOAD_BALANCE_ENABLE;
5650 	else
5651 		tmp &= ~LOAD_BALANCE_ENABLE;
5652 	WREG32(RLC_LB_CNTL, tmp);
5653 }
5654 
5655 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5656 {
5657 	u32 i, j, k;
5658 	u32 mask;
5659 
5660 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5661 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5662 			cik_select_se_sh(rdev, i, j);
5663 			for (k = 0; k < rdev->usec_timeout; k++) {
5664 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5665 					break;
5666 				udelay(1);
5667 			}
5668 		}
5669 	}
5670 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5671 
5672 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5673 	for (k = 0; k < rdev->usec_timeout; k++) {
5674 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5675 			break;
5676 		udelay(1);
5677 	}
5678 }
5679 
5680 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5681 {
5682 	u32 tmp;
5683 
5684 	tmp = RREG32(RLC_CNTL);
5685 	if (tmp != rlc)
5686 		WREG32(RLC_CNTL, rlc);
5687 }
5688 
5689 static u32 cik_halt_rlc(struct radeon_device *rdev)
5690 {
5691 	u32 data, orig;
5692 
5693 	orig = data = RREG32(RLC_CNTL);
5694 
5695 	if (data & RLC_ENABLE) {
5696 		u32 i;
5697 
5698 		data &= ~RLC_ENABLE;
5699 		WREG32(RLC_CNTL, data);
5700 
5701 		for (i = 0; i < rdev->usec_timeout; i++) {
5702 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5703 				break;
5704 			udelay(1);
5705 		}
5706 
5707 		cik_wait_for_rlc_serdes(rdev);
5708 	}
5709 
5710 	return orig;
5711 }
5712 
5713 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5714 {
5715 	u32 tmp, i, mask;
5716 
5717 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5718 	WREG32(RLC_GPR_REG2, tmp);
5719 
5720 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5721 	for (i = 0; i < rdev->usec_timeout; i++) {
5722 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5723 			break;
5724 		udelay(1);
5725 	}
5726 
5727 	for (i = 0; i < rdev->usec_timeout; i++) {
5728 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5729 			break;
5730 		udelay(1);
5731 	}
5732 }
5733 
5734 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5735 {
5736 	u32 tmp;
5737 
5738 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5739 	WREG32(RLC_GPR_REG2, tmp);
5740 }
5741 
5742 /**
5743  * cik_rlc_stop - stop the RLC ME
5744  *
5745  * @rdev: radeon_device pointer
5746  *
5747  * Halt the RLC ME (MicroEngine) (CIK).
5748  */
5749 static void cik_rlc_stop(struct radeon_device *rdev)
5750 {
5751 	WREG32(RLC_CNTL, 0);
5752 
5753 	cik_enable_gui_idle_interrupt(rdev, false);
5754 
5755 	cik_wait_for_rlc_serdes(rdev);
5756 }
5757 
5758 /**
5759  * cik_rlc_start - start the RLC ME
5760  *
5761  * @rdev: radeon_device pointer
5762  *
5763  * Unhalt the RLC ME (MicroEngine) (CIK).
5764  */
5765 static void cik_rlc_start(struct radeon_device *rdev)
5766 {
5767 	WREG32(RLC_CNTL, RLC_ENABLE);
5768 
5769 	cik_enable_gui_idle_interrupt(rdev, true);
5770 
5771 	udelay(50);
5772 }
5773 
5774 /**
5775  * cik_rlc_resume - setup the RLC hw
5776  *
5777  * @rdev: radeon_device pointer
5778  *
5779  * Initialize the RLC registers, load the ucode,
5780  * and start the RLC (CIK).
5781  * Returns 0 for success, -EINVAL if the ucode is not available.
5782  */
5783 static int cik_rlc_resume(struct radeon_device *rdev)
5784 {
5785 	u32 i, size, tmp;
5786 	const __be32 *fw_data;
5787 
5788 	if (!rdev->rlc_fw)
5789 		return -EINVAL;
5790 
5791 	switch (rdev->family) {
5792 	case CHIP_BONAIRE:
5793 	case CHIP_HAWAII:
5794 	default:
5795 		size = BONAIRE_RLC_UCODE_SIZE;
5796 		break;
5797 	case CHIP_KAVERI:
5798 		size = KV_RLC_UCODE_SIZE;
5799 		break;
5800 	case CHIP_KABINI:
5801 		size = KB_RLC_UCODE_SIZE;
5802 		break;
5803 	}
5804 
5805 	cik_rlc_stop(rdev);
5806 
5807 	/* disable CG */
5808 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5809 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5810 
5811 	si_rlc_reset(rdev);
5812 
5813 	cik_init_pg(rdev);
5814 
5815 	cik_init_cg(rdev);
5816 
5817 	WREG32(RLC_LB_CNTR_INIT, 0);
5818 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5819 
5820 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5821 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5822 	WREG32(RLC_LB_PARAMS, 0x00600408);
5823 	WREG32(RLC_LB_CNTL, 0x80000004);
5824 
5825 	WREG32(RLC_MC_CNTL, 0);
5826 	WREG32(RLC_UCODE_CNTL, 0);
5827 
5828 	fw_data = (const __be32 *)rdev->rlc_fw->data;
5829 		WREG32(RLC_GPM_UCODE_ADDR, 0);
5830 	for (i = 0; i < size; i++)
5831 		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5832 	WREG32(RLC_GPM_UCODE_ADDR, 0);
5833 
5834 	/* XXX - find out what chips support lbpw */
5835 	cik_enable_lbpw(rdev, false);
5836 
5837 	if (rdev->family == CHIP_BONAIRE)
5838 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
5839 
5840 	cik_rlc_start(rdev);
5841 
5842 	return 0;
5843 }
5844 
/* Toggle coarse-grained clock gating (CGCG/CGLS) for the gfx block. */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* halt the RLC while programming the serdes; restored below */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* NOTE(review): four back-to-back reads with the results
		 * discarded — presumably posting reads to settle the CB
		 * clock before disabling CG; confirm against hw docs.
		 */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
5880 
/*
 * Toggle medium-grained clock gating (MGCG) and the related memory
 * light-sleep / CGTS features for the gfx block.  The register sequence
 * is order-sensitive: overrides are programmed around a halted RLC.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* enable CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* clear bit 1 of the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* halt the RLC while programming the serdes; restored below */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			/* configure CGTS SM clock gating */
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* set bit 1 of the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000002;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* turn off RLC memory light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		/* turn off CP memory light sleep */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* force CGTS overrides on */
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}
5959 
/* MC clock-gating control registers toggled by cik_enable_mc_ls() and
 * cik_enable_mc_mgcg().
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5972 
5973 static void cik_enable_mc_ls(struct radeon_device *rdev,
5974 			     bool enable)
5975 {
5976 	int i;
5977 	u32 orig, data;
5978 
5979 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5980 		orig = data = RREG32(mc_cg_registers[i]);
5981 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5982 			data |= MC_LS_ENABLE;
5983 		else
5984 			data &= ~MC_LS_ENABLE;
5985 		if (data != orig)
5986 			WREG32(mc_cg_registers[i], data);
5987 	}
5988 }
5989 
5990 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5991 			       bool enable)
5992 {
5993 	int i;
5994 	u32 orig, data;
5995 
5996 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5997 		orig = data = RREG32(mc_cg_registers[i]);
5998 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5999 			data |= MC_CG_ENABLE;
6000 		else
6001 			data &= ~MC_CG_ENABLE;
6002 		if (data != orig)
6003 			WREG32(mc_cg_registers[i], data);
6004 	}
6005 }
6006 
6007 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6008 				 bool enable)
6009 {
6010 	u32 orig, data;
6011 
6012 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6013 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6014 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6015 	} else {
6016 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6017 		data |= 0xff000000;
6018 		if (data != orig)
6019 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6020 
6021 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6022 		data |= 0xff000000;
6023 		if (data != orig)
6024 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6025 	}
6026 }
6027 
6028 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6029 				 bool enable)
6030 {
6031 	u32 orig, data;
6032 
6033 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6034 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6035 		data |= 0x100;
6036 		if (orig != data)
6037 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6038 
6039 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6040 		data |= 0x100;
6041 		if (orig != data)
6042 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6043 	} else {
6044 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6045 		data &= ~0x100;
6046 		if (orig != data)
6047 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6048 
6049 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6050 		data &= ~0x100;
6051 		if (orig != data)
6052 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6053 	}
6054 }
6055 
/* Toggle UVD medium-grained clock gating (memory gating bits + DCM). */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		/* NOTE(review): the value read above is discarded by the
		 * assignment below; if the intent was to set the low 12 bits
		 * on top of the current value this should likely be
		 * data |= 0xfff — confirm against the hw docs before changing.
		 */
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		/* clear the memory gating bits and DCM to disable */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6081 
6082 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6083 			       bool enable)
6084 {
6085 	u32 orig, data;
6086 
6087 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6088 
6089 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6090 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6091 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6092 	else
6093 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6094 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6095 
6096 	if (orig != data)
6097 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6098 }
6099 
6100 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6101 				bool enable)
6102 {
6103 	u32 orig, data;
6104 
6105 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6106 
6107 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6108 		data &= ~CLOCK_GATING_DIS;
6109 	else
6110 		data |= CLOCK_GATING_DIS;
6111 
6112 	if (orig != data)
6113 		WREG32(HDP_HOST_PATH_CNTL, data);
6114 }
6115 
6116 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6117 			      bool enable)
6118 {
6119 	u32 orig, data;
6120 
6121 	orig = data = RREG32(HDP_MEM_POWER_LS);
6122 
6123 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6124 		data |= HDP_LS_ENABLE;
6125 	else
6126 		data &= ~HDP_LS_ENABLE;
6127 
6128 	if (orig != data)
6129 		WREG32(HDP_MEM_POWER_LS, data);
6130 }
6131 
/*
 * Enable or disable clock gating for the blocks selected in @block.
 * For gfx, MGCG/CGCG ordering differs between enable and disable and
 * must be preserved (see the "order matters!" note below).
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		/* mask GUI idle interrupts while reprogramming gating */
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC gating only applies to discrete parts */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6179 
/* Enable clock gating: gfx first, then UVD internal CG, then the rest. */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6194 
/* Disable clock gating in the reverse order of cik_init_cg(): the
 * non-gfx blocks first, gfx last. */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6205 
6206 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6207 					  bool enable)
6208 {
6209 	u32 data, orig;
6210 
6211 	orig = data = RREG32(RLC_PG_CNTL);
6212 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6213 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6214 	else
6215 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6216 	if (orig != data)
6217 		WREG32(RLC_PG_CNTL, data);
6218 }
6219 
6220 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6221 					  bool enable)
6222 {
6223 	u32 data, orig;
6224 
6225 	orig = data = RREG32(RLC_PG_CNTL);
6226 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6227 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6228 	else
6229 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6230 	if (orig != data)
6231 		WREG32(RLC_PG_CNTL, data);
6232 }
6233 
6234 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6235 {
6236 	u32 data, orig;
6237 
6238 	orig = data = RREG32(RLC_PG_CNTL);
6239 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6240 		data &= ~DISABLE_CP_PG;
6241 	else
6242 		data |= DISABLE_CP_PG;
6243 	if (orig != data)
6244 		WREG32(RLC_PG_CNTL, data);
6245 }
6246 
6247 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6248 {
6249 	u32 data, orig;
6250 
6251 	orig = data = RREG32(RLC_PG_CNTL);
6252 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6253 		data &= ~DISABLE_GDS_PG;
6254 	else
6255 		data |= DISABLE_GDS_PG;
6256 	if (orig != data)
6257 		WREG32(RLC_PG_CNTL, data);
6258 }
6259 
/* Sizes/offsets are indices into the __be32 firmware image (dwords).
 * NOTE(review): assumed from the fw_data[table_offset + i] indexing below;
 * confirm against the CP firmware layout.
 */
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096

/* Copy the powergating register tables embedded in the CP firmware images
 * (CE, PFP, ME, MEC(s)) into the RLC cp_table buffer, converting from the
 * firmware's big-endian words to little-endian for the hw.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset;

	/* KAVERI carries one extra MEC table */
	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			fw_data = (const __be32 *)rdev->ce_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 1) {
			fw_data = (const __be32 *)rdev->pfp_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 2) {
			fw_data = (const __be32 *)rdev->me_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else {
			/* me >= 3: MEC firmware, different table offset */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			table_offset = CP_MEC_TABLE_OFFSET;
		}

		for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
			dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
		}
		bo_offset += CP_ME_TABLE_SIZE;
	}
}
6301 
/* Toggle gfx coarse-grained powergating and the RLC auto-powergate bit. */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): result discarded — presumably a posting read
		 * to make the disable take effect; confirm against hw docs. */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6331 
6332 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6333 {
6334 	u32 mask = 0, tmp, tmp1;
6335 	int i;
6336 
6337 	cik_select_se_sh(rdev, se, sh);
6338 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6339 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6340 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6341 
6342 	tmp &= 0xffff0000;
6343 
6344 	tmp |= tmp1;
6345 	tmp >>= 16;
6346 
6347 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6348 		mask <<= 1;
6349 		mask |= 1;
6350 	}
6351 
6352 	return (~tmp) & mask;
6353 }
6354 
6355 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6356 {
6357 	u32 i, j, k, active_cu_number = 0;
6358 	u32 mask, counter, cu_bitmap;
6359 	u32 tmp = 0;
6360 
6361 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6362 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6363 			mask = 1;
6364 			cu_bitmap = 0;
6365 			counter = 0;
6366 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6367 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6368 					if (counter < 2)
6369 						cu_bitmap |= mask;
6370 					counter ++;
6371 				}
6372 				mask <<= 1;
6373 			}
6374 
6375 			active_cu_number += counter;
6376 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6377 		}
6378 	}
6379 
6380 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6381 
6382 	tmp = RREG32(RLC_MAX_PG_CU);
6383 	tmp &= ~MAX_PU_CU_MASK;
6384 	tmp |= MAX_PU_CU(active_cu_number);
6385 	WREG32(RLC_MAX_PG_CU, tmp);
6386 }
6387 
6388 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6389 				       bool enable)
6390 {
6391 	u32 data, orig;
6392 
6393 	orig = data = RREG32(RLC_PG_CNTL);
6394 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6395 		data |= STATIC_PER_CU_PG_ENABLE;
6396 	else
6397 		data &= ~STATIC_PER_CU_PG_ENABLE;
6398 	if (orig != data)
6399 		WREG32(RLC_PG_CNTL, data);
6400 }
6401 
6402 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6403 					bool enable)
6404 {
6405 	u32 data, orig;
6406 
6407 	orig = data = RREG32(RLC_PG_CNTL);
6408 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6409 		data |= DYN_PER_CU_PG_ENABLE;
6410 	else
6411 		data &= ~DYN_PER_CU_PG_ENABLE;
6412 	if (orig != data)
6413 		WREG32(RLC_PG_CNTL, data);
6414 }
6415 
/* RLC GPM scratch addresses written via RLC_GPM_SCRATCH_ADDR by
 * cik_init_gfx_cgpg() for the save/restore list and the clear-state
 * descriptor.
 */
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6418 
/*
 * Program the RLC for gfx powergating: publish the clear-state descriptor
 * and save/restore list into RLC GPM scratch, point the hw at the
 * save/restore and cp-table buffers, and tune the powergate delays.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* clear-state descriptor: address (hi, lo) then size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state: zero out the three descriptor words */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6467 
/* Toggle all gfx powergating features: CGPG, then static and dynamic
 * per-CU MGPG. */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6474 
6475 u32 cik_get_csb_size(struct radeon_device *rdev)
6476 {
6477 	u32 count = 0;
6478 	const struct cs_section_def *sect = NULL;
6479 	const struct cs_extent_def *ext = NULL;
6480 
6481 	if (rdev->rlc.cs_data == NULL)
6482 		return 0;
6483 
6484 	/* begin clear state */
6485 	count += 2;
6486 	/* context control state */
6487 	count += 3;
6488 
6489 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6490 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6491 			if (sect->id == SECT_CONTEXT)
6492 				count += 2 + ext->reg_count;
6493 			else
6494 				return 0;
6495 		}
6496 	}
6497 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6498 	count += 4;
6499 	/* end clear state */
6500 	count += 2;
6501 	/* clear state */
6502 	count += 2;
6503 
6504 	return count;
6505 }
6506 
/*
 * Emit the clear-state PM4 packet stream into @buffer.  The packet layout
 * must match the dword counts in cik_get_csb_size() exactly; bail out
 * early on missing cs_data, a NULL buffer, or a non-context section.
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	/* context control */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* one SET_CONTEXT_REG packet per extent */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* pa_sc_raster_config / pa_sc_raster_config1: per-family values */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_HAWAII:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	/* end clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
6570 
/* Enable powergating features according to rdev->pg_flags; gfx PG needs
 * the cgpg registers and CP/GDS PG set up first. */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
6585 
/* Disable powergating in the reverse order of cik_init_pg(): gfx PG
 * first, then CP and GDS PG. */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
6596 
6597 /*
6598  * Interrupts
6599  * Starting with r6xx, interrupts are handled via a ring buffer.
6600  * Ring buffers are areas of GPU accessible memory that the GPU
6601  * writes interrupt vectors into and the host reads vectors out of.
6602  * There is a rptr (read pointer) that determines where the
6603  * host is currently reading, and a wptr (write pointer)
6604  * which determines where the GPU has written.  When the
6605  * pointers are equal, the ring is idle.  When the GPU
6606  * writes vectors to the ring buffer, it increments the
6607  * wptr.  When there is an interrupt, the host then starts
6608  * fetching commands and processing them until the pointers are
6609  * equal again at which point it updates the rptr.
6610  */
6611 
6612 /**
6613  * cik_enable_interrupts - Enable the interrupt ring buffer
6614  *
6615  * @rdev: radeon_device pointer
6616  *
6617  * Enable the interrupt ring buffer (CIK).
6618  */
6619 static void cik_enable_interrupts(struct radeon_device *rdev)
6620 {
6621 	u32 ih_cntl = RREG32(IH_CNTL);
6622 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6623 
6624 	ih_cntl |= ENABLE_INTR;
6625 	ih_rb_cntl |= IH_RB_ENABLE;
6626 	WREG32(IH_CNTL, ih_cntl);
6627 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6628 	rdev->ih.enabled = true;
6629 }
6630 
6631 /**
6632  * cik_disable_interrupts - Disable the interrupt ring buffer
6633  *
6634  * @rdev: radeon_device pointer
6635  *
6636  * Disable the interrupt ring buffer (CIK).
6637  */
6638 static void cik_disable_interrupts(struct radeon_device *rdev)
6639 {
6640 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6641 	u32 ih_cntl = RREG32(IH_CNTL);
6642 
6643 	ih_rb_cntl &= ~IH_RB_ENABLE;
6644 	ih_cntl &= ~ENABLE_INTR;
6645 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6646 	WREG32(IH_CNTL, ih_cntl);
6647 	/* set rptr, wptr to 0 */
6648 	WREG32(IH_RB_RPTR, 0);
6649 	WREG32(IH_RB_WPTR, 0);
6650 	rdev->ih.enabled = false;
6651 	rdev->ih.rptr = 0;
6652 }
6653 
6654 /**
6655  * cik_disable_interrupt_state - Disable all interrupt sources
6656  *
6657  * @rdev: radeon_device pointer
6658  *
6659  * Clear all interrupt enable bits used by the driver (CIK).
6660  */
6661 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6662 {
6663 	u32 tmp;
6664 
6665 	/* gfx ring */
6666 	tmp = RREG32(CP_INT_CNTL_RING0) &
6667 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6668 	WREG32(CP_INT_CNTL_RING0, tmp);
6669 	/* sdma */
6670 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6671 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6672 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6673 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6674 	/* compute queues */
6675 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6676 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6677 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6678 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6679 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6680 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6681 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6682 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6683 	/* grbm */
6684 	WREG32(GRBM_INT_CNTL, 0);
6685 	/* vline/vblank, etc. */
6686 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6687 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6688 	if (rdev->num_crtc >= 4) {
6689 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6690 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6691 	}
6692 	if (rdev->num_crtc >= 6) {
6693 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6694 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6695 	}
6696 
6697 	/* dac hotplug */
6698 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6699 
6700 	/* digital hotplug */
6701 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6702 	WREG32(DC_HPD1_INT_CONTROL, tmp);
6703 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6704 	WREG32(DC_HPD2_INT_CONTROL, tmp);
6705 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6706 	WREG32(DC_HPD3_INT_CONTROL, tmp);
6707 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6708 	WREG32(DC_HPD4_INT_CONTROL, tmp);
6709 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6710 	WREG32(DC_HPD5_INT_CONTROL, tmp);
6711 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6712 	WREG32(DC_HPD6_INT_CONTROL, tmp);
6713 
6714 }
6715 
6716 /**
6717  * cik_irq_init - init and enable the interrupt ring
6718  *
6719  * @rdev: radeon_device pointer
6720  *
6721  * Allocate a ring buffer for the interrupt controller,
6722  * enable the RLC, disable interrupts, enable the IH
6723  * ring buffer and enable it (CIK).
 * Called at device load and resume.
6725  * Returns 0 for success, errors for failure.
6726  */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		/* RLC failed to come up; tear down the ring we just allocated */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	/* ring base is a 256-byte aligned MC address, hence the >> 8 */
	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	/* make sure bus mastering is enabled before we take interrupts */
	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
6797 
6798 /**
6799  * cik_irq_set - enable/disable interrupt sources
6800  *
6801  * @rdev: radeon_device pointer
6802  *
6803  * Enable interrupt sources on the GPU (vblanks, hpd,
6804  * etc.) (CIK).
6805  * Returns 0 for success, errors for failure.
6806  */
6807 int cik_irq_set(struct radeon_device *rdev)
6808 {
6809 	u32 cp_int_cntl;
6810 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6811 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6812 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6813 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6814 	u32 grbm_int_cntl = 0;
6815 	u32 dma_cntl, dma_cntl1;
6816 	u32 thermal_int;
6817 
6818 	if (!rdev->irq.installed) {
6819 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6820 		return -EINVAL;
6821 	}
6822 	/* don't enable anything if the ih is disabled */
6823 	if (!rdev->ih.enabled) {
6824 		cik_disable_interrupts(rdev);
6825 		/* force the active interrupt state to all disabled */
6826 		cik_disable_interrupt_state(rdev);
6827 		return 0;
6828 	}
6829 
6830 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6831 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6832 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6833 
6834 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6835 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6836 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6837 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6838 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6839 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6840 
6841 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6842 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6843 
6844 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6845 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6846 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6847 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6848 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6849 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6850 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6851 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6852 
6853 	if (rdev->flags & RADEON_IS_IGP)
6854 		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6855 			~(THERM_INTH_MASK | THERM_INTL_MASK);
6856 	else
6857 		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6858 			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6859 
6860 	/* enable CP interrupts on all rings */
6861 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6862 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
6863 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6864 	}
6865 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6866 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6867 		DRM_DEBUG("si_irq_set: sw int cp1\n");
6868 		if (ring->me == 1) {
6869 			switch (ring->pipe) {
6870 			case 0:
6871 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6872 				break;
6873 			case 1:
6874 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6875 				break;
6876 			case 2:
6877 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6878 				break;
6879 			case 3:
6880 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6881 				break;
6882 			default:
6883 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6884 				break;
6885 			}
6886 		} else if (ring->me == 2) {
6887 			switch (ring->pipe) {
6888 			case 0:
6889 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6890 				break;
6891 			case 1:
6892 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6893 				break;
6894 			case 2:
6895 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6896 				break;
6897 			case 3:
6898 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6899 				break;
6900 			default:
6901 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6902 				break;
6903 			}
6904 		} else {
6905 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6906 		}
6907 	}
6908 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6909 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6910 		DRM_DEBUG("si_irq_set: sw int cp2\n");
6911 		if (ring->me == 1) {
6912 			switch (ring->pipe) {
6913 			case 0:
6914 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6915 				break;
6916 			case 1:
6917 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6918 				break;
6919 			case 2:
6920 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6921 				break;
6922 			case 3:
6923 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6924 				break;
6925 			default:
6926 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6927 				break;
6928 			}
6929 		} else if (ring->me == 2) {
6930 			switch (ring->pipe) {
6931 			case 0:
6932 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6933 				break;
6934 			case 1:
6935 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6936 				break;
6937 			case 2:
6938 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6939 				break;
6940 			case 3:
6941 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6942 				break;
6943 			default:
6944 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6945 				break;
6946 			}
6947 		} else {
6948 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6949 		}
6950 	}
6951 
6952 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6953 		DRM_DEBUG("cik_irq_set: sw int dma\n");
6954 		dma_cntl |= TRAP_ENABLE;
6955 	}
6956 
6957 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6958 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
6959 		dma_cntl1 |= TRAP_ENABLE;
6960 	}
6961 
6962 	if (rdev->irq.crtc_vblank_int[0] ||
6963 	    atomic_read(&rdev->irq.pflip[0])) {
6964 		DRM_DEBUG("cik_irq_set: vblank 0\n");
6965 		crtc1 |= VBLANK_INTERRUPT_MASK;
6966 	}
6967 	if (rdev->irq.crtc_vblank_int[1] ||
6968 	    atomic_read(&rdev->irq.pflip[1])) {
6969 		DRM_DEBUG("cik_irq_set: vblank 1\n");
6970 		crtc2 |= VBLANK_INTERRUPT_MASK;
6971 	}
6972 	if (rdev->irq.crtc_vblank_int[2] ||
6973 	    atomic_read(&rdev->irq.pflip[2])) {
6974 		DRM_DEBUG("cik_irq_set: vblank 2\n");
6975 		crtc3 |= VBLANK_INTERRUPT_MASK;
6976 	}
6977 	if (rdev->irq.crtc_vblank_int[3] ||
6978 	    atomic_read(&rdev->irq.pflip[3])) {
6979 		DRM_DEBUG("cik_irq_set: vblank 3\n");
6980 		crtc4 |= VBLANK_INTERRUPT_MASK;
6981 	}
6982 	if (rdev->irq.crtc_vblank_int[4] ||
6983 	    atomic_read(&rdev->irq.pflip[4])) {
6984 		DRM_DEBUG("cik_irq_set: vblank 4\n");
6985 		crtc5 |= VBLANK_INTERRUPT_MASK;
6986 	}
6987 	if (rdev->irq.crtc_vblank_int[5] ||
6988 	    atomic_read(&rdev->irq.pflip[5])) {
6989 		DRM_DEBUG("cik_irq_set: vblank 5\n");
6990 		crtc6 |= VBLANK_INTERRUPT_MASK;
6991 	}
6992 	if (rdev->irq.hpd[0]) {
6993 		DRM_DEBUG("cik_irq_set: hpd 1\n");
6994 		hpd1 |= DC_HPDx_INT_EN;
6995 	}
6996 	if (rdev->irq.hpd[1]) {
6997 		DRM_DEBUG("cik_irq_set: hpd 2\n");
6998 		hpd2 |= DC_HPDx_INT_EN;
6999 	}
7000 	if (rdev->irq.hpd[2]) {
7001 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7002 		hpd3 |= DC_HPDx_INT_EN;
7003 	}
7004 	if (rdev->irq.hpd[3]) {
7005 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7006 		hpd4 |= DC_HPDx_INT_EN;
7007 	}
7008 	if (rdev->irq.hpd[4]) {
7009 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7010 		hpd5 |= DC_HPDx_INT_EN;
7011 	}
7012 	if (rdev->irq.hpd[5]) {
7013 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7014 		hpd6 |= DC_HPDx_INT_EN;
7015 	}
7016 
7017 	if (rdev->irq.dpm_thermal) {
7018 		DRM_DEBUG("dpm thermal\n");
7019 		if (rdev->flags & RADEON_IS_IGP)
7020 			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
7021 		else
7022 			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
7023 	}
7024 
7025 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7026 
7027 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7028 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7029 
7030 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7031 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7032 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7033 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7034 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7035 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7036 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7037 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7038 
7039 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7040 
7041 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7042 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7043 	if (rdev->num_crtc >= 4) {
7044 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7045 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7046 	}
7047 	if (rdev->num_crtc >= 6) {
7048 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7049 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7050 	}
7051 
7052 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7053 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7054 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7055 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7056 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7057 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7058 
7059 	if (rdev->flags & RADEON_IS_IGP)
7060 		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7061 	else
7062 		WREG32_SMC(CG_THERMAL_INT, thermal_int);
7063 
7064 	return 0;
7065 }
7066 
7067 /**
7068  * cik_irq_ack - ack interrupt sources
7069  *
7070  * @rdev: radeon_device pointer
7071  *
7072  * Ack interrupt sources on the GPU (vblanks, hpd,
7073  * etc.) (CIK).  Certain interrupts sources are sw
7074  * generated and do not require an explicit ack.
7075  */
7076 static inline void cik_irq_ack(struct radeon_device *rdev)
7077 {
7078 	u32 tmp;
7079 
7080 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7081 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7082 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7083 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7084 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7085 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7086 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7087 
7088 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7089 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7090 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7091 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7092 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7093 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7094 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7095 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7096 
7097 	if (rdev->num_crtc >= 4) {
7098 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7099 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7100 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7101 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7102 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7103 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7104 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7105 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7106 	}
7107 
7108 	if (rdev->num_crtc >= 6) {
7109 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7110 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7111 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7112 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7113 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7114 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7115 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7116 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7117 	}
7118 
7119 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7120 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7121 		tmp |= DC_HPDx_INT_ACK;
7122 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7123 	}
7124 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7125 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7126 		tmp |= DC_HPDx_INT_ACK;
7127 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7128 	}
7129 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7130 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7131 		tmp |= DC_HPDx_INT_ACK;
7132 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7133 	}
7134 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7135 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7136 		tmp |= DC_HPDx_INT_ACK;
7137 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7138 	}
7139 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7140 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7141 		tmp |= DC_HPDx_INT_ACK;
7142 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7143 	}
7144 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7145 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7146 		tmp |= DC_HPDx_INT_ACK;
7147 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7148 	}
7149 }
7150 
7151 /**
7152  * cik_irq_disable - disable interrupts
7153  *
7154  * @rdev: radeon_device pointer
7155  *
7156  * Disable interrupts on the hw (CIK).
7157  */
static void cik_irq_disable(struct radeon_device *rdev)
{
	/* turn off the IH ring first so no new vectors arrive */
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	/* then mask every individual interrupt source */
	cik_disable_interrupt_state(rdev);
}
7166 
7167 /**
 * cik_irq_suspend - disable interrupts for suspend
7169  *
7170  * @rdev: radeon_device pointer
7171  *
7172  * Disable interrupts and stop the RLC (CIK).
7173  * Used for suspend.
7174  */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* disable and ack all interrupt sources, then halt the RLC */
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7180 
7181 /**
7182  * cik_irq_fini - tear down interrupt support
7183  *
7184  * @rdev: radeon_device pointer
7185  *
7186  * Disable interrupts on the hw and free the IH ring
7187  * buffer (CIK).
7188  * Used for driver unload.
7189  */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* quiesce the hw (disable irqs, stop RLC) before freeing the ring */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7195 
7196 /**
7197  * cik_get_ih_wptr - get the IH ring buffer wptr
7198  *
7199  * @rdev: radeon_device pointer
7200  *
7201  * Get the IH ring buffer wptr from either the register
7202  * or the writeback memory buffer (CIK).  Also check for
7203  * ring buffer overflow and deal with it.
7204  * Used by cik_irq_process().
7205  * Returns the value of the wptr.
7206  */
7207 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7208 {
7209 	u32 wptr, tmp;
7210 
7211 	if (rdev->wb.enabled)
7212 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7213 	else
7214 		wptr = RREG32(IH_RB_WPTR);
7215 
7216 	if (wptr & RB_OVERFLOW) {
7217 		/* When a ring buffer overflow happen start parsing interrupt
7218 		 * from the last not overwritten vector (wptr + 16). Hopefully
7219 		 * this should allow us to catchup.
7220 		 */
7221 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
7222 			wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
7223 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7224 		tmp = RREG32(IH_RB_CNTL);
7225 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7226 		WREG32(IH_RB_CNTL, tmp);
7227 	}
7228 	return (wptr & rdev->ih.ptr_mask);
7229 }
7230 
7231 /*        CIK IV Ring
7232  * Each IV ring entry is 128 bits:
7233  * [7:0]    - interrupt source id
7234  * [31:8]   - reserved
7235  * [59:32]  - interrupt source data
7236  * [63:60]  - reserved
7237  * [71:64]  - RINGID
7238  *            CP:
7239  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7240  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7241  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7242  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7243  *            PIPE_ID - ME0 0=3D
7244  *                    - ME1&2 compute dispatcher (4 pipes each)
7245  *            SDMA:
7246  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7247  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7248  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7249  * [79:72]  - VMID
7250  * [95:80]  - PASID
7251  * [127:96] - reserved
7252  */
7253 /**
7254  * cik_irq_process - interrupt handler
7255  *
7256  * @rdev: radeon_device pointer
7257  *
7258  * Interrupt hander (CIK).  Walk the IH ring,
7259  * ack interrupts and schedule work to handle
7260  * interrupt events.
7261  * Returns irq process return code.
7262  */
7263 int cik_irq_process(struct radeon_device *rdev)
7264 {
7265 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7266 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7267 	u32 wptr;
7268 	u32 rptr;
7269 	u32 src_id, src_data, ring_id;
7270 	u8 me_id, pipe_id, queue_id;
7271 	u32 ring_index;
7272 	bool queue_hotplug = false;
7273 	bool queue_reset = false;
7274 	u32 addr, status, mc_client;
7275 	bool queue_thermal = false;
7276 
7277 	if (!rdev->ih.enabled || rdev->shutdown)
7278 		return IRQ_NONE;
7279 
7280 	wptr = cik_get_ih_wptr(rdev);
7281 
7282 restart_ih:
7283 	/* is somebody else already processing irqs? */
7284 	if (atomic_xchg(&rdev->ih.lock, 1))
7285 		return IRQ_NONE;
7286 
7287 	rptr = rdev->ih.rptr;
7288 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7289 
7290 	/* Order reading of wptr vs. reading of IH ring data */
7291 	rmb();
7292 
7293 	/* display interrupts */
7294 	cik_irq_ack(rdev);
7295 
7296 	while (rptr != wptr) {
7297 		/* wptr/rptr are in bytes! */
7298 		ring_index = rptr / 4;
7299 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7300 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7301 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7302 
7303 		switch (src_id) {
7304 		case 1: /* D1 vblank/vline */
7305 			switch (src_data) {
7306 			case 0: /* D1 vblank */
7307 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7308 					if (rdev->irq.crtc_vblank_int[0]) {
7309 						drm_handle_vblank(rdev->ddev, 0);
7310 						rdev->pm.vblank_sync = true;
7311 						wake_up(&rdev->irq.vblank_queue);
7312 					}
7313 					if (atomic_read(&rdev->irq.pflip[0]))
7314 						radeon_crtc_handle_flip(rdev, 0);
7315 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7316 					DRM_DEBUG("IH: D1 vblank\n");
7317 				}
7318 				break;
7319 			case 1: /* D1 vline */
7320 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7321 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7322 					DRM_DEBUG("IH: D1 vline\n");
7323 				}
7324 				break;
7325 			default:
7326 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7327 				break;
7328 			}
7329 			break;
7330 		case 2: /* D2 vblank/vline */
7331 			switch (src_data) {
7332 			case 0: /* D2 vblank */
7333 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7334 					if (rdev->irq.crtc_vblank_int[1]) {
7335 						drm_handle_vblank(rdev->ddev, 1);
7336 						rdev->pm.vblank_sync = true;
7337 						wake_up(&rdev->irq.vblank_queue);
7338 					}
7339 					if (atomic_read(&rdev->irq.pflip[1]))
7340 						radeon_crtc_handle_flip(rdev, 1);
7341 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7342 					DRM_DEBUG("IH: D2 vblank\n");
7343 				}
7344 				break;
7345 			case 1: /* D2 vline */
7346 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7347 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7348 					DRM_DEBUG("IH: D2 vline\n");
7349 				}
7350 				break;
7351 			default:
7352 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7353 				break;
7354 			}
7355 			break;
7356 		case 3: /* D3 vblank/vline */
7357 			switch (src_data) {
7358 			case 0: /* D3 vblank */
7359 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7360 					if (rdev->irq.crtc_vblank_int[2]) {
7361 						drm_handle_vblank(rdev->ddev, 2);
7362 						rdev->pm.vblank_sync = true;
7363 						wake_up(&rdev->irq.vblank_queue);
7364 					}
7365 					if (atomic_read(&rdev->irq.pflip[2]))
7366 						radeon_crtc_handle_flip(rdev, 2);
7367 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7368 					DRM_DEBUG("IH: D3 vblank\n");
7369 				}
7370 				break;
7371 			case 1: /* D3 vline */
7372 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7373 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7374 					DRM_DEBUG("IH: D3 vline\n");
7375 				}
7376 				break;
7377 			default:
7378 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7379 				break;
7380 			}
7381 			break;
7382 		case 4: /* D4 vblank/vline */
7383 			switch (src_data) {
7384 			case 0: /* D4 vblank */
7385 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7386 					if (rdev->irq.crtc_vblank_int[3]) {
7387 						drm_handle_vblank(rdev->ddev, 3);
7388 						rdev->pm.vblank_sync = true;
7389 						wake_up(&rdev->irq.vblank_queue);
7390 					}
7391 					if (atomic_read(&rdev->irq.pflip[3]))
7392 						radeon_crtc_handle_flip(rdev, 3);
7393 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7394 					DRM_DEBUG("IH: D4 vblank\n");
7395 				}
7396 				break;
7397 			case 1: /* D4 vline */
7398 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7399 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7400 					DRM_DEBUG("IH: D4 vline\n");
7401 				}
7402 				break;
7403 			default:
7404 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7405 				break;
7406 			}
7407 			break;
7408 		case 5: /* D5 vblank/vline */
7409 			switch (src_data) {
7410 			case 0: /* D5 vblank */
7411 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7412 					if (rdev->irq.crtc_vblank_int[4]) {
7413 						drm_handle_vblank(rdev->ddev, 4);
7414 						rdev->pm.vblank_sync = true;
7415 						wake_up(&rdev->irq.vblank_queue);
7416 					}
7417 					if (atomic_read(&rdev->irq.pflip[4]))
7418 						radeon_crtc_handle_flip(rdev, 4);
7419 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7420 					DRM_DEBUG("IH: D5 vblank\n");
7421 				}
7422 				break;
7423 			case 1: /* D5 vline */
7424 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7425 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7426 					DRM_DEBUG("IH: D5 vline\n");
7427 				}
7428 				break;
7429 			default:
7430 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7431 				break;
7432 			}
7433 			break;
7434 		case 6: /* D6 vblank/vline */
7435 			switch (src_data) {
7436 			case 0: /* D6 vblank */
7437 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7438 					if (rdev->irq.crtc_vblank_int[5]) {
7439 						drm_handle_vblank(rdev->ddev, 5);
7440 						rdev->pm.vblank_sync = true;
7441 						wake_up(&rdev->irq.vblank_queue);
7442 					}
7443 					if (atomic_read(&rdev->irq.pflip[5]))
7444 						radeon_crtc_handle_flip(rdev, 5);
7445 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7446 					DRM_DEBUG("IH: D6 vblank\n");
7447 				}
7448 				break;
7449 			case 1: /* D6 vline */
7450 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7451 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7452 					DRM_DEBUG("IH: D6 vline\n");
7453 				}
7454 				break;
7455 			default:
7456 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7457 				break;
7458 			}
7459 			break;
7460 		case 42: /* HPD hotplug */
7461 			switch (src_data) {
7462 			case 0:
7463 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7464 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7465 					queue_hotplug = true;
7466 					DRM_DEBUG("IH: HPD1\n");
7467 				}
7468 				break;
7469 			case 1:
7470 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7471 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7472 					queue_hotplug = true;
7473 					DRM_DEBUG("IH: HPD2\n");
7474 				}
7475 				break;
7476 			case 2:
7477 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7478 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7479 					queue_hotplug = true;
7480 					DRM_DEBUG("IH: HPD3\n");
7481 				}
7482 				break;
7483 			case 3:
7484 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7485 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7486 					queue_hotplug = true;
7487 					DRM_DEBUG("IH: HPD4\n");
7488 				}
7489 				break;
7490 			case 4:
7491 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7492 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7493 					queue_hotplug = true;
7494 					DRM_DEBUG("IH: HPD5\n");
7495 				}
7496 				break;
7497 			case 5:
7498 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7499 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7500 					queue_hotplug = true;
7501 					DRM_DEBUG("IH: HPD6\n");
7502 				}
7503 				break;
7504 			default:
7505 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7506 				break;
7507 			}
7508 			break;
7509 		case 124: /* UVD */
7510 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7511 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7512 			break;
7513 		case 146:
7514 		case 147:
7515 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7516 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7517 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7518 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7519 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7520 				addr);
7521 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7522 				status);
7523 			cik_vm_decode_fault(rdev, status, addr, mc_client);
7524 			/* reset addr and status */
7525 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7526 			break;
7527 		case 167: /* VCE */
7528 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7529 			switch (src_data) {
7530 			case 0:
7531 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7532 				break;
7533 			case 1:
7534 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7535 				break;
7536 			default:
7537 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7538 				break;
7539 			}
7540 			break;
7541 		case 176: /* GFX RB CP_INT */
7542 		case 177: /* GFX IB CP_INT */
7543 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7544 			break;
7545 		case 181: /* CP EOP event */
7546 			DRM_DEBUG("IH: CP EOP\n");
7547 			/* XXX check the bitfield order! */
7548 			me_id = (ring_id & 0x60) >> 5;
7549 			pipe_id = (ring_id & 0x18) >> 3;
7550 			queue_id = (ring_id & 0x7) >> 0;
7551 			switch (me_id) {
7552 			case 0:
7553 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7554 				break;
7555 			case 1:
7556 			case 2:
7557 				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7558 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7559 				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7560 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7561 				break;
7562 			}
7563 			break;
7564 		case 184: /* CP Privileged reg access */
7565 			DRM_ERROR("Illegal register access in command stream\n");
7566 			/* XXX check the bitfield order! */
7567 			me_id = (ring_id & 0x60) >> 5;
7568 			pipe_id = (ring_id & 0x18) >> 3;
7569 			queue_id = (ring_id & 0x7) >> 0;
7570 			switch (me_id) {
7571 			case 0:
7572 				/* This results in a full GPU reset, but all we need to do is soft
7573 				 * reset the CP for gfx
7574 				 */
7575 				queue_reset = true;
7576 				break;
7577 			case 1:
7578 				/* XXX compute */
7579 				queue_reset = true;
7580 				break;
7581 			case 2:
7582 				/* XXX compute */
7583 				queue_reset = true;
7584 				break;
7585 			}
7586 			break;
7587 		case 185: /* CP Privileged inst */
7588 			DRM_ERROR("Illegal instruction in command stream\n");
7589 			/* XXX check the bitfield order! */
7590 			me_id = (ring_id & 0x60) >> 5;
7591 			pipe_id = (ring_id & 0x18) >> 3;
7592 			queue_id = (ring_id & 0x7) >> 0;
7593 			switch (me_id) {
7594 			case 0:
7595 				/* This results in a full GPU reset, but all we need to do is soft
7596 				 * reset the CP for gfx
7597 				 */
7598 				queue_reset = true;
7599 				break;
7600 			case 1:
7601 				/* XXX compute */
7602 				queue_reset = true;
7603 				break;
7604 			case 2:
7605 				/* XXX compute */
7606 				queue_reset = true;
7607 				break;
7608 			}
7609 			break;
7610 		case 224: /* SDMA trap event */
7611 			/* XXX check the bitfield order! */
7612 			me_id = (ring_id & 0x3) >> 0;
7613 			queue_id = (ring_id & 0xc) >> 2;
7614 			DRM_DEBUG("IH: SDMA trap\n");
7615 			switch (me_id) {
7616 			case 0:
7617 				switch (queue_id) {
7618 				case 0:
7619 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7620 					break;
7621 				case 1:
7622 					/* XXX compute */
7623 					break;
7624 				case 2:
7625 					/* XXX compute */
7626 					break;
7627 				}
7628 				break;
7629 			case 1:
7630 				switch (queue_id) {
7631 				case 0:
7632 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7633 					break;
7634 				case 1:
7635 					/* XXX compute */
7636 					break;
7637 				case 2:
7638 					/* XXX compute */
7639 					break;
7640 				}
7641 				break;
7642 			}
7643 			break;
7644 		case 230: /* thermal low to high */
7645 			DRM_DEBUG("IH: thermal low to high\n");
7646 			rdev->pm.dpm.thermal.high_to_low = false;
7647 			queue_thermal = true;
7648 			break;
7649 		case 231: /* thermal high to low */
7650 			DRM_DEBUG("IH: thermal high to low\n");
7651 			rdev->pm.dpm.thermal.high_to_low = true;
7652 			queue_thermal = true;
7653 			break;
7654 		case 233: /* GUI IDLE */
7655 			DRM_DEBUG("IH: GUI idle\n");
7656 			break;
7657 		case 241: /* SDMA Privileged inst */
7658 		case 247: /* SDMA Privileged inst */
7659 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
7660 			/* XXX check the bitfield order! */
7661 			me_id = (ring_id & 0x3) >> 0;
7662 			queue_id = (ring_id & 0xc) >> 2;
7663 			switch (me_id) {
7664 			case 0:
7665 				switch (queue_id) {
7666 				case 0:
7667 					queue_reset = true;
7668 					break;
7669 				case 1:
7670 					/* XXX compute */
7671 					queue_reset = true;
7672 					break;
7673 				case 2:
7674 					/* XXX compute */
7675 					queue_reset = true;
7676 					break;
7677 				}
7678 				break;
7679 			case 1:
7680 				switch (queue_id) {
7681 				case 0:
7682 					queue_reset = true;
7683 					break;
7684 				case 1:
7685 					/* XXX compute */
7686 					queue_reset = true;
7687 					break;
7688 				case 2:
7689 					/* XXX compute */
7690 					queue_reset = true;
7691 					break;
7692 				}
7693 				break;
7694 			}
7695 			break;
7696 		default:
7697 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7698 			break;
7699 		}
7700 
7701 		/* wptr/rptr are in bytes! */
7702 		rptr += 16;
7703 		rptr &= rdev->ih.ptr_mask;
7704 	}
7705 	if (queue_hotplug)
7706 		schedule_work(&rdev->hotplug_work);
7707 	if (queue_reset)
7708 		schedule_work(&rdev->reset_work);
7709 	if (queue_thermal)
7710 		schedule_work(&rdev->pm.dpm.thermal.work);
7711 	rdev->ih.rptr = rptr;
7712 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
7713 	atomic_set(&rdev->ih.lock, 0);
7714 
7715 	/* make sure wptr hasn't changed while processing */
7716 	wptr = cik_get_ih_wptr(rdev);
7717 	if (wptr != rptr)
7718 		goto restart_ih;
7719 
7720 	return IRQ_HANDLED;
7721 }
7722 
7723 /*
7724  * startup/shutdown callbacks
7725  */
7726 /**
7727  * cik_startup - program the asic to a functional state
7728  *
7729  * @rdev: radeon_device pointer
7730  *
7731  * Programs the asic to a functional state (CIK).
7732  * Called by cik_init() and cik_resume().
7733  * Returns 0 for success, error for failure.
7734  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* dGPU only: load the MC firmware here unless dpm is enabled
	 * (NOTE(review): presumably dpm loads the MC ucode itself - confirm) */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* pick the save/restore register list matching the IGP variant */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start the fence driver for every ring before the rings come up */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD is optional: on any failure just disable its ring (size 0)
	 * instead of failing the whole startup */
	r = radeon_uvd_resume(rdev);
	if (!r) {
		r = uvd_v4_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	/* VCE is optional as well: disable both VCE rings on failure */
	r = radeon_vce_resume(rdev);
	if (!r) {
		r = vce_v2_0_resume(rdev);
		if (!r)
			r = radeon_fence_driver_start_ring(rdev,
							   TN_RING_TYPE_VCE1_INDEX);
		if (!r)
			r = radeon_fence_driver_start_ring(rdev,
							   TN_RING_TYPE_VCE2_INDEX);
	}
	if (r) {
		dev_err(rdev->dev, "VCE init error (%d).\n", r);
		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* bring up the gfx ring */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	/* bring up both SDMA rings */
	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* UVD ring is only set up when the resume above succeeded
	 * (ring_size stays 0 otherwise) */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     RADEON_CP_PACKET2);
		if (!r)
			r = uvd_v1_0_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	/* -ENOENT marks "no VCE rings present"; it is deliberately not
	 * reported as an error below */
	r = -ENOENT;

	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
	if (ring->ring_size)
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     VCE_CMD_NO_OP);

	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
	if (ring->ring_size)
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     VCE_CMD_NO_OP);

	if (!r)
		r = vce_v1_0_init(rdev);
	else if (r != -ENOENT)
		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
7966 
7967 /**
7968  * cik_resume - resume the asic to a functional state
7969  *
7970  * @rdev: radeon_device pointer
7971  *
7972  * Programs the asic to a functional state (CIK).
7973  * Called at resume.
7974  * Returns 0 for success, error for failure.
7975  */
7976 int cik_resume(struct radeon_device *rdev)
7977 {
7978 	int r;
7979 
7980 	/* post card */
7981 	atom_asic_init(rdev->mode_info.atom_context);
7982 
7983 	/* init golden registers */
7984 	cik_init_golden_registers(rdev);
7985 
7986 	if (rdev->pm.pm_method == PM_METHOD_DPM)
7987 		radeon_pm_resume(rdev);
7988 
7989 	rdev->accel_working = true;
7990 	r = cik_startup(rdev);
7991 	if (r) {
7992 		DRM_ERROR("cik startup failed on resume\n");
7993 		rdev->accel_working = false;
7994 		return r;
7995 	}
7996 
7997 	return r;
7998 
7999 }
8000 
8001 /**
8002  * cik_suspend - suspend the asic
8003  *
8004  * @rdev: radeon_device pointer
8005  *
8006  * Bring the chip into a state suitable for suspend (CIK).
8007  * Called at suspend.
8008  * Returns 0 for success.
8009  */
int cik_suspend(struct radeon_device *rdev)
{
	/* stop power management first */
	radeon_pm_suspend(rdev);
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the graphics CP and the SDMA engines */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	/* quiesce UVD and VCE */
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	radeon_vce_suspend(rdev);
	/* tear down powergating and clockgating */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	/* finally disable interrupts, writeback and the GART */
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
8027 
8028 /* Plan is to move initialization in that function and use
8029  * helper function so that radeon_device_init pretty much
8030  * do nothing more than calling asic specific function. This
8031  * should also allow to remove a bunch of callback function
8032  * like vram_info.
8033  */
8034 /**
8035  * cik_init - asic specific driver and hw init
8036  *
8037  * @rdev: radeon_device pointer
8038  *
8039  * Setup asic specific driver variables and program the hw
8040  * to a functional state (CIK).
8041  * Called at driver startup.
8042  * Returns 0 for success, errors for failure.
8043  */
8044 int cik_init(struct radeon_device *rdev)
8045 {
8046 	struct radeon_ring *ring;
8047 	int r;
8048 
8049 	/* Read BIOS */
8050 	if (!radeon_get_bios(rdev)) {
8051 		if (ASIC_IS_AVIVO(rdev))
8052 			return -EINVAL;
8053 	}
8054 	/* Must be an ATOMBIOS */
8055 	if (!rdev->is_atom_bios) {
8056 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8057 		return -EINVAL;
8058 	}
8059 	r = radeon_atombios_init(rdev);
8060 	if (r)
8061 		return r;
8062 
8063 	/* Post card if necessary */
8064 	if (!radeon_card_posted(rdev)) {
8065 		if (!rdev->bios) {
8066 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8067 			return -EINVAL;
8068 		}
8069 		DRM_INFO("GPU not posted. posting now...\n");
8070 		atom_asic_init(rdev->mode_info.atom_context);
8071 	}
8072 	/* init golden registers */
8073 	cik_init_golden_registers(rdev);
8074 	/* Initialize scratch registers */
8075 	cik_scratch_init(rdev);
8076 	/* Initialize surface registers */
8077 	radeon_surface_init(rdev);
8078 	/* Initialize clocks */
8079 	radeon_get_clock_info(rdev->ddev);
8080 
8081 	/* Fence driver */
8082 	r = radeon_fence_driver_init(rdev);
8083 	if (r)
8084 		return r;
8085 
8086 	/* initialize memory controller */
8087 	r = cik_mc_init(rdev);
8088 	if (r)
8089 		return r;
8090 	/* Memory manager */
8091 	r = radeon_bo_init(rdev);
8092 	if (r)
8093 		return r;
8094 
8095 	if (rdev->flags & RADEON_IS_IGP) {
8096 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8097 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8098 			r = cik_init_microcode(rdev);
8099 			if (r) {
8100 				DRM_ERROR("Failed to load firmware!\n");
8101 				return r;
8102 			}
8103 		}
8104 	} else {
8105 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8106 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8107 		    !rdev->mc_fw) {
8108 			r = cik_init_microcode(rdev);
8109 			if (r) {
8110 				DRM_ERROR("Failed to load firmware!\n");
8111 				return r;
8112 			}
8113 		}
8114 	}
8115 
8116 	/* Initialize power management */
8117 	radeon_pm_init(rdev);
8118 
8119 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8120 	ring->ring_obj = NULL;
8121 	r600_ring_init(rdev, ring, 1024 * 1024);
8122 
8123 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8124 	ring->ring_obj = NULL;
8125 	r600_ring_init(rdev, ring, 1024 * 1024);
8126 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8127 	if (r)
8128 		return r;
8129 
8130 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8131 	ring->ring_obj = NULL;
8132 	r600_ring_init(rdev, ring, 1024 * 1024);
8133 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8134 	if (r)
8135 		return r;
8136 
8137 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8138 	ring->ring_obj = NULL;
8139 	r600_ring_init(rdev, ring, 256 * 1024);
8140 
8141 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8142 	ring->ring_obj = NULL;
8143 	r600_ring_init(rdev, ring, 256 * 1024);
8144 
8145 	r = radeon_uvd_init(rdev);
8146 	if (!r) {
8147 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8148 		ring->ring_obj = NULL;
8149 		r600_ring_init(rdev, ring, 4096);
8150 	}
8151 
8152 	r = radeon_vce_init(rdev);
8153 	if (!r) {
8154 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8155 		ring->ring_obj = NULL;
8156 		r600_ring_init(rdev, ring, 4096);
8157 
8158 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8159 		ring->ring_obj = NULL;
8160 		r600_ring_init(rdev, ring, 4096);
8161 	}
8162 
8163 	rdev->ih.ring_obj = NULL;
8164 	r600_ih_ring_init(rdev, 64 * 1024);
8165 
8166 	r = r600_pcie_gart_init(rdev);
8167 	if (r)
8168 		return r;
8169 
8170 	rdev->accel_working = true;
8171 	r = cik_startup(rdev);
8172 	if (r) {
8173 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8174 		cik_cp_fini(rdev);
8175 		cik_sdma_fini(rdev);
8176 		cik_irq_fini(rdev);
8177 		sumo_rlc_fini(rdev);
8178 		cik_mec_fini(rdev);
8179 		radeon_wb_fini(rdev);
8180 		radeon_ib_pool_fini(rdev);
8181 		radeon_vm_manager_fini(rdev);
8182 		radeon_irq_kms_fini(rdev);
8183 		cik_pcie_gart_fini(rdev);
8184 		rdev->accel_working = false;
8185 	}
8186 
8187 	/* Don't start up if the MC ucode is missing.
8188 	 * The default clocks and voltages before the MC ucode
8189 	 * is loaded are not suffient for advanced operations.
8190 	 */
8191 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8192 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8193 		return -EINVAL;
8194 	}
8195 
8196 	return 0;
8197 }
8198 
8199 /**
8200  * cik_fini - asic specific driver and hw fini
8201  *
8202  * @rdev: radeon_device pointer
8203  *
8204  * Tear down the asic specific driver variables and program the hw
8205  * to an idle state (CIK).
8206  * Called at driver unload.
8207  */
void cik_fini(struct radeon_device *rdev)
{
	/* teardown is roughly the reverse of cik_init()/cik_startup() */
	radeon_pm_fini(rdev);
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* release our copy of the VBIOS and guard against reuse */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
8234 
8235 void dce8_program_fmt(struct drm_encoder *encoder)
8236 {
8237 	struct drm_device *dev = encoder->dev;
8238 	struct radeon_device *rdev = dev->dev_private;
8239 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8240 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8241 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8242 	int bpc = 0;
8243 	u32 tmp = 0;
8244 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8245 
8246 	if (connector) {
8247 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8248 		bpc = radeon_get_monitor_bpc(connector);
8249 		dither = radeon_connector->dither;
8250 	}
8251 
8252 	/* LVDS/eDP FMT is set up by atom */
8253 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8254 		return;
8255 
8256 	/* not needed for analog */
8257 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8258 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8259 		return;
8260 
8261 	if (bpc == 0)
8262 		return;
8263 
8264 	switch (bpc) {
8265 	case 6:
8266 		if (dither == RADEON_FMT_DITHER_ENABLE)
8267 			/* XXX sort out optimal dither settings */
8268 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8269 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8270 		else
8271 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8272 		break;
8273 	case 8:
8274 		if (dither == RADEON_FMT_DITHER_ENABLE)
8275 			/* XXX sort out optimal dither settings */
8276 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8277 				FMT_RGB_RANDOM_ENABLE |
8278 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8279 		else
8280 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8281 		break;
8282 	case 10:
8283 		if (dither == RADEON_FMT_DITHER_ENABLE)
8284 			/* XXX sort out optimal dither settings */
8285 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8286 				FMT_RGB_RANDOM_ENABLE |
8287 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8288 		else
8289 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8290 		break;
8291 	default:
8292 		/* not needed */
8293 		break;
8294 	}
8295 
8296 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8297 }
8298 
8299 /* display watermark setup */
8300 /**
8301  * dce8_line_buffer_adjust - Set up the line buffer
8302  *
8303  * @rdev: radeon_device pointer
8304  * @radeon_crtc: the selected display controller
8305  * @mode: the current display mode on the selected display
8306  * controller
8307  *
8308  * Setup up the line buffer allocation for
8309  * the selected display controller (CIK).
8310  * Returns the line buffer size in pixels.
8311  */
8312 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8313 				   struct radeon_crtc *radeon_crtc,
8314 				   struct drm_display_mode *mode)
8315 {
8316 	u32 tmp, buffer_alloc, i;
8317 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8318 	/*
8319 	 * Line Buffer Setup
8320 	 * There are 6 line buffers, one for each display controllers.
8321 	 * There are 3 partitions per LB. Select the number of partitions
8322 	 * to enable based on the display width.  For display widths larger
8323 	 * than 4096, you need use to use 2 display controllers and combine
8324 	 * them using the stereo blender.
8325 	 */
8326 	if (radeon_crtc->base.enabled && mode) {
8327 		if (mode->crtc_hdisplay < 1920) {
8328 			tmp = 1;
8329 			buffer_alloc = 2;
8330 		} else if (mode->crtc_hdisplay < 2560) {
8331 			tmp = 2;
8332 			buffer_alloc = 2;
8333 		} else if (mode->crtc_hdisplay < 4096) {
8334 			tmp = 0;
8335 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8336 		} else {
8337 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8338 			tmp = 0;
8339 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8340 		}
8341 	} else {
8342 		tmp = 1;
8343 		buffer_alloc = 0;
8344 	}
8345 
8346 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8347 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8348 
8349 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8350 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8351 	for (i = 0; i < rdev->usec_timeout; i++) {
8352 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8353 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8354 			break;
8355 		udelay(1);
8356 	}
8357 
8358 	if (radeon_crtc->base.enabled && mode) {
8359 		switch (tmp) {
8360 		case 0:
8361 		default:
8362 			return 4096 * 2;
8363 		case 1:
8364 			return 1920 * 2;
8365 		case 2:
8366 			return 2560 * 2;
8367 		}
8368 	}
8369 
8370 	/* controller not enabled, so no lb used */
8371 	return 0;
8372 }
8373 
8374 /**
8375  * cik_get_number_of_dram_channels - get the number of dram channels
8376  *
8377  * @rdev: radeon_device pointer
8378  *
8379  * Look up the number of video ram channels (CIK).
8380  * Used for display watermark bandwidth calculations
8381  * Returns the number of dram channels
8382  */
8383 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8384 {
8385 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8386 
8387 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8388 	case 0:
8389 	default:
8390 		return 1;
8391 	case 1:
8392 		return 2;
8393 	case 2:
8394 		return 4;
8395 	case 3:
8396 		return 8;
8397 	case 4:
8398 		return 3;
8399 	case 5:
8400 		return 6;
8401 	case 6:
8402 		return 10;
8403 	case 7:
8404 		return 12;
8405 	case 8:
8406 		return 16;
8407 	}
8408 }
8409 
/* dce8_wm_params - inputs for the DCE8 display watermark calculations */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8425 
8426 /**
8427  * dce8_dram_bandwidth - get the dram bandwidth
8428  *
8429  * @wm: watermark calculation data
8430  *
8431  * Calculate the raw dram bandwidth (CIK).
8432  * Used for display watermark bandwidth calculations
8433  * Returns the dram bandwidth in MBytes/s
8434  */
8435 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8436 {
8437 	/* Calculate raw DRAM Bandwidth */
8438 	fixed20_12 dram_efficiency; /* 0.7 */
8439 	fixed20_12 yclk, dram_channels, bandwidth;
8440 	fixed20_12 a;
8441 
8442 	a.full = dfixed_const(1000);
8443 	yclk.full = dfixed_const(wm->yclk);
8444 	yclk.full = dfixed_div(yclk, a);
8445 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8446 	a.full = dfixed_const(10);
8447 	dram_efficiency.full = dfixed_const(7);
8448 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8449 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8450 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8451 
8452 	return dfixed_trunc(bandwidth);
8453 }
8454 
8455 /**
8456  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8457  *
8458  * @wm: watermark calculation data
8459  *
8460  * Calculate the dram bandwidth used for display (CIK).
8461  * Used for display watermark bandwidth calculations
8462  * Returns the dram bandwidth for display in MBytes/s
8463  */
8464 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8465 {
8466 	/* Calculate DRAM Bandwidth and the part allocated to display. */
8467 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8468 	fixed20_12 yclk, dram_channels, bandwidth;
8469 	fixed20_12 a;
8470 
8471 	a.full = dfixed_const(1000);
8472 	yclk.full = dfixed_const(wm->yclk);
8473 	yclk.full = dfixed_div(yclk, a);
8474 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8475 	a.full = dfixed_const(10);
8476 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8477 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8478 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8479 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8480 
8481 	return dfixed_trunc(bandwidth);
8482 }
8483 
8484 /**
8485  * dce8_data_return_bandwidth - get the data return bandwidth
8486  *
8487  * @wm: watermark calculation data
8488  *
8489  * Calculate the data return bandwidth used for display (CIK).
8490  * Used for display watermark bandwidth calculations
8491  * Returns the data return bandwidth in MBytes/s
8492  */
8493 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8494 {
8495 	/* Calculate the display Data return Bandwidth */
8496 	fixed20_12 return_efficiency; /* 0.8 */
8497 	fixed20_12 sclk, bandwidth;
8498 	fixed20_12 a;
8499 
8500 	a.full = dfixed_const(1000);
8501 	sclk.full = dfixed_const(wm->sclk);
8502 	sclk.full = dfixed_div(sclk, a);
8503 	a.full = dfixed_const(10);
8504 	return_efficiency.full = dfixed_const(8);
8505 	return_efficiency.full = dfixed_div(return_efficiency, a);
8506 	a.full = dfixed_const(32);
8507 	bandwidth.full = dfixed_mul(a, sclk);
8508 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8509 
8510 	return dfixed_trunc(bandwidth);
8511 }
8512 
8513 /**
8514  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8515  *
8516  * @wm: watermark calculation data
8517  *
8518  * Calculate the dmif bandwidth used for display (CIK).
8519  * Used for display watermark bandwidth calculations
8520  * Returns the dmif bandwidth in MBytes/s
8521  */
8522 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8523 {
8524 	/* Calculate the DMIF Request Bandwidth */
8525 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8526 	fixed20_12 disp_clk, bandwidth;
8527 	fixed20_12 a, b;
8528 
8529 	a.full = dfixed_const(1000);
8530 	disp_clk.full = dfixed_const(wm->disp_clk);
8531 	disp_clk.full = dfixed_div(disp_clk, a);
8532 	a.full = dfixed_const(32);
8533 	b.full = dfixed_mul(a, disp_clk);
8534 
8535 	a.full = dfixed_const(10);
8536 	disp_clk_request_efficiency.full = dfixed_const(8);
8537 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8538 
8539 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8540 
8541 	return dfixed_trunc(bandwidth);
8542 }
8543 
8544 /**
8545  * dce8_available_bandwidth - get the min available bandwidth
8546  *
8547  * @wm: watermark calculation data
8548  *
8549  * Calculate the min available bandwidth used for display (CIK).
8550  * Used for display watermark bandwidth calculations
8551  * Returns the min available bandwidth in MBytes/s
8552  */
8553 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8554 {
8555 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
8556 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8557 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8558 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8559 
8560 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8561 }
8562 
8563 /**
8564  * dce8_average_bandwidth - get the average available bandwidth
8565  *
8566  * @wm: watermark calculation data
8567  *
8568  * Calculate the average available bandwidth used for display (CIK).
8569  * Used for display watermark bandwidth calculations
8570  * Returns the average available bandwidth in MBytes/s
8571  */
8572 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8573 {
8574 	/* Calculate the display mode Average Bandwidth
8575 	 * DisplayMode should contain the source and destination dimensions,
8576 	 * timing, etc.
8577 	 */
8578 	fixed20_12 bpp;
8579 	fixed20_12 line_time;
8580 	fixed20_12 src_width;
8581 	fixed20_12 bandwidth;
8582 	fixed20_12 a;
8583 
8584 	a.full = dfixed_const(1000);
8585 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8586 	line_time.full = dfixed_div(line_time, a);
8587 	bpp.full = dfixed_const(wm->bytes_per_pixel);
8588 	src_width.full = dfixed_const(wm->src_width);
8589 	bandwidth.full = dfixed_mul(src_width, bpp);
8590 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8591 	bandwidth.full = dfixed_div(bandwidth, line_time);
8592 
8593 	return dfixed_trunc(bandwidth);
8594 }
8595 
8596 /**
8597  * dce8_latency_watermark - get the latency watermark
8598  *
8599  * @wm: watermark calculation data
8600  *
8601  * Calculate the latency watermark (CIK).
8602  * Used for display watermark bandwidth calculations
8603  * Returns the latency watermark in ns
8604  */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	/* no active heads: nothing to compute (also avoids div-by-zero below) */
	if (wm->num_heads == 0)
		return 0;

	/* heavy downscaling/interlacing needs up to 4 source lines fetched
	 * per destination line; otherwise 2 are enough */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = bandwidth share available to this head */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif-derived fill rate limit
	 * (NOTE(review): presumably dmif_size / ((mc_latency + 512) / disp_clk)
	 * models how fast the dmif buffer can refill - confirm) */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = disp_clk(MHz) * bytes_per_pixel: pixel pipeline consumption rate */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* line buffer fill bandwidth is bounded by both limits above */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fetch one destination line's worth of source data */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if a line fills within the active time, latency alone dominates;
	 * otherwise add the overrun to the watermark */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
8667 
8668 /**
8669  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8670  * average and available dram bandwidth
8671  *
8672  * @wm: watermark calculation data
8673  *
8674  * Check if the display average bandwidth fits in the display
8675  * dram bandwidth (CIK).
8676  * Used for display watermark bandwidth calculations
8677  * Returns true if the display fits, false if not.
8678  */
8679 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8680 {
8681 	if (dce8_average_bandwidth(wm) <=
8682 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8683 		return true;
8684 	else
8685 		return false;
8686 }
8687 
8688 /**
8689  * dce8_average_bandwidth_vs_available_bandwidth - check
8690  * average and available bandwidth
8691  *
8692  * @wm: watermark calculation data
8693  *
8694  * Check if the display average bandwidth fits in the display
8695  * available bandwidth (CIK).
8696  * Used for display watermark bandwidth calculations
8697  * Returns true if the display fits, false if not.
8698  */
8699 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8700 {
8701 	if (dce8_average_bandwidth(wm) <=
8702 	    (dce8_available_bandwidth(wm) / wm->num_heads))
8703 		return true;
8704 	else
8705 		return false;
8706 }
8707 
8708 /**
8709  * dce8_check_latency_hiding - check latency hiding
8710  *
8711  * @wm: watermark calculation data
8712  *
8713  * Check latency hiding (CIK).
8714  * Used for display watermark bandwidth calculations
8715  * Returns true if the display fits, false if not.
8716  */
8717 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8718 {
8719 	u32 lb_partitions = wm->lb_size / wm->src_width;
8720 	u32 line_time = wm->active_time + wm->blank_time;
8721 	u32 latency_tolerant_lines;
8722 	u32 latency_hiding;
8723 	fixed20_12 a;
8724 
8725 	a.full = dfixed_const(1);
8726 	if (wm->vsc.full > a.full)
8727 		latency_tolerant_lines = 1;
8728 	else {
8729 		if (lb_partitions <= (wm->vtaps + 1))
8730 			latency_tolerant_lines = 1;
8731 		else
8732 			latency_tolerant_lines = 2;
8733 	}
8734 
8735 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8736 
8737 	if (dce8_latency_watermark(wm) <= latency_hiding)
8738 		return true;
8739 	else
8740 		return false;
8741 }
8742 
8743 /**
8744  * dce8_program_watermarks - program display watermarks
8745  *
8746  * @rdev: radeon_device pointer
8747  * @radeon_crtc: the selected display controller
8748  * @lb_size: line buffer size
8749  * @num_heads: number of display controllers in use
8750  *
8751  * Calculate and program the display watermarks for the
8752  * selected display controller (CIK).
8753  */
8754 static void dce8_program_watermarks(struct radeon_device *rdev,
8755 				    struct radeon_crtc *radeon_crtc,
8756 				    u32 lb_size, u32 num_heads)
8757 {
8758 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
8759 	struct dce8_wm_params wm_low, wm_high;
8760 	u32 pixel_period;
8761 	u32 line_time = 0;
8762 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
8763 	u32 tmp, wm_mask;
8764 
8765 	if (radeon_crtc->base.enabled && num_heads && mode) {
8766 		pixel_period = 1000000 / (u32)mode->clock;
8767 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8768 
8769 		/* watermark for high clocks */
8770 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8771 		    rdev->pm.dpm_enabled) {
8772 			wm_high.yclk =
8773 				radeon_dpm_get_mclk(rdev, false) * 10;
8774 			wm_high.sclk =
8775 				radeon_dpm_get_sclk(rdev, false) * 10;
8776 		} else {
8777 			wm_high.yclk = rdev->pm.current_mclk * 10;
8778 			wm_high.sclk = rdev->pm.current_sclk * 10;
8779 		}
8780 
8781 		wm_high.disp_clk = mode->clock;
8782 		wm_high.src_width = mode->crtc_hdisplay;
8783 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8784 		wm_high.blank_time = line_time - wm_high.active_time;
8785 		wm_high.interlaced = false;
8786 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8787 			wm_high.interlaced = true;
8788 		wm_high.vsc = radeon_crtc->vsc;
8789 		wm_high.vtaps = 1;
8790 		if (radeon_crtc->rmx_type != RMX_OFF)
8791 			wm_high.vtaps = 2;
8792 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8793 		wm_high.lb_size = lb_size;
8794 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8795 		wm_high.num_heads = num_heads;
8796 
8797 		/* set for high clocks */
8798 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8799 
8800 		/* possibly force display priority to high */
8801 		/* should really do this at mode validation time... */
8802 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8803 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8804 		    !dce8_check_latency_hiding(&wm_high) ||
8805 		    (rdev->disp_priority == 2)) {
8806 			DRM_DEBUG_KMS("force priority to high\n");
8807 		}
8808 
8809 		/* watermark for low clocks */
8810 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8811 		    rdev->pm.dpm_enabled) {
8812 			wm_low.yclk =
8813 				radeon_dpm_get_mclk(rdev, true) * 10;
8814 			wm_low.sclk =
8815 				radeon_dpm_get_sclk(rdev, true) * 10;
8816 		} else {
8817 			wm_low.yclk = rdev->pm.current_mclk * 10;
8818 			wm_low.sclk = rdev->pm.current_sclk * 10;
8819 		}
8820 
8821 		wm_low.disp_clk = mode->clock;
8822 		wm_low.src_width = mode->crtc_hdisplay;
8823 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8824 		wm_low.blank_time = line_time - wm_low.active_time;
8825 		wm_low.interlaced = false;
8826 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8827 			wm_low.interlaced = true;
8828 		wm_low.vsc = radeon_crtc->vsc;
8829 		wm_low.vtaps = 1;
8830 		if (radeon_crtc->rmx_type != RMX_OFF)
8831 			wm_low.vtaps = 2;
8832 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8833 		wm_low.lb_size = lb_size;
8834 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8835 		wm_low.num_heads = num_heads;
8836 
8837 		/* set for low clocks */
8838 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
8839 
8840 		/* possibly force display priority to high */
8841 		/* should really do this at mode validation time... */
8842 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
8843 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
8844 		    !dce8_check_latency_hiding(&wm_low) ||
8845 		    (rdev->disp_priority == 2)) {
8846 			DRM_DEBUG_KMS("force priority to high\n");
8847 		}
8848 	}
8849 
8850 	/* select wm A */
8851 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8852 	tmp = wm_mask;
8853 	tmp &= ~LATENCY_WATERMARK_MASK(3);
8854 	tmp |= LATENCY_WATERMARK_MASK(1);
8855 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8856 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8857 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
8858 		LATENCY_HIGH_WATERMARK(line_time)));
8859 	/* select wm B */
8860 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8861 	tmp &= ~LATENCY_WATERMARK_MASK(3);
8862 	tmp |= LATENCY_WATERMARK_MASK(2);
8863 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8864 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8865 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
8866 		LATENCY_HIGH_WATERMARK(line_time)));
8867 	/* restore original selection */
8868 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
8869 
8870 	/* save values for DPM */
8871 	radeon_crtc->line_time = line_time;
8872 	radeon_crtc->wm_high = latency_watermark_a;
8873 	radeon_crtc->wm_low = latency_watermark_b;
8874 }
8875 
8876 /**
8877  * dce8_bandwidth_update - program display watermarks
8878  *
8879  * @rdev: radeon_device pointer
8880  *
8881  * Calculate and program the display watermarks and line
8882  * buffer allocation (CIK).
8883  */
8884 void dce8_bandwidth_update(struct radeon_device *rdev)
8885 {
8886 	struct drm_display_mode *mode = NULL;
8887 	u32 num_heads = 0, lb_size;
8888 	int i;
8889 
8890 	radeon_update_display_priority(rdev);
8891 
8892 	for (i = 0; i < rdev->num_crtc; i++) {
8893 		if (rdev->mode_info.crtcs[i]->base.enabled)
8894 			num_heads++;
8895 	}
8896 	for (i = 0; i < rdev->num_crtc; i++) {
8897 		mode = &rdev->mode_info.crtcs[i]->base.mode;
8898 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8899 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8900 	}
8901 }
8902 
8903 /**
8904  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8905  *
8906  * @rdev: radeon_device pointer
8907  *
8908  * Fetches a GPU clock counter snapshot (SI).
8909  * Returns the 64 bit clock counter snapshot.
8910  */
8911 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8912 {
8913 	uint64_t clock;
8914 
8915 	mutex_lock(&rdev->gpu_clock_mutex);
8916 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8917 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8918 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8919 	mutex_unlock(&rdev->gpu_clock_mutex);
8920 	return clock;
8921 }
8922 
/**
 * cik_set_uvd_clock - program a single UVD clock
 *
 * @rdev: radeon_device pointer
 * @clock: requested clock frequency
 * @cntl_reg: clock control SMC register offset
 * @status_reg: clock status SMC register offset
 *
 * Looks up the atom dividers for @clock, programs the post divider
 * into @cntl_reg and polls @status_reg until the clock reports stable.
 * Returns 0 on success, -ETIMEDOUT if the clock never stabilizes, or
 * a negative error code from the atom divider lookup.
 */
static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
                              u32 cntl_reg, u32 status_reg)
{
	int r, i;
	struct atom_clock_dividers dividers;
	uint32_t tmp;

	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					   clock, false, &dividers);
	if (r)
		return r;

	/* read-modify-write: clear the divider field and direct-control
	 * enable, then install the new post divider */
	tmp = RREG32_SMC(cntl_reg);
	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
	tmp |= dividers.post_divider;
	WREG32_SMC(cntl_reg, tmp);

	/* wait up to 100 * 10ms for the clock to report stable */
	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(status_reg) & DCLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	return 0;
}
8950 
8951 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8952 {
8953 	int r = 0;
8954 
8955 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8956 	if (r)
8957 		return r;
8958 
8959 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8960 	return r;
8961 }
8962 
/**
 * cik_set_vce_clocks - program the VCE eclk
 *
 * @rdev: radeon_device pointer
 * @evclk: requested evclk frequency (unused here; only ecclk is programmed)
 * @ecclk: requested ecclk frequency
 *
 * Looks up the atom dividers for @ecclk, waits for the current eclk to
 * be stable, programs the new post divider and waits for stability again.
 * Returns 0 on success, -ETIMEDOUT on poll timeout, or a negative error
 * code from the atom divider lookup.
 */
int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
{
	int r, i;
	struct atom_clock_dividers dividers;
	u32 tmp;

	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					   ecclk, false, &dividers);
	if (r)
		return r;

	/* wait for the clock to be stable before touching the divider */
	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	/* read-modify-write: clear divider field and direct control enable,
	 * then install the new post divider */
	tmp = RREG32_SMC(CG_ECLK_CNTL);
	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
	tmp |= dividers.post_divider;
	WREG32_SMC(CG_ECLK_CNTL, tmp);

	/* wait for the new clock setting to take effect and stabilize */
	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	return 0;
}
8997 
/**
 * cik_pcie_gen3_enable - attempt to raise the PCIe link speed
 *
 * @rdev: radeon_device pointer
 *
 * If the platform supports gen2/gen3 link speeds and they are not
 * already in use, re-trains the link at the higher rate, including the
 * gen3 equalization retry dance.  Controlled by the radeon.pcie_gen2
 * module parameter (covers gen3 as well despite the name).  No effect
 * on IGPs or non-PCIE parts.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* radeon.pcie_gen2=0 disables all link speed changes */
	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* nothing to do if the link already runs at the best supported rate */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* both ends of the link must expose a PCIe capability */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save current link control, then set hardware
			 * autonomous width disable on both ends for the
			 * duration of the retraining */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* if the link trained below its detected width,
			 * try to renegotiate the full width first */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then request a redo of
				 * the gen3 equalization procedure */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				/* restore the saved target-speed / compliance
				 * bits (bit 4 and bits 9-11) on both ends */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed into the GPU's link control 2 */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change and wait for it to complete */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9154 
/**
 * cik_program_aspm - program PCIe ASPM (active state power management)
 *
 * @rdev: radeon_device pointer
 *
 * Enables L0s/L1 link power states and, where supported, PLL power
 * down in L1 and CLKREQ-based reference clock gating.  Controlled by
 * the radeon.aspm module parameter; skipped on IGPs and non-PCIE parts.
 * The local disable_* flags are compile-time policy knobs, all enabled
 * by default here.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the advertised N_FTS (fast training sequence count) */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* set L0s/L1 inactivity timeouts; the write is deferred until we
	 * know whether L1 (and therefore PMI_TO_L1) applies */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PIF to power down the PLLs when the
			 * link is in the off/TXS2 states (both PBs) */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				/* CLKREQ gating only if the upstream bridge
				 * advertises clock power management */
				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* switch internal consumers off the PCIe
				 * refclk so it can be gated via CLKREQ */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: commit the deferred PCIE_LC_CNTL value */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* if the full N_FTS override took and the link partner
		 * transmits/receives in reverse, drop the L0s timeout */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
9302