/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"

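/*
 * Firmware blobs required by the CIK families handled in this file.
 * MODULE_FIRMWARE() only records the file names (for modinfo and
 * initramfs tooling); the blobs themselves are fetched at init time
 * via request_firmware() in cik_init_microcode() below.
 */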
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

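	/*
	 * CTF_TEMP is a 9-bit field; readings with bit 0x200 set are
	 * treated as out of range and clamped to 255 degrees C before
	 * the conversion to millidegrees below.
	 */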
	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

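	/*
	 * 0xC0300E0C is the SMC-space register that reports the current
	 * temperature on KV/KB; judging by the conversion below, the raw
	 * value is in 1/8 degree C units with a -49 degree C offset.
	 */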
	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/*
 * Indirect register accessors
 */
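/*
 * The PCIE port registers are reached through an index/data pair: the
 * register offset is written to PCIE_INDEX and the payload is then read
 * from or written to PCIE_DATA.  The dummy RREG32() calls flush the
 * posted index/data writes so the access completes before the lock is
 * released.
 */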
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}

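/*
 * RLC save/restore register lists.  The encoding is defined by the RLC
 * ucode that consumes these tables, but the apparent layout is pairs of
 *   (instance/broadcast selector << 16) | (register byte offset >> 2)
 * each followed by a 0x00000000 placeholder for the saved value; the
 * bare 0x3 and 0x5 words appear to delimit special sub-lists.
 */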
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

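/*
 * "Golden" register tables.  Each entry is an {offset, and_mask,
 * or_value} triplet consumed by radeon_program_register_sequence():
 * an and_mask of 0xffffffff writes or_value verbatim, anything else is
 * a read-modify-write that clears the and_mask bits and ORs in
 * (or_value & and_mask).
 */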
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

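	/* the reference (xtal) clock may be internally divided: by 2 on
	 * IGPs when GPU_COUNTER_CLK is set, by 4 on dGPUs when
	 * XTALIN_DIVIDE is set
	 */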
	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
{
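	/* doorbell.ptr is declared as a u32 pointer (u32 __iomem * in
	 * radeon.h), so @index counts 4-byte dwords within the aperture
	 */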
	if (index < rdev->doorbell.num_doorbells) {
		return readl(rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
{
	if (index < rdev->doorbell.num_doorbells) {
		writel(v, rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

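/*
 * MC "io" register tables: {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA}
 * value pairs that ci_mc_load_microcode() programs before streaming in
 * the MC ucode proper.
 */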
#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

#define HAWAII_IO_MC_REGS_SIZE 22

static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances.  Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}

/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running, blackout = 0;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	case CHIP_HAWAII:
		io_mc_regs = (u32 *)&hawaii_io_mc_regs;
		ucode_size = HAWAII_MC_UCODE_SIZE;
		regs_size = HAWAII_IO_MC_REGS_SIZE;
		break;
	default:
		return -EINVAL;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

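	/*
	 * Note: since the outer test requires running == 0, the inner
	 * if (running) blackout save (and the matching restore at the
	 * end) can never execute; the sequence appears to have been
	 * inherited from the SI MC loader.
	 */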
	if (running == 0) {
		if (running) {
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}

		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}

/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size = 0,
		sdma_req_size, smc_req_size = 0;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = CIK_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_HAWAII:
		chip_name = "HAWAII";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}
1848 
1849 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1850 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1851 	if (err)
1852 		goto out;
1853 	if (rdev->me_fw->size != me_req_size) {
1854 		printk(KERN_ERR
1855 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1856 		       rdev->me_fw->size, fw_name);
1857 		err = -EINVAL;
		goto out;
1858 	}
1859 
1860 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1861 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1862 	if (err)
1863 		goto out;
1864 	if (rdev->ce_fw->size != ce_req_size) {
1865 		printk(KERN_ERR
1866 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1867 		       rdev->ce_fw->size, fw_name);
1868 		err = -EINVAL;
		goto out;
1869 	}
1870 
1871 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1872 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1873 	if (err)
1874 		goto out;
1875 	if (rdev->mec_fw->size != mec_req_size) {
1876 		printk(KERN_ERR
1877 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1878 		       rdev->mec_fw->size, fw_name);
1879 		err = -EINVAL;
		goto out;
1880 	}
1881 
1882 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1883 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1884 	if (err)
1885 		goto out;
1886 	if (rdev->rlc_fw->size != rlc_req_size) {
1887 		printk(KERN_ERR
1888 		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1889 		       rdev->rlc_fw->size, fw_name);
1890 		err = -EINVAL;
		goto out;
1891 	}
1892 
1893 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1894 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1895 	if (err)
1896 		goto out;
1897 	if (rdev->sdma_fw->size != sdma_req_size) {
1898 		printk(KERN_ERR
1899 		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1900 		       rdev->sdma_fw->size, fw_name);
1901 		err = -EINVAL;
		goto out;
1902 	}
1903 
1904 	/* No SMC, MC ucode on APUs */
1905 	if (!(rdev->flags & RADEON_IS_IGP)) {
1906 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1907 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1908 		if (err)
1909 			goto out;
1910 		if (rdev->mc_fw->size != mc_req_size) {
1911 			printk(KERN_ERR
1912 			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1913 			       rdev->mc_fw->size, fw_name);
1914 			err = -EINVAL;
			goto out;
1915 		}
1916 
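		/* SMC ucode is optional; if it is missing, the driver simply
		 * runs without dpm instead of failing the whole init.
		 */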
1917 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1918 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1919 		if (err) {
1920 			printk(KERN_ERR
1921 			       "smc: error loading firmware \"%s\"\n",
1922 			       fw_name);
1923 			release_firmware(rdev->smc_fw);
1924 			rdev->smc_fw = NULL;
1925 			err = 0;
1926 		} else if (rdev->smc_fw->size != smc_req_size) {
1927 			printk(KERN_ERR
1928 			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1929 			       rdev->smc_fw->size, fw_name);
1930 			err = -EINVAL;
1931 		}
1932 	}
1933 
1934 out:
1935 	if (err) {
1936 		if (err != -EINVAL)
1937 			printk(KERN_ERR
1938 			       "cik_cp: Failed to load firmware \"%s\"\n",
1939 			       fw_name);
1940 		release_firmware(rdev->pfp_fw);
1941 		rdev->pfp_fw = NULL;
1942 		release_firmware(rdev->me_fw);
1943 		rdev->me_fw = NULL;
1944 		release_firmware(rdev->ce_fw);
1945 		rdev->ce_fw = NULL;
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
1946 		release_firmware(rdev->rlc_fw);
1947 		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
1948 		release_firmware(rdev->mc_fw);
1949 		rdev->mc_fw = NULL;
1950 		release_firmware(rdev->smc_fw);
1951 		rdev->smc_fw = NULL;
1952 	}
1953 	return err;
1954 }
1955 
1956 /*
1957  * Core functions
1958  */
1959 /**
1960  * cik_tiling_mode_table_init - init the hw tiling table
1961  *
1962  * @rdev: radeon_device pointer
1963  *
1964  * Starting with SI, the tiling setup is done globally in a
1965  * set of 32 tiling modes.  Rather than selecting each set of
1966  * parameters per surface as on older asics, we just select
1967  * which index in the tiling table we want to use, and the
1968  * surface uses those parameters (CIK).
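 *
 * Each GB_TILE_MODE<n>/GB_MACROTILE_MODE<n> dword packs the array mode,
 * pipe config, micro tile mode and split parameters; a copy is kept in
 * rdev->config.cik so the table can be handed back to userspace.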
1969  */
1970 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1971 {
1972 	const u32 num_tile_mode_states = 32;
1973 	const u32 num_secondary_tile_mode_states = 16;
1974 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1975 	u32 num_pipe_configs;
1976 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
1977 		rdev->config.cik.max_shader_engines;
1978 
1979 	switch (rdev->config.cik.mem_row_size_in_kb) {
1980 	case 1:
1981 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1982 		break;
1983 	case 2:
1984 	default:
1985 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1986 		break;
1987 	case 4:
1988 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1989 		break;
1990 	}
1991 
1992 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
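	/* Hawaii is the only CIK part with more than 8 tile pipes; anything
	 * above 8 therefore uses the 16-pipe layout. */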
1993 	if (num_pipe_configs > 8)
1994 		num_pipe_configs = 16;
1995 
1996 	if (num_pipe_configs == 16) {
1997 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1998 			switch (reg_offset) {
1999 			case 0:
2000 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2001 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2002 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2003 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2004 				break;
2005 			case 1:
2006 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2007 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2008 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2009 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2010 				break;
2011 			case 2:
2012 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2013 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2014 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2015 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2016 				break;
2017 			case 3:
2018 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2019 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2020 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2021 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2022 				break;
2023 			case 4:
2024 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2025 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2026 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2027 						 TILE_SPLIT(split_equal_to_row_size));
2028 				break;
2029 			case 5:
2030 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2031 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2032 				break;
2033 			case 6:
2034 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2035 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2036 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2037 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2038 				break;
2039 			case 7:
2040 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2041 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2042 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2043 						 TILE_SPLIT(split_equal_to_row_size));
2044 				break;
2045 			case 8:
2046 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2047 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2048 				break;
2049 			case 9:
2050 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2051 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2052 				break;
2053 			case 10:
2054 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2055 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2056 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2057 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2058 				break;
2059 			case 11:
2060 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2061 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2062 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2063 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2064 				break;
2065 			case 12:
2066 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2067 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2068 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2069 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2070 				break;
2071 			case 13:
2072 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2073 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2074 				break;
2075 			case 14:
2076 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2077 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2078 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2079 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2080 				break;
2081 			case 16:
2082 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2083 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2084 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2085 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2086 				break;
2087 			case 17:
2088 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2089 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2090 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2091 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2092 				break;
2093 			case 27:
2094 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2095 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2096 				break;
2097 			case 28:
2098 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2099 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2100 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2101 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2102 				break;
2103 			case 29:
2104 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2105 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2106 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2107 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2108 				break;
2109 			case 30:
2110 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2111 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2112 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2113 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2114 				break;
2115 			default:
2116 				gb_tile_moden = 0;
2117 				break;
2118 			}
2119 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2120 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2121 		}
2122 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2123 			switch (reg_offset) {
2124 			case 0:
2125 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2126 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2127 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2128 						 NUM_BANKS(ADDR_SURF_16_BANK));
2129 				break;
2130 			case 1:
2131 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2132 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2133 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2134 						 NUM_BANKS(ADDR_SURF_16_BANK));
2135 				break;
2136 			case 2:
2137 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2138 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2139 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2140 						 NUM_BANKS(ADDR_SURF_16_BANK));
2141 				break;
2142 			case 3:
2143 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2144 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2145 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2146 						 NUM_BANKS(ADDR_SURF_16_BANK));
2147 				break;
2148 			case 4:
2149 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2150 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2151 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2152 						 NUM_BANKS(ADDR_SURF_8_BANK));
2153 				break;
2154 			case 5:
2155 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2156 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2157 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2158 						 NUM_BANKS(ADDR_SURF_4_BANK));
2159 				break;
2160 			case 6:
2161 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2162 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2163 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2164 						 NUM_BANKS(ADDR_SURF_2_BANK));
2165 				break;
2166 			case 8:
2167 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2168 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2169 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2170 						 NUM_BANKS(ADDR_SURF_16_BANK));
2171 				break;
2172 			case 9:
2173 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2174 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2175 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2176 						 NUM_BANKS(ADDR_SURF_16_BANK));
2177 				break;
2178 			case 10:
2179 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2180 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2181 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2182 						 NUM_BANKS(ADDR_SURF_16_BANK));
2183 				break;
2184 			case 11:
2185 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2186 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2187 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2188 						 NUM_BANKS(ADDR_SURF_8_BANK));
2189 				break;
2190 			case 12:
2191 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2192 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2193 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2194 						 NUM_BANKS(ADDR_SURF_4_BANK));
2195 				break;
2196 			case 13:
2197 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2198 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2199 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2200 						 NUM_BANKS(ADDR_SURF_2_BANK));
2201 				break;
2202 			case 14:
2203 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2204 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2205 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2206 						 NUM_BANKS(ADDR_SURF_2_BANK));
2207 				break;
2208 			default:
2209 				gb_tile_moden = 0;
2210 				break;
2211 			}
			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2212 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2213 		}
2214 	} else if (num_pipe_configs == 8) {
2215 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2216 			switch (reg_offset) {
2217 			case 0:
2218 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2219 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2220 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2221 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2222 				break;
2223 			case 1:
2224 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2225 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2226 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2227 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2228 				break;
2229 			case 2:
2230 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2231 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2232 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2233 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2234 				break;
2235 			case 3:
2236 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2237 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2238 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2239 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2240 				break;
2241 			case 4:
2242 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2243 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2244 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2245 						 TILE_SPLIT(split_equal_to_row_size));
2246 				break;
2247 			case 5:
2248 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2249 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2250 				break;
2251 			case 6:
2252 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2253 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2254 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2255 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2256 				break;
2257 			case 7:
2258 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2259 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2260 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2261 						 TILE_SPLIT(split_equal_to_row_size));
2262 				break;
2263 			case 8:
2264 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2265 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2266 				break;
2267 			case 9:
2268 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2269 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2270 				break;
2271 			case 10:
2272 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2273 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2274 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2275 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2276 				break;
2277 			case 11:
2278 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2279 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2280 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2281 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2282 				break;
2283 			case 12:
2284 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2285 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2286 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2287 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2288 				break;
2289 			case 13:
2290 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2291 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2292 				break;
2293 			case 14:
2294 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2295 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2296 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2297 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2298 				break;
2299 			case 16:
2300 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2301 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2302 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2303 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2304 				break;
2305 			case 17:
2306 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2307 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2308 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2309 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2310 				break;
2311 			case 27:
2312 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2313 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2314 				break;
2315 			case 28:
2316 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2317 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2318 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2319 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2320 				break;
2321 			case 29:
2322 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2323 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2324 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2325 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2326 				break;
2327 			case 30:
2328 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2329 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2330 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2331 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2332 				break;
2333 			default:
2334 				gb_tile_moden = 0;
2335 				break;
2336 			}
2337 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2338 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2339 		}
2340 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2341 			switch (reg_offset) {
2342 			case 0:
2343 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2345 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2346 						 NUM_BANKS(ADDR_SURF_16_BANK));
2347 				break;
2348 			case 1:
2349 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2350 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2351 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2352 						 NUM_BANKS(ADDR_SURF_16_BANK));
2353 				break;
2354 			case 2:
2355 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2357 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358 						 NUM_BANKS(ADDR_SURF_16_BANK));
2359 				break;
2360 			case 3:
2361 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2362 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2363 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2364 						 NUM_BANKS(ADDR_SURF_16_BANK));
2365 				break;
2366 			case 4:
2367 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2369 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2370 						 NUM_BANKS(ADDR_SURF_8_BANK));
2371 				break;
2372 			case 5:
2373 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2374 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2375 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2376 						 NUM_BANKS(ADDR_SURF_4_BANK));
2377 				break;
2378 			case 6:
2379 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2382 						 NUM_BANKS(ADDR_SURF_2_BANK));
2383 				break;
2384 			case 8:
2385 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2386 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2387 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2388 						 NUM_BANKS(ADDR_SURF_16_BANK));
2389 				break;
2390 			case 9:
2391 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2392 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2393 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2394 						 NUM_BANKS(ADDR_SURF_16_BANK));
2395 				break;
2396 			case 10:
2397 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2398 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2399 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2400 						 NUM_BANKS(ADDR_SURF_16_BANK));
2401 				break;
2402 			case 11:
2403 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2404 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2405 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2406 						 NUM_BANKS(ADDR_SURF_16_BANK));
2407 				break;
2408 			case 12:
2409 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2410 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2411 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2412 						 NUM_BANKS(ADDR_SURF_8_BANK));
2413 				break;
2414 			case 13:
2415 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2416 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2417 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2418 						 NUM_BANKS(ADDR_SURF_4_BANK));
2419 				break;
2420 			case 14:
2421 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2422 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2423 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2424 						 NUM_BANKS(ADDR_SURF_2_BANK));
2425 				break;
2426 			default:
2427 				gb_tile_moden = 0;
2428 				break;
2429 			}
2430 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2431 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2432 		}
2433 	} else if (num_pipe_configs == 4) {
2434 		if (num_rbs == 4) {
2435 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2436 				switch (reg_offset) {
2437 				case 0:
2438 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2439 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2440 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2441 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2442 					break;
2443 				case 1:
2444 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2445 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2446 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2447 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2448 					break;
2449 				case 2:
2450 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2451 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2452 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2453 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2454 					break;
2455 				case 3:
2456 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2457 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2458 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2459 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2460 					break;
2461 				case 4:
2462 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2463 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2464 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2465 							 TILE_SPLIT(split_equal_to_row_size));
2466 					break;
2467 				case 5:
2468 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2469 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2470 					break;
2471 				case 6:
2472 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2473 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2474 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2475 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2476 					break;
2477 				case 7:
2478 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2479 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2480 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2481 							 TILE_SPLIT(split_equal_to_row_size));
2482 					break;
2483 				case 8:
2484 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2485 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2486 					break;
2487 				case 9:
2488 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2489 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2490 					break;
2491 				case 10:
2492 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2493 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2494 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2495 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2496 					break;
2497 				case 11:
2498 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2499 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2500 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2501 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2502 					break;
2503 				case 12:
2504 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2505 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2506 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2507 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2508 					break;
2509 				case 13:
2510 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2511 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2512 					break;
2513 				case 14:
2514 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2515 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2516 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2517 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2518 					break;
2519 				case 16:
2520 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2521 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2522 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2523 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2524 					break;
2525 				case 17:
2526 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2527 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2528 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2529 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2530 					break;
2531 				case 27:
2532 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2533 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2534 					break;
2535 				case 28:
2536 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2537 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2538 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2539 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2540 					break;
2541 				case 29:
2542 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2543 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2544 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2545 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2546 					break;
2547 				case 30:
2548 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2549 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2550 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2551 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2552 					break;
2553 				default:
2554 					gb_tile_moden = 0;
2555 					break;
2556 				}
2557 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2558 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2559 			}
2560 		} else if (num_rbs < 4) {
2561 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2562 				switch (reg_offset) {
2563 				case 0:
2564 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2566 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2567 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2568 					break;
2569 				case 1:
2570 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2571 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2572 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2573 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2574 					break;
2575 				case 2:
2576 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2577 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2578 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2579 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2580 					break;
2581 				case 3:
2582 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2583 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2584 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2585 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2586 					break;
2587 				case 4:
2588 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2589 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2590 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2591 							 TILE_SPLIT(split_equal_to_row_size));
2592 					break;
2593 				case 5:
2594 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2595 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2596 					break;
2597 				case 6:
2598 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2599 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2600 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2601 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2602 					break;
2603 				case 7:
2604 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2605 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2606 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2607 							 TILE_SPLIT(split_equal_to_row_size));
2608 					break;
2609 				case 8:
2610 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2611 							 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2612 					break;
2613 				case 9:
2614 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2615 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2616 					break;
2617 				case 10:
2618 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2619 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2620 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2621 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2622 					break;
2623 				case 11:
2624 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2625 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2626 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2627 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2628 					break;
2629 				case 12:
2630 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2631 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2632 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2633 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2634 					break;
2635 				case 13:
2636 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2637 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2638 					break;
2639 				case 14:
2640 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2641 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2642 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2643 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2644 					break;
2645 				case 16:
2646 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2647 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2648 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2649 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2650 					break;
2651 				case 17:
2652 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2653 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2654 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2655 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2656 					break;
2657 				case 27:
2658 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2659 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2660 					break;
2661 				case 28:
2662 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2663 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2664 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2665 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2666 					break;
2667 				case 29:
2668 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2669 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2670 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2671 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2672 					break;
2673 				case 30:
2674 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2675 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2676 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2677 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2678 					break;
2679 				default:
2680 					gb_tile_moden = 0;
2681 					break;
2682 				}
2683 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2684 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2685 			}
2686 		}
2687 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2688 			switch (reg_offset) {
2689 			case 0:
2690 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2692 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2693 						 NUM_BANKS(ADDR_SURF_16_BANK));
2694 				break;
2695 			case 1:
2696 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2698 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2699 						 NUM_BANKS(ADDR_SURF_16_BANK));
2700 				break;
2701 			case 2:
2702 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2703 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2704 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2705 						 NUM_BANKS(ADDR_SURF_16_BANK));
2706 				break;
2707 			case 3:
2708 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2709 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2710 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2711 						 NUM_BANKS(ADDR_SURF_16_BANK));
2712 				break;
2713 			case 4:
2714 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2715 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2716 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2717 						 NUM_BANKS(ADDR_SURF_16_BANK));
2718 				break;
2719 			case 5:
2720 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2721 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2722 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2723 						 NUM_BANKS(ADDR_SURF_8_BANK));
2724 				break;
2725 			case 6:
2726 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2728 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2729 						 NUM_BANKS(ADDR_SURF_4_BANK));
2730 				break;
2731 			case 8:
2732 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2733 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2734 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2735 						 NUM_BANKS(ADDR_SURF_16_BANK));
2736 				break;
2737 			case 9:
2738 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2739 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2740 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2741 						 NUM_BANKS(ADDR_SURF_16_BANK));
2742 				break;
2743 			case 10:
2744 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2746 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2747 						 NUM_BANKS(ADDR_SURF_16_BANK));
2748 				break;
2749 			case 11:
2750 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2752 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2753 						 NUM_BANKS(ADDR_SURF_16_BANK));
2754 				break;
2755 			case 12:
2756 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2758 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2759 						 NUM_BANKS(ADDR_SURF_16_BANK));
2760 				break;
2761 			case 13:
2762 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2764 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2765 						 NUM_BANKS(ADDR_SURF_8_BANK));
2766 				break;
2767 			case 14:
2768 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2770 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2771 						 NUM_BANKS(ADDR_SURF_4_BANK));
2772 				break;
2773 			default:
2774 				gb_tile_moden = 0;
2775 				break;
2776 			}
2777 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2778 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2779 		}
2780 	} else if (num_pipe_configs == 2) {
2781 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2782 			switch (reg_offset) {
2783 			case 0:
2784 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2786 						 PIPE_CONFIG(ADDR_SURF_P2) |
2787 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2788 				break;
2789 			case 1:
2790 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2791 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2792 						 PIPE_CONFIG(ADDR_SURF_P2) |
2793 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2794 				break;
2795 			case 2:
2796 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2797 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2798 						 PIPE_CONFIG(ADDR_SURF_P2) |
2799 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2800 				break;
2801 			case 3:
2802 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2803 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2804 						 PIPE_CONFIG(ADDR_SURF_P2) |
2805 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2806 				break;
2807 			case 4:
2808 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2809 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2810 						 PIPE_CONFIG(ADDR_SURF_P2) |
2811 						 TILE_SPLIT(split_equal_to_row_size));
2812 				break;
2813 			case 5:
2814 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2815 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2816 				break;
2817 			case 6:
2818 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2819 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2820 						 PIPE_CONFIG(ADDR_SURF_P2) |
2821 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2822 				break;
2823 			case 7:
2824 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2825 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2826 						 PIPE_CONFIG(ADDR_SURF_P2) |
2827 						 TILE_SPLIT(split_equal_to_row_size));
2828 				break;
2829 			case 8:
2830 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2831 				break;
2832 			case 9:
2833 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2834 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2835 				break;
2836 			case 10:
2837 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2838 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2839 						 PIPE_CONFIG(ADDR_SURF_P2) |
2840 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2841 				break;
2842 			case 11:
2843 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2844 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2845 						 PIPE_CONFIG(ADDR_SURF_P2) |
2846 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2847 				break;
2848 			case 12:
2849 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2850 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2851 						 PIPE_CONFIG(ADDR_SURF_P2) |
2852 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2853 				break;
2854 			case 13:
2855 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2856 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2857 				break;
2858 			case 14:
2859 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2860 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2861 						 PIPE_CONFIG(ADDR_SURF_P2) |
2862 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2863 				break;
2864 			case 16:
2865 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2866 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2867 						 PIPE_CONFIG(ADDR_SURF_P2) |
2868 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2869 				break;
2870 			case 17:
2871 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2872 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2873 						 PIPE_CONFIG(ADDR_SURF_P2) |
2874 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2875 				break;
2876 			case 27:
2877 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2878 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2879 				break;
2880 			case 28:
2881 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2882 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2883 						 PIPE_CONFIG(ADDR_SURF_P2) |
2884 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2885 				break;
2886 			case 29:
2887 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2888 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2889 						 PIPE_CONFIG(ADDR_SURF_P2) |
2890 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2891 				break;
2892 			case 30:
2893 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2894 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2895 						 PIPE_CONFIG(ADDR_SURF_P2) |
2896 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2897 				break;
2898 			default:
2899 				gb_tile_moden = 0;
2900 				break;
2901 			}
2902 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2903 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2904 		}
2905 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2906 			switch (reg_offset) {
2907 			case 0:
2908 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2909 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2910 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2911 						 NUM_BANKS(ADDR_SURF_16_BANK));
2912 				break;
2913 			case 1:
2914 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2915 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2916 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2917 						 NUM_BANKS(ADDR_SURF_16_BANK));
2918 				break;
2919 			case 2:
2920 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2921 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2922 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2923 						 NUM_BANKS(ADDR_SURF_16_BANK));
2924 				break;
2925 			case 3:
2926 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2927 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2928 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2929 						 NUM_BANKS(ADDR_SURF_16_BANK));
2930 				break;
2931 			case 4:
2932 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2933 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2934 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2935 						 NUM_BANKS(ADDR_SURF_16_BANK));
2936 				break;
2937 			case 5:
2938 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2939 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2940 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2941 						 NUM_BANKS(ADDR_SURF_16_BANK));
2942 				break;
2943 			case 6:
2944 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2945 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2946 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2947 						 NUM_BANKS(ADDR_SURF_8_BANK));
2948 				break;
2949 			case 8:
2950 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2951 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2952 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2953 						 NUM_BANKS(ADDR_SURF_16_BANK));
2954 				break;
2955 			case 9:
2956 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2957 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2958 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2959 						 NUM_BANKS(ADDR_SURF_16_BANK));
2960 				break;
2961 			case 10:
2962 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2963 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2964 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2965 						 NUM_BANKS(ADDR_SURF_16_BANK));
2966 				break;
2967 			case 11:
2968 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2969 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2970 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971 						 NUM_BANKS(ADDR_SURF_16_BANK));
2972 				break;
2973 			case 12:
2974 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2975 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2976 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2977 						 NUM_BANKS(ADDR_SURF_16_BANK));
2978 				break;
2979 			case 13:
2980 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2982 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2983 						 NUM_BANKS(ADDR_SURF_16_BANK));
2984 				break;
2985 			case 14:
2986 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2987 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2988 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2989 						 NUM_BANKS(ADDR_SURF_8_BANK));
2990 				break;
2991 			default:
2992 				gb_tile_moden = 0;
2993 				break;
2994 			}
2995 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2996 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2997 		}
2998 	} else
2999 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3000 }
3001 
3002 /**
3003  * cik_select_se_sh - select which SE, SH to address
3004  *
3005  * @rdev: radeon_device pointer
3006  * @se_num: shader engine to address
3007  * @sh_num: sh block to address
3008  *
3009  * Select which SE, SH combinations to address. Certain
3010  * registers are instanced per SE or SH.  0xffffffff means
3011  * broadcast to all SEs or SHs (CIK).
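 * Callers are expected to restore broadcast mode (0xffffffff, 0xffffffff)
 * when done, as cik_setup_rb() below does.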
3012  */
3013 static void cik_select_se_sh(struct radeon_device *rdev,
3014 			     u32 se_num, u32 sh_num)
3015 {
3016 	u32 data = INSTANCE_BROADCAST_WRITES;
3017 
3018 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3019 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3020 	else if (se_num == 0xffffffff)
3021 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3022 	else if (sh_num == 0xffffffff)
3023 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3024 	else
3025 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3026 	WREG32(GRBM_GFX_INDEX, data);
3027 }
3028 
3029 /**
3030  * cik_create_bitmask - create a bitmask
3031  *
3032  * @bit_width: length of the mask
3033  *
3034  * Create a variable-length bit mask (CIK).
3035  * Returns the bitmask.
3036  */
3037 static u32 cik_create_bitmask(u32 bit_width)
3038 {
3039 	u32 i, mask = 0;
3040 
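	/* e.g. bit_width = 4 yields 0xf; for bit_width < 32 this is
	 * equivalent to (1 << bit_width) - 1.
	 */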
3041 	for (i = 0; i < bit_width; i++) {
3042 		mask <<= 1;
3043 		mask |= 1;
3044 	}
3045 	return mask;
3046 }
3047 
3048 /**
3049  * cik_get_rb_disabled - computes the mask of disabled RBs
3050  *
3051  * @rdev: radeon_device pointer
3052  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3054  * @sh_per_se: number of SH blocks per SE for the asic
3055  *
3056  * Calculates the bitmask of disabled RBs (CIK).
3057  * Returns the disabled RB bitmask.
3058  */
3059 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3060 			      u32 max_rb_num_per_se,
3061 			      u32 sh_per_se)
3062 {
3063 	u32 data, mask;
3064 
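	/* combine the fuse-level disable mask (CC_RB_BACKEND_DISABLE, only
	 * honored when its low enable bit is set) with the driver-level
	 * GC_USER_RB_BACKEND_DISABLE mask.
	 */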
3065 	data = RREG32(CC_RB_BACKEND_DISABLE);
3066 	if (data & 1)
3067 		data &= BACKEND_DISABLE_MASK;
3068 	else
3069 		data = 0;
3070 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3071 
3072 	data >>= BACKEND_DISABLE_SHIFT;
3073 
3074 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3075 
3076 	return data & mask;
3077 }
3078 
3079 /**
3080  * cik_setup_rb - setup the RBs on the asic
3081  *
3082  * @rdev: radeon_device pointer
3083  * @se_num: number of SEs (shader engines) for the asic
3084  * @sh_per_se: number of SH blocks per SE for the asic
3085  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3086  *
3087  * Configures per-SE/SH RB registers (CIK).
3088  */
3089 static void cik_setup_rb(struct radeon_device *rdev,
3090 			 u32 se_num, u32 sh_per_se,
3091 			 u32 max_rb_num_per_se)
3092 {
3093 	int i, j;
3094 	u32 data, mask;
3095 	u32 disabled_rbs = 0;
3096 	u32 enabled_rbs = 0;
3097 
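	/* pack the per-SE/SH disable masks into one bitmap; with the CIK
	 * width of 2 bits per SH, e.g. 2 SEs x 1 SH put SE0 in bits [1:0]
	 * and SE1 in bits [3:2].
	 */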
3098 	for (i = 0; i < se_num; i++) {
3099 		for (j = 0; j < sh_per_se; j++) {
3100 			cik_select_se_sh(rdev, i, j);
3101 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3102 			if (rdev->family == CHIP_HAWAII)
3103 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3104 			else
3105 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3106 		}
3107 	}
3108 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3109 
3110 	mask = 1;
3111 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3112 		if (!(disabled_rbs & mask))
3113 			enabled_rbs |= mask;
3114 		mask <<= 1;
3115 	}
3116 
3117 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3118 
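	/* per SE, translate the surviving-RB pattern into a
	 * PA_SC_RASTER_CONFIG RB map, consuming two bits of enabled_rbs
	 * per SH block.
	 */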
3119 	for (i = 0; i < se_num; i++) {
3120 		cik_select_se_sh(rdev, i, 0xffffffff);
3121 		data = 0;
3122 		for (j = 0; j < sh_per_se; j++) {
3123 			switch (enabled_rbs & 3) {
3124 			case 0:
3125 				if (j == 0)
3126 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3127 				else
3128 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3129 				break;
3130 			case 1:
3131 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3132 				break;
3133 			case 2:
3134 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3135 				break;
3136 			case 3:
3137 			default:
3138 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3139 				break;
3140 			}
3141 			enabled_rbs >>= 2;
3142 		}
3143 		WREG32(PA_SC_RASTER_CONFIG, data);
3144 	}
3145 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3146 }
3147 
3148 /**
3149  * cik_gpu_init - setup the 3D engine
3150  *
3151  * @rdev: radeon_device pointer
3152  *
3153  * Configures the 3D engine and tiling configuration
3154  * registers so that the 3D engine is usable.
3155  */
3156 static void cik_gpu_init(struct radeon_device *rdev)
3157 {
3158 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3159 	u32 mc_shared_chmap, mc_arb_ramcfg;
3160 	u32 hdp_host_path_cntl;
3161 	u32 tmp;
3162 	int i, j;
3163 
3164 	switch (rdev->family) {
3165 	case CHIP_BONAIRE:
3166 		rdev->config.cik.max_shader_engines = 2;
3167 		rdev->config.cik.max_tile_pipes = 4;
3168 		rdev->config.cik.max_cu_per_sh = 7;
3169 		rdev->config.cik.max_sh_per_se = 1;
3170 		rdev->config.cik.max_backends_per_se = 2;
3171 		rdev->config.cik.max_texture_channel_caches = 4;
3172 		rdev->config.cik.max_gprs = 256;
3173 		rdev->config.cik.max_gs_threads = 32;
3174 		rdev->config.cik.max_hw_contexts = 8;
3175 
3176 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3177 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3178 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3179 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3180 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3181 		break;
3182 	case CHIP_HAWAII:
3183 		rdev->config.cik.max_shader_engines = 4;
3184 		rdev->config.cik.max_tile_pipes = 16;
3185 		rdev->config.cik.max_cu_per_sh = 11;
3186 		rdev->config.cik.max_sh_per_se = 1;
3187 		rdev->config.cik.max_backends_per_se = 4;
3188 		rdev->config.cik.max_texture_channel_caches = 16;
3189 		rdev->config.cik.max_gprs = 256;
3190 		rdev->config.cik.max_gs_threads = 32;
3191 		rdev->config.cik.max_hw_contexts = 8;
3192 
3193 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3194 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3195 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3196 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3197 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3198 		break;
3199 	case CHIP_KAVERI:
3200 		rdev->config.cik.max_shader_engines = 1;
3201 		rdev->config.cik.max_tile_pipes = 4;
3202 		if ((rdev->pdev->device == 0x1304) ||
3203 		    (rdev->pdev->device == 0x1305) ||
3204 		    (rdev->pdev->device == 0x130C) ||
3205 		    (rdev->pdev->device == 0x130F) ||
3206 		    (rdev->pdev->device == 0x1310) ||
3207 		    (rdev->pdev->device == 0x1311) ||
3208 		    (rdev->pdev->device == 0x131C)) {
3209 			rdev->config.cik.max_cu_per_sh = 8;
3210 			rdev->config.cik.max_backends_per_se = 2;
3211 		} else if ((rdev->pdev->device == 0x1309) ||
3212 			   (rdev->pdev->device == 0x130A) ||
3213 			   (rdev->pdev->device == 0x130D) ||
3214 			   (rdev->pdev->device == 0x1313) ||
3215 			   (rdev->pdev->device == 0x131D)) {
3216 			rdev->config.cik.max_cu_per_sh = 6;
3217 			rdev->config.cik.max_backends_per_se = 2;
3218 		} else if ((rdev->pdev->device == 0x1306) ||
3219 			   (rdev->pdev->device == 0x1307) ||
3220 			   (rdev->pdev->device == 0x130B) ||
3221 			   (rdev->pdev->device == 0x130E) ||
3222 			   (rdev->pdev->device == 0x1315) ||
3223 			   (rdev->pdev->device == 0x131B)) {
3224 			rdev->config.cik.max_cu_per_sh = 4;
3225 			rdev->config.cik.max_backends_per_se = 1;
3226 		} else {
3227 			rdev->config.cik.max_cu_per_sh = 3;
3228 			rdev->config.cik.max_backends_per_se = 1;
3229 		}
3230 		rdev->config.cik.max_sh_per_se = 1;
3231 		rdev->config.cik.max_texture_channel_caches = 4;
3232 		rdev->config.cik.max_gprs = 256;
3233 		rdev->config.cik.max_gs_threads = 16;
3234 		rdev->config.cik.max_hw_contexts = 8;
3235 
3236 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3237 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3238 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3239 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3240 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3241 		break;
3242 	case CHIP_KABINI:
3243 	default:
3244 		rdev->config.cik.max_shader_engines = 1;
3245 		rdev->config.cik.max_tile_pipes = 2;
3246 		rdev->config.cik.max_cu_per_sh = 2;
3247 		rdev->config.cik.max_sh_per_se = 1;
3248 		rdev->config.cik.max_backends_per_se = 1;
3249 		rdev->config.cik.max_texture_channel_caches = 2;
3250 		rdev->config.cik.max_gprs = 256;
3251 		rdev->config.cik.max_gs_threads = 16;
3252 		rdev->config.cik.max_hw_contexts = 8;
3253 
3254 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3255 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3256 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3257 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3258 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3259 		break;
3260 	}
3261 
3262 	/* Initialize HDP */
3263 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3264 		WREG32((0x2c14 + j), 0x00000000);
3265 		WREG32((0x2c18 + j), 0x00000000);
3266 		WREG32((0x2c1c + j), 0x00000000);
3267 		WREG32((0x2c20 + j), 0x00000000);
3268 		WREG32((0x2c24 + j), 0x00000000);
3269 	}
3270 
3271 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3272 
3273 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3274 
3275 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3276 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3277 
3278 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3279 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3280 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3281 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3282 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3283 		rdev->config.cik.mem_row_size_in_kb = 4;
3284 	/* XXX use MC settings? */
3285 	rdev->config.cik.shader_engine_tile_size = 32;
3286 	rdev->config.cik.num_gpus = 1;
3287 	rdev->config.cik.multi_gpu_tile_size = 64;
3288 
3289 	/* fix up row size */
3290 	gb_addr_config &= ~ROW_SIZE_MASK;
3291 	switch (rdev->config.cik.mem_row_size_in_kb) {
3292 	case 1:
3293 	default:
3294 		gb_addr_config |= ROW_SIZE(0);
3295 		break;
3296 	case 2:
3297 		gb_addr_config |= ROW_SIZE(1);
3298 		break;
3299 	case 4:
3300 		gb_addr_config |= ROW_SIZE(2);
3301 		break;
3302 	}
3303 
3304 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3305 	 * not have bank info, so create a custom tiling dword.
3306 	 * bits 3:0   num_pipes
3307 	 * bits 7:4   num_banks
3308 	 * bits 11:8  group_size
3309 	 * bits 15:12 row_size
3310 	 */
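	/*
	 * Illustrative decode (a sketch, not part of the driver): a consumer
	 * of tile_config can unpack the dword with plain shifts and masks:
	 *
	 *   num_pipes  = 1 << (tile_config & 0xf);   (0->1, 1->2, 2->4, 3->8)
	 *   num_banks  = (tile_config >> 4) & 0xf;   (raw NOOFBANK encoding)
	 *   group_size = (tile_config >> 8) & 0xf;   (raw PIPE_INTERLEAVE encoding)
	 *   row_size   = (tile_config >> 12) & 0xf;  (raw ROW_SIZE encoding)
	 *
	 * Only num_pipes decodes to a count; the other fields keep the raw
	 * register encodings packed in below.
	 */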
3311 	rdev->config.cik.tile_config = 0;
3312 	switch (rdev->config.cik.num_tile_pipes) {
3313 	case 1:
3314 		rdev->config.cik.tile_config |= (0 << 0);
3315 		break;
3316 	case 2:
3317 		rdev->config.cik.tile_config |= (1 << 0);
3318 		break;
3319 	case 4:
3320 		rdev->config.cik.tile_config |= (2 << 0);
3321 		break;
3322 	case 8:
3323 	default:
3324 		/* XXX what about 12? */
3325 		rdev->config.cik.tile_config |= (3 << 0);
3326 		break;
3327 	}
3328 	rdev->config.cik.tile_config |=
3329 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3330 	rdev->config.cik.tile_config |=
3331 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3332 	rdev->config.cik.tile_config |=
3333 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3334 
3335 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3336 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3337 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3338 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3339 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3340 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3341 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3342 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3343 
3344 	cik_tiling_mode_table_init(rdev);
3345 
3346 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3347 		     rdev->config.cik.max_sh_per_se,
3348 		     rdev->config.cik.max_backends_per_se);
3349 
3350 	/* set HW defaults for 3D engine */
3351 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3352 
3353 	WREG32(SX_DEBUG_1, 0x20);
3354 
3355 	WREG32(TA_CNTL_AUX, 0x00010000);
3356 
3357 	tmp = RREG32(SPI_CONFIG_CNTL);
3358 	tmp |= 0x03000000;
3359 	WREG32(SPI_CONFIG_CNTL, tmp);
3360 
3361 	WREG32(SQ_CONFIG, 1);
3362 
3363 	WREG32(DB_DEBUG, 0);
3364 
3365 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3366 	tmp |= 0x00000400;
3367 	WREG32(DB_DEBUG2, tmp);
3368 
3369 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3370 	tmp |= 0x00020200;
3371 	WREG32(DB_DEBUG3, tmp);
3372 
3373 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3374 	tmp |= 0x00018208;
3375 	WREG32(CB_HW_CONTROL, tmp);
3376 
3377 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3378 
3379 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3380 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3381 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3382 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3383 
3384 	WREG32(VGT_NUM_INSTANCES, 1);
3385 
3386 	WREG32(CP_PERFMON_CNTL, 0);
3387 
3388 	WREG32(SQ_CONFIG, 0);
3389 
3390 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3391 					  FORCE_EOV_MAX_REZ_CNT(255)));
3392 
3393 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3394 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3395 
3396 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3397 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3398 
3399 	tmp = RREG32(HDP_MISC_CNTL);
3400 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3401 	WREG32(HDP_MISC_CNTL, tmp);
3402 
3403 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3404 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3405 
3406 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3407 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3408 
3409 	udelay(50);
3410 }
3411 
3412 /*
3413  * GPU scratch register helper functions.
3414  */
3415 /**
3416  * cik_scratch_init - setup driver info for CP scratch regs
3417  *
3418  * @rdev: radeon_device pointer
3419  *
3420  * Set up the number and offset of the CP scratch registers.
3421  * NOTE: use of CP scratch registers is a legacy interface and
3422  * is not used by default on newer asics (r6xx+).  On newer asics,
3423  * memory buffers are used for fences rather than scratch regs.
3424  */
3425 static void cik_scratch_init(struct radeon_device *rdev)
3426 {
3427 	int i;
3428 
3429 	rdev->scratch.num_reg = 7;
3430 	rdev->scratch.reg_base = SCRATCH_REG0;
3431 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3432 		rdev->scratch.free[i] = true;
3433 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3434 	}
3435 }
3436 
3437 /**
3438  * cik_ring_test - basic gfx ring test
3439  *
3440  * @rdev: radeon_device pointer
3441  * @ring: radeon_ring structure holding ring information
3442  *
3443  * Allocate a scratch register and write to it using the gfx ring (CIK).
3444  * Provides a basic gfx ring test to verify that the ring is working.
3445  * Used by cik_cp_gfx_resume().
3446  * Returns 0 on success, error on failure.
3447  */
3448 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3449 {
3450 	uint32_t scratch;
3451 	uint32_t tmp = 0;
3452 	unsigned i;
3453 	int r;
3454 
3455 	r = radeon_scratch_get(rdev, &scratch);
3456 	if (r) {
3457 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3458 		return r;
3459 	}
3460 	WREG32(scratch, 0xCAFEDEAD);
3461 	r = radeon_ring_lock(rdev, ring, 3);
3462 	if (r) {
3463 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3464 		radeon_scratch_free(rdev, scratch);
3465 		return r;
3466 	}
3467 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3468 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3469 	radeon_ring_write(ring, 0xDEADBEEF);
3470 	radeon_ring_unlock_commit(rdev, ring);
3471 
3472 	for (i = 0; i < rdev->usec_timeout; i++) {
3473 		tmp = RREG32(scratch);
3474 		if (tmp == 0xDEADBEEF)
3475 			break;
3476 		DRM_UDELAY(1);
3477 	}
3478 	if (i < rdev->usec_timeout) {
3479 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3480 	} else {
3481 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3482 			  ring->idx, scratch, tmp);
3483 		r = -EINVAL;
3484 	}
3485 	radeon_scratch_free(rdev, scratch);
3486 	return r;
3487 }
3488 
3489 /**
3490  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3491  *
3492  * @rdev: radeon_device pointer
3493  * @ridx: radeon ring index
3494  *
3495  * Emits an hdp flush on the cp.
3496  */
3497 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3498 				       int ridx)
3499 {
3500 	struct radeon_ring *ring = &rdev->ring[ridx];
3501 	u32 ref_and_mask;
3502 
3503 	switch (ring->idx) {
3504 	case CAYMAN_RING_TYPE_CP1_INDEX:
3505 	case CAYMAN_RING_TYPE_CP2_INDEX:
3506 	default:
3507 		switch (ring->me) {
3508 		case 0:
3509 			ref_and_mask = CP2 << ring->pipe;
3510 			break;
3511 		case 1:
3512 			ref_and_mask = CP6 << ring->pipe;
3513 			break;
3514 		default:
3515 			return;
3516 		}
3517 		break;
3518 	case RADEON_RING_TYPE_GFX_INDEX:
3519 		ref_and_mask = CP0;
3520 		break;
3521 	}
3522 
3523 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3524 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3525 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3526 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3527 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3528 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3529 	radeon_ring_write(ring, ref_and_mask);
3530 	radeon_ring_write(ring, ref_and_mask);
3531 	radeon_ring_write(ring, 0x20); /* poll interval */
3532 }
3533 
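/*
 * For reference (our reading of the write/wait/write operation above, not
 * authoritative): the PFP writes ref_and_mask to GPU_HDP_FLUSH_REQ to kick
 * off the flush, then polls GPU_HDP_FLUSH_DONE every 0x20 clocks until the
 * masked value equals ref_and_mask, i.e. until the HDP flush for this
 * ring's client bit has completed.
 */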
3534 /**
3535  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3536  *
3537  * @rdev: radeon_device pointer
3538  * @fence: radeon fence object
3539  *
3540  * Emits a fence sequence number on the gfx ring and flushes
3541  * GPU caches.
3542  */
3543 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3544 			     struct radeon_fence *fence)
3545 {
3546 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3547 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3548 
3549 	/* EVENT_WRITE_EOP - flush caches, send int */
3550 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3551 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3552 				 EOP_TC_ACTION_EN |
3553 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3554 				 EVENT_INDEX(5)));
3555 	radeon_ring_write(ring, addr & 0xfffffffc);
3556 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3557 	radeon_ring_write(ring, fence->seq);
3558 	radeon_ring_write(ring, 0);
3559 	/* HDP flush */
3560 	cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
3561 }
3562 
3563 /**
3564  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3565  *
3566  * @rdev: radeon_device pointer
3567  * @fence: radeon fence object
3568  *
3569  * Emits a fence sequence number on the compute ring and flushes
3570  * GPU caches.
3571  */
3572 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3573 				 struct radeon_fence *fence)
3574 {
3575 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3576 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3577 
3578 	/* RELEASE_MEM - flush caches, send int */
3579 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3580 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3581 				 EOP_TC_ACTION_EN |
3582 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3583 				 EVENT_INDEX(5)));
3584 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3585 	radeon_ring_write(ring, addr & 0xfffffffc);
3586 	radeon_ring_write(ring, upper_32_bits(addr));
3587 	radeon_ring_write(ring, fence->seq);
3588 	radeon_ring_write(ring, 0);
3589 	/* HDP flush */
3590 	cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
3591 }
3592 
3593 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3594 			     struct radeon_ring *ring,
3595 			     struct radeon_semaphore *semaphore,
3596 			     bool emit_wait)
3597 {
3598 	uint64_t addr = semaphore->gpu_addr;
3599 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3600 
3601 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3602 	radeon_ring_write(ring, addr & 0xffffffff);
3603 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3604 
3605 	return true;
3606 }
3607 
3608 /**
3609  * cik_copy_cpdma - copy pages using the CP DMA engine
3610  *
3611  * @rdev: radeon_device pointer
3612  * @src_offset: src GPU address
3613  * @dst_offset: dst GPU address
3614  * @num_gpu_pages: number of GPU pages to xfer
3615  * @fence: radeon fence object
3616  *
3617  * Copy GPU pages using the CP DMA engine (CIK+).
3618  * Used by the radeon ttm implementation to move pages if
3619  * registered as the asic copy callback.
3620  */
3621 int cik_copy_cpdma(struct radeon_device *rdev,
3622 		   uint64_t src_offset, uint64_t dst_offset,
3623 		   unsigned num_gpu_pages,
3624 		   struct radeon_fence **fence)
3625 {
3626 	struct radeon_semaphore *sem = NULL;
3627 	int ring_index = rdev->asic->copy.blit_ring_index;
3628 	struct radeon_ring *ring = &rdev->ring[ring_index];
3629 	u32 size_in_bytes, cur_size_in_bytes, control;
3630 	int i, num_loops;
3631 	int r = 0;
3632 
3633 	r = radeon_semaphore_create(rdev, &sem);
3634 	if (r) {
3635 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3636 		return r;
3637 	}
3638 
3639 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3640 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3641 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3642 	if (r) {
3643 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3644 		radeon_semaphore_free(rdev, &sem, NULL);
3645 		return r;
3646 	}
3647 
3648 	radeon_semaphore_sync_to(sem, *fence);
3649 	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
3650 
3651 	for (i = 0; i < num_loops; i++) {
3652 		cur_size_in_bytes = size_in_bytes;
3653 		if (cur_size_in_bytes > 0x1fffff)
3654 			cur_size_in_bytes = 0x1fffff;
3655 		size_in_bytes -= cur_size_in_bytes;
3656 		control = 0;
3657 		if (size_in_bytes == 0)
3658 			control |= PACKET3_DMA_DATA_CP_SYNC;
3659 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3660 		radeon_ring_write(ring, control);
3661 		radeon_ring_write(ring, lower_32_bits(src_offset));
3662 		radeon_ring_write(ring, upper_32_bits(src_offset));
3663 		radeon_ring_write(ring, lower_32_bits(dst_offset));
3664 		radeon_ring_write(ring, upper_32_bits(dst_offset));
3665 		radeon_ring_write(ring, cur_size_in_bytes);
3666 		src_offset += cur_size_in_bytes;
3667 		dst_offset += cur_size_in_bytes;
3668 	}
3669 
3670 	r = radeon_fence_emit(rdev, fence, ring->idx);
3671 	if (r) {
3672 		radeon_ring_unlock_undo(rdev, ring);
3673 		return r;
3674 	}
3675 
3676 	radeon_ring_unlock_commit(rdev, ring);
3677 	radeon_semaphore_free(rdev, &sem, *fence);
3678 
3679 	return r;
3680 }
3681 
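/*
 * Illustrative sketch (hypothetical helper, not part of the driver): the
 * worst-case ring space cik_copy_cpdma() reserves above.  Each DMA_DATA
 * packet is 7 dwords and moves at most 0x1fffff bytes; the extra 18 dwords
 * are headroom for the semaphore sync and the fence emission.
 */
static inline unsigned cik_cpdma_ring_dw(unsigned size_in_bytes)
{
	return DIV_ROUND_UP(size_in_bytes, 0x1fffff) * 7 + 18;
}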
3682 /*
3683  * IB stuff
3684  */
3685 /**
3686  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3687  *
3688  * @rdev: radeon_device pointer
3689  * @ib: radeon indirect buffer object
3690  *
3691  * Emits a DE (drawing engine) or CE (constant engine) IB
3692  * on the gfx ring.  IBs are usually generated by userspace
3693  * acceleration drivers and submitted to the kernel for
3694  * scheduling on the ring.  This function schedules the IB
3695  * on the gfx ring for execution by the GPU.
3696  */
3697 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3698 {
3699 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3700 	u32 header, control = INDIRECT_BUFFER_VALID;
3701 
3702 	if (ib->is_const_ib) {
3703 		/* set switch buffer packet before const IB */
3704 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3705 		radeon_ring_write(ring, 0);
3706 
3707 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3708 	} else {
3709 		u32 next_rptr;
3710 		if (ring->rptr_save_reg) {
3711 			next_rptr = ring->wptr + 3 + 4;
3712 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3713 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3714 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3715 			radeon_ring_write(ring, next_rptr);
3716 		} else if (rdev->wb.enabled) {
3717 			next_rptr = ring->wptr + 5 + 4;
3718 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3719 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3720 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3721 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3722 			radeon_ring_write(ring, next_rptr);
3723 		}
3724 
3725 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3726 	}
3727 
3728 	control |= ib->length_dw |
3729 		(ib->vm ? (ib->vm->id << 24) : 0);
3730 
3731 	radeon_ring_write(ring, header);
3732 	radeon_ring_write(ring,
3733 #ifdef __BIG_ENDIAN
3734 			  (2 << 0) |
3735 #endif
3736 			  (ib->gpu_addr & 0xFFFFFFFC));
3737 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3738 	radeon_ring_write(ring, control);
3739 }
3740 
3741 /**
3742  * cik_ib_test - basic gfx ring IB test
3743  *
3744  * @rdev: radeon_device pointer
3745  * @ring: radeon_ring structure holding ring information
3746  *
3747  * Allocate an IB and execute it on the gfx ring (CIK).
3748  * Provides a basic gfx ring test to verify that IBs are working.
3749  * Returns 0 on success, error on failure.
3750  */
3751 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3752 {
3753 	struct radeon_ib ib;
3754 	uint32_t scratch;
3755 	uint32_t tmp = 0;
3756 	unsigned i;
3757 	int r;
3758 
3759 	r = radeon_scratch_get(rdev, &scratch);
3760 	if (r) {
3761 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3762 		return r;
3763 	}
3764 	WREG32(scratch, 0xCAFEDEAD);
3765 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3766 	if (r) {
3767 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3768 		radeon_scratch_free(rdev, scratch);
3769 		return r;
3770 	}
3771 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3772 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3773 	ib.ptr[2] = 0xDEADBEEF;
3774 	ib.length_dw = 3;
3775 	r = radeon_ib_schedule(rdev, &ib, NULL);
3776 	if (r) {
3777 		radeon_scratch_free(rdev, scratch);
3778 		radeon_ib_free(rdev, &ib);
3779 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3780 		return r;
3781 	}
3782 	r = radeon_fence_wait(ib.fence, false);
3783 	if (r) {
3784 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3785 		radeon_scratch_free(rdev, scratch);
3786 		radeon_ib_free(rdev, &ib);
3787 		return r;
3788 	}
3789 	for (i = 0; i < rdev->usec_timeout; i++) {
3790 		tmp = RREG32(scratch);
3791 		if (tmp == 0xDEADBEEF)
3792 			break;
3793 		DRM_UDELAY(1);
3794 	}
3795 	if (i < rdev->usec_timeout) {
3796 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3797 	} else {
3798 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3799 			  scratch, tmp);
3800 		r = -EINVAL;
3801 	}
3802 	radeon_scratch_free(rdev, scratch);
3803 	radeon_ib_free(rdev, &ib);
3804 	return r;
3805 }
3806 
3807 /*
3808  * CP.
3809  * On CIK, gfx and compute now have independent command processors.
3810  *
3811  * GFX
3812  * Gfx consists of a single ring and can process both gfx jobs and
3813  * compute jobs.  The gfx CP consists of three microengines (ME):
3814  * PFP - Pre-Fetch Parser
3815  * ME - Micro Engine
3816  * CE - Constant Engine
3817  * The PFP and ME make up what is considered the Drawing Engine (DE).
3818  * The CE is an asynchronous engine used for updating buffer descriptors
3819  * used by the DE so that they can be loaded into cache in parallel
3820  * while the DE is processing state update packets.
3821  *
3822  * Compute
3823  * The compute CP consists of two microengines (ME):
3824  * MEC1 - Compute MicroEngine 1
3825  * MEC2 - Compute MicroEngine 2
3826  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3827  * The queues are exposed to userspace and are programmed directly
3828  * by the compute runtime.
3829  */
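/*
 * For reference (sketch): with 4 pipes per MEC and 8 queues per pipe, the
 * exposed compute queue count is num_mec * 4 * 8 -- 64 queues on Kaveri
 * (2 MECs) and 32 on Bonaire/Kabini (1 MEC); cik_mec_init() below computes
 * exactly this.
 */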
3830 /**
3831  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3832  *
3833  * @rdev: radeon_device pointer
3834  * @enable: enable or disable the MEs
3835  *
3836  * Halts or unhalts the gfx MEs.
3837  */
3838 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3839 {
3840 	if (enable)
3841 		WREG32(CP_ME_CNTL, 0);
3842 	else {
3843 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3844 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3845 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3846 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3847 	}
3848 	udelay(50);
3849 }
3850 
3851 /**
3852  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3853  *
3854  * @rdev: radeon_device pointer
3855  *
3856  * Loads the gfx PFP, ME, and CE ucode.
3857  * Returns 0 for success, -EINVAL if the ucode is not available.
3858  */
3859 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3860 {
3861 	const __be32 *fw_data;
3862 	int i;
3863 
3864 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3865 		return -EINVAL;
3866 
3867 	cik_cp_gfx_enable(rdev, false);
3868 
3869 	/* PFP */
3870 	fw_data = (const __be32 *)rdev->pfp_fw->data;
3871 	WREG32(CP_PFP_UCODE_ADDR, 0);
3872 	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3873 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3874 	WREG32(CP_PFP_UCODE_ADDR, 0);
3875 
3876 	/* CE */
3877 	fw_data = (const __be32 *)rdev->ce_fw->data;
3878 	WREG32(CP_CE_UCODE_ADDR, 0);
3879 	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3880 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3881 	WREG32(CP_CE_UCODE_ADDR, 0);
3882 
3883 	/* ME */
3884 	fw_data = (const __be32 *)rdev->me_fw->data;
3885 	WREG32(CP_ME_RAM_WADDR, 0);
3886 	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3887 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3888 	WREG32(CP_ME_RAM_WADDR, 0);
3889 
3890 	WREG32(CP_PFP_UCODE_ADDR, 0);
3891 	WREG32(CP_CE_UCODE_ADDR, 0);
3892 	WREG32(CP_ME_RAM_WADDR, 0);
3893 	WREG32(CP_ME_RAM_RADDR, 0);
3894 	return 0;
3895 }
3896 
3897 /**
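/*
 * Illustrative sketch (hypothetical helper, not part of the driver): the
 * PFP/CE/ME loads above all share one pattern -- reset the ucode address
 * register, stream the big-endian words, then reset the address again.
 */
static void cik_load_ucode_sketch(struct radeon_device *rdev,
				  u32 addr_reg, u32 data_reg,
				  const __be32 *fw_data, int size)
{
	int i;

	WREG32(addr_reg, 0);
	for (i = 0; i < size; i++)
		WREG32(data_reg, be32_to_cpup(fw_data++));
	WREG32(addr_reg, 0);
}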
3898  * cik_cp_gfx_start - start the gfx ring
3899  *
3900  * @rdev: radeon_device pointer
3901  *
3902  * Enables the ring and loads the clear state context and other
3903  * packets required to init the ring.
3904  * Returns 0 for success, error for failure.
3905  */
3906 static int cik_cp_gfx_start(struct radeon_device *rdev)
3907 {
3908 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3909 	int r, i;
3910 
3911 	/* init the CP */
3912 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3913 	WREG32(CP_ENDIAN_SWAP, 0);
3914 	WREG32(CP_DEVICE_ID, 1);
3915 
3916 	cik_cp_gfx_enable(rdev, true);
3917 
3918 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3919 	if (r) {
3920 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3921 		return r;
3922 	}
3923 
3924 	/* init the CE partitions.  CE only used for gfx on CIK */
3925 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3926 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3927 	radeon_ring_write(ring, 0xc000);
3928 	radeon_ring_write(ring, 0xc000);
3929 
3930 	/* setup clear context state */
3931 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3932 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3933 
3934 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3935 	radeon_ring_write(ring, 0x80000000);
3936 	radeon_ring_write(ring, 0x80000000);
3937 
3938 	for (i = 0; i < cik_default_size; i++)
3939 		radeon_ring_write(ring, cik_default_state[i]);
3940 
3941 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3942 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3943 
3944 	/* set clear context state */
3945 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3946 	radeon_ring_write(ring, 0);
3947 
3948 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3949 	radeon_ring_write(ring, 0x00000316);
3950 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3951 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3952 
3953 	radeon_ring_unlock_commit(rdev, ring);
3954 
3955 	return 0;
3956 }
3957 
3958 /**
3959  * cik_cp_gfx_fini - stop the gfx ring
3960  *
3961  * @rdev: radeon_device pointer
3962  *
3963  * Stop the gfx ring and tear down the driver ring
3964  * info.
3965  */
3966 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3967 {
3968 	cik_cp_gfx_enable(rdev, false);
3969 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3970 }
3971 
3972 /**
3973  * cik_cp_gfx_resume - setup the gfx ring buffer registers
3974  *
3975  * @rdev: radeon_device pointer
3976  *
3977  * Program the location and size of the gfx ring buffer
3978  * and test it to make sure it's working.
3979  * Returns 0 for success, error for failure.
3980  */
3981 static int cik_cp_gfx_resume(struct radeon_device *rdev)
3982 {
3983 	struct radeon_ring *ring;
3984 	u32 tmp;
3985 	u32 rb_bufsz;
3986 	u64 rb_addr;
3987 	int r;
3988 
3989 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3990 	if (rdev->family != CHIP_HAWAII)
3991 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3992 
3993 	/* Set the write pointer delay */
3994 	WREG32(CP_RB_WPTR_DELAY, 0);
3995 
3996 	/* set the RB to use vmid 0 */
3997 	WREG32(CP_RB_VMID, 0);
3998 
3999 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4000 
4001 	/* ring 0 - compute and gfx */
4002 	/* Set ring buffer size */
4003 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4004 	rb_bufsz = order_base_2(ring->ring_size / 8);
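	/* e.g. a 1 MB ring: order_base_2(1048576 / 8) = order_base_2(131072) = 17 */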
4005 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4006 #ifdef __BIG_ENDIAN
4007 	tmp |= BUF_SWAP_32BIT;
4008 #endif
4009 	WREG32(CP_RB0_CNTL, tmp);
4010 
4011 	/* Initialize the ring buffer's read and write pointers */
4012 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4013 	ring->wptr = 0;
4014 	WREG32(CP_RB0_WPTR, ring->wptr);
4015 
4016 	/* set the wb address whether it's enabled or not */
4017 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4018 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4019 
4020 	/* scratch register shadowing is no longer supported */
4021 	WREG32(SCRATCH_UMSK, 0);
4022 
4023 	if (!rdev->wb.enabled)
4024 		tmp |= RB_NO_UPDATE;
4025 
4026 	mdelay(1);
4027 	WREG32(CP_RB0_CNTL, tmp);
4028 
4029 	rb_addr = ring->gpu_addr >> 8;
4030 	WREG32(CP_RB0_BASE, rb_addr);
4031 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4032 
4033 	ring->rptr = RREG32(CP_RB0_RPTR);
4034 
4035 	/* start the ring */
4036 	cik_cp_gfx_start(rdev);
4037 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4038 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4039 	if (r) {
4040 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4041 		return r;
4042 	}
4043 
4044 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4045 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4046 
4047 	return 0;
4048 }
4049 
4050 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4051 		     struct radeon_ring *ring)
4052 {
4053 	u32 rptr;
4054 
4055 	if (rdev->wb.enabled)
4056 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4057 	else
4058 		rptr = RREG32(CP_RB0_RPTR);
4059 
4060 	return rptr;
4061 }
4062 
4063 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4064 		     struct radeon_ring *ring)
4065 {
4066 	u32 wptr;
4067 
4068 	wptr = RREG32(CP_RB0_WPTR);
4069 
4070 	return wptr;
4071 }
4072 
4073 void cik_gfx_set_wptr(struct radeon_device *rdev,
4074 		      struct radeon_ring *ring)
4075 {
4076 	WREG32(CP_RB0_WPTR, ring->wptr);
4077 	(void)RREG32(CP_RB0_WPTR);
4078 }
4079 
4080 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4081 			 struct radeon_ring *ring)
4082 {
4083 	u32 rptr;
4084 
4085 	if (rdev->wb.enabled) {
4086 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4087 	} else {
4088 		mutex_lock(&rdev->srbm_mutex);
4089 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4090 		rptr = RREG32(CP_HQD_PQ_RPTR);
4091 		cik_srbm_select(rdev, 0, 0, 0, 0);
4092 		mutex_unlock(&rdev->srbm_mutex);
4093 	}
4094 
4095 	return rptr;
4096 }
4097 
4098 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4099 			 struct radeon_ring *ring)
4100 {
4101 	u32 wptr;
4102 
4103 	if (rdev->wb.enabled) {
4104 		/* XXX check if swapping is necessary on BE */
4105 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4106 	} else {
4107 		mutex_lock(&rdev->srbm_mutex);
4108 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4109 		wptr = RREG32(CP_HQD_PQ_WPTR);
4110 		cik_srbm_select(rdev, 0, 0, 0, 0);
4111 		mutex_unlock(&rdev->srbm_mutex);
4112 	}
4113 
4114 	return wptr;
4115 }
4116 
4117 void cik_compute_set_wptr(struct radeon_device *rdev,
4118 			  struct radeon_ring *ring)
4119 {
4120 	/* XXX check if swapping is necessary on BE */
4121 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4122 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4123 }
4124 
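/*
 * For reference (our understanding, not authoritative): the compute path
 * publishes the new write pointer through the write-back page and then
 * rings the queue's doorbell, so no SRBM-banked register access is needed
 * on submission; contrast cik_gfx_set_wptr() above, which writes
 * CP_RB0_WPTR directly.
 */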
4125 /**
4126  * cik_cp_compute_enable - enable/disable the compute CP MEs
4127  *
4128  * @rdev: radeon_device pointer
4129  * @enable: enable or disable the MEs
4130  *
4131  * Halts or unhalts the compute MEs.
4132  */
4133 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4134 {
4135 	if (enable)
4136 		WREG32(CP_MEC_CNTL, 0);
4137 	else {
4138 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4139 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4140 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4141 	}
4142 	udelay(50);
4143 }
4144 
4145 /**
4146  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4147  *
4148  * @rdev: radeon_device pointer
4149  *
4150  * Loads the compute MEC1 and MEC2 ucode.
4151  * Returns 0 for success, -EINVAL if the ucode is not available.
4152  */
4153 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4154 {
4155 	const __be32 *fw_data;
4156 	int i;
4157 
4158 	if (!rdev->mec_fw)
4159 		return -EINVAL;
4160 
4161 	cik_cp_compute_enable(rdev, false);
4162 
4163 	/* MEC1 */
4164 	fw_data = (const __be32 *)rdev->mec_fw->data;
4165 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4166 	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4167 		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4168 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4169 
4170 	if (rdev->family == CHIP_KAVERI) {
4171 		/* MEC2 */
4172 		fw_data = (const __be32 *)rdev->mec_fw->data;
4173 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4174 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4175 			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4176 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4177 	}
4178 
4179 	return 0;
4180 }
4181 
4182 /**
4183  * cik_cp_compute_start - start the compute queues
4184  *
4185  * @rdev: radeon_device pointer
4186  *
4187  * Enable the compute queues.
4188  * Returns 0 for success, error for failure.
4189  */
4190 static int cik_cp_compute_start(struct radeon_device *rdev)
4191 {
4192 	cik_cp_compute_enable(rdev, true);
4193 
4194 	return 0;
4195 }
4196 
4197 /**
4198  * cik_cp_compute_fini - stop the compute queues
4199  *
4200  * @rdev: radeon_device pointer
4201  *
4202  * Stop the compute queues and tear down the driver queue
4203  * info.
4204  */
4205 static void cik_cp_compute_fini(struct radeon_device *rdev)
4206 {
4207 	int i, idx, r;
4208 
4209 	cik_cp_compute_enable(rdev, false);
4210 
4211 	for (i = 0; i < 2; i++) {
4212 		if (i == 0)
4213 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4214 		else
4215 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4216 
4217 		if (rdev->ring[idx].mqd_obj) {
4218 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4219 			if (unlikely(r != 0))
4220 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4221 
4222 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4223 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4224 
4225 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4226 			rdev->ring[idx].mqd_obj = NULL;
4227 		}
4228 	}
4229 }
4230 
4231 static void cik_mec_fini(struct radeon_device *rdev)
4232 {
4233 	int r;
4234 
4235 	if (rdev->mec.hpd_eop_obj) {
4236 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4237 		if (unlikely(r != 0))
4238 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4239 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4240 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4241 
4242 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4243 		rdev->mec.hpd_eop_obj = NULL;
4244 	}
4245 }
4246 
4247 #define MEC_HPD_SIZE 2048
4248 
4249 static int cik_mec_init(struct radeon_device *rdev)
4250 {
4251 	int r;
4252 	u32 *hpd;
4253 
4254 	/*
4255 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4256 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4257 	 */
4258 	if (rdev->family == CHIP_KAVERI)
4259 		rdev->mec.num_mec = 2;
4260 	else
4261 		rdev->mec.num_mec = 1;
4262 	rdev->mec.num_pipe = 4;
4263 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4264 
4265 	if (rdev->mec.hpd_eop_obj == NULL) {
4266 		r = radeon_bo_create(rdev,
4267 				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4268 				     PAGE_SIZE, true,
4269 				     RADEON_GEM_DOMAIN_GTT, NULL,
4270 				     &rdev->mec.hpd_eop_obj);
4271 		if (r) {
4272 			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4273 			return r;
4274 		}
4275 	}
4276 
4277 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4278 	if (unlikely(r != 0)) {
4279 		cik_mec_fini(rdev);
4280 		return r;
4281 	}
4282 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4283 			  &rdev->mec.hpd_eop_gpu_addr);
4284 	if (r) {
4285 		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4286 		cik_mec_fini(rdev);
4287 		return r;
4288 	}
4289 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4290 	if (r) {
4291 		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4292 		cik_mec_fini(rdev);
4293 		return r;
4294 	}
4295 
4296 	/* clear memory.  Not sure if this is required or not */
4297 	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4298 
4299 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4300 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4301 
4302 	return 0;
4303 }
4304 
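/*
 * For reference (sketch): the HPD EOP buffer allocated above is
 * num_mec * num_pipe * MEC_HPD_SIZE * 2 bytes, one MEC_HPD_SIZE * 2 slice
 * per pipe: 2 * 4 * 2048 * 2 = 32 KB on Kaveri, 1 * 4 * 2048 * 2 = 16 KB
 * on Bonaire/Kabini.
 */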
4305 struct hqd_registers
4306 {
4307 	u32 cp_mqd_base_addr;
4308 	u32 cp_mqd_base_addr_hi;
4309 	u32 cp_hqd_active;
4310 	u32 cp_hqd_vmid;
4311 	u32 cp_hqd_persistent_state;
4312 	u32 cp_hqd_pipe_priority;
4313 	u32 cp_hqd_queue_priority;
4314 	u32 cp_hqd_quantum;
4315 	u32 cp_hqd_pq_base;
4316 	u32 cp_hqd_pq_base_hi;
4317 	u32 cp_hqd_pq_rptr;
4318 	u32 cp_hqd_pq_rptr_report_addr;
4319 	u32 cp_hqd_pq_rptr_report_addr_hi;
4320 	u32 cp_hqd_pq_wptr_poll_addr;
4321 	u32 cp_hqd_pq_wptr_poll_addr_hi;
4322 	u32 cp_hqd_pq_doorbell_control;
4323 	u32 cp_hqd_pq_wptr;
4324 	u32 cp_hqd_pq_control;
4325 	u32 cp_hqd_ib_base_addr;
4326 	u32 cp_hqd_ib_base_addr_hi;
4327 	u32 cp_hqd_ib_rptr;
4328 	u32 cp_hqd_ib_control;
4329 	u32 cp_hqd_iq_timer;
4330 	u32 cp_hqd_iq_rptr;
4331 	u32 cp_hqd_dequeue_request;
4332 	u32 cp_hqd_dma_offload;
4333 	u32 cp_hqd_sema_cmd;
4334 	u32 cp_hqd_msg_type;
4335 	u32 cp_hqd_atomic0_preop_lo;
4336 	u32 cp_hqd_atomic0_preop_hi;
4337 	u32 cp_hqd_atomic1_preop_lo;
4338 	u32 cp_hqd_atomic1_preop_hi;
4339 	u32 cp_hqd_hq_scheduler0;
4340 	u32 cp_hqd_hq_scheduler1;
4341 	u32 cp_mqd_control;
4342 };
4343 
4344 struct bonaire_mqd
4345 {
4346 	u32 header;
4347 	u32 dispatch_initiator;
4348 	u32 dimensions[3];
4349 	u32 start_idx[3];
4350 	u32 num_threads[3];
4351 	u32 pipeline_stat_enable;
4352 	u32 perf_counter_enable;
4353 	u32 pgm[2];
4354 	u32 tba[2];
4355 	u32 tma[2];
4356 	u32 pgm_rsrc[2];
4357 	u32 vmid;
4358 	u32 resource_limits;
4359 	u32 static_thread_mgmt01[2];
4360 	u32 tmp_ring_size;
4361 	u32 static_thread_mgmt23[2];
4362 	u32 restart[3];
4363 	u32 thread_trace_enable;
4364 	u32 reserved1;
4365 	u32 user_data[16];
4366 	u32 vgtcs_invoke_count[2];
4367 	struct hqd_registers queue_state;
4368 	u32 dequeue_cntr;
4369 	u32 interrupt_queue[64];
4370 };
4371 
4372 /**
4373  * cik_cp_compute_resume - setup the compute queue registers
4374  *
4375  * @rdev: radeon_device pointer
4376  *
4377  * Program the compute queues and test them to make sure they
4378  * are working.
4379  * Returns 0 for success, error for failure.
4380  */
4381 static int cik_cp_compute_resume(struct radeon_device *rdev)
4382 {
4383 	int r, i, j, idx;
4384 	u32 tmp;
4385 	bool use_doorbell = true;
4386 	u64 hqd_gpu_addr;
4387 	u64 mqd_gpu_addr;
4388 	u64 eop_gpu_addr;
4389 	u64 wb_gpu_addr;
4390 	u32 *buf;
4391 	struct bonaire_mqd *mqd;
4392 
4393 	r = cik_cp_compute_start(rdev);
4394 	if (r)
4395 		return r;
4396 
4397 	/* fix up chicken bits */
4398 	tmp = RREG32(CP_CPF_DEBUG);
4399 	tmp |= (1 << 23);
4400 	WREG32(CP_CPF_DEBUG, tmp);
4401 
4402 	/* init the pipes */
4403 	mutex_lock(&rdev->srbm_mutex);
4404 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4405 		int me = (i < 4) ? 1 : 2;
4406 		int pipe = (i < 4) ? i : (i - 4);
4407 
4408 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4409 
4410 		cik_srbm_select(rdev, me, pipe, 0, 0);
4411 
4412 		/* write the EOP addr */
4413 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4414 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4415 
4416 		/* set the VMID assigned */
4417 		WREG32(CP_HPD_EOP_VMID, 0);
4418 
4419 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4420 		tmp = RREG32(CP_HPD_EOP_CONTROL);
4421 		tmp &= ~EOP_SIZE_MASK;
4422 		tmp |= order_base_2(MEC_HPD_SIZE / 8);
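		/* e.g. MEC_HPD_SIZE = 2048: order_base_2(2048 / 8) = 8, so the
		 * programmed EOP size is 2^(8+1) = 512 dwords = 2048 bytes
		 */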
4423 		WREG32(CP_HPD_EOP_CONTROL, tmp);
4424 	}
4425 	cik_srbm_select(rdev, 0, 0, 0, 0);
4426 	mutex_unlock(&rdev->srbm_mutex);
4427 
4428 	/* init the queues.  Just two for now. */
4429 	for (i = 0; i < 2; i++) {
4430 		if (i == 0)
4431 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4432 		else
4433 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4434 
4435 		if (rdev->ring[idx].mqd_obj == NULL) {
4436 			r = radeon_bo_create(rdev,
4437 					     sizeof(struct bonaire_mqd),
4438 					     PAGE_SIZE, true,
4439 					     RADEON_GEM_DOMAIN_GTT, NULL,
4440 					     &rdev->ring[idx].mqd_obj);
4441 			if (r) {
4442 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4443 				return r;
4444 			}
4445 		}
4446 
4447 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4448 		if (unlikely(r != 0)) {
4449 			cik_cp_compute_fini(rdev);
4450 			return r;
4451 		}
4452 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4453 				  &mqd_gpu_addr);
4454 		if (r) {
4455 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4456 			cik_cp_compute_fini(rdev);
4457 			return r;
4458 		}
4459 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4460 		if (r) {
4461 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4462 			cik_cp_compute_fini(rdev);
4463 			return r;
4464 		}
4465 
4466 		/* init the mqd struct */
4467 		memset(buf, 0, sizeof(struct bonaire_mqd));
4468 
4469 		mqd = (struct bonaire_mqd *)buf;
4470 		mqd->header = 0xC0310800;
4471 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4472 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4473 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4474 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4475 
4476 		mutex_lock(&rdev->srbm_mutex);
4477 		cik_srbm_select(rdev, rdev->ring[idx].me,
4478 				rdev->ring[idx].pipe,
4479 				rdev->ring[idx].queue, 0);
4480 
4481 		/* disable wptr polling */
4482 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4483 		tmp &= ~WPTR_POLL_EN;
4484 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4485 
4486 		/* enable doorbell? */
4487 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4488 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4489 		if (use_doorbell)
4490 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4491 		else
4492 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4493 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4494 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4495 
4496 		/* disable the queue if it's active */
4497 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4498 		mqd->queue_state.cp_hqd_pq_rptr = 0;
4499 		mqd->queue_state.cp_hqd_pq_wptr = 0;
4500 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4501 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4502 			for (j = 0; j < rdev->usec_timeout; j++) {
4503 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4504 					break;
4505 				udelay(1);
4506 			}
4507 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4508 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4509 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4510 		}
4511 
4512 		/* set the pointer to the MQD */
4513 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4514 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4515 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4516 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4517 		/* set MQD vmid to 0 */
4518 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4519 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4520 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4521 
4522 		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4523 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4524 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4525 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4526 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4527 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4528 
4529 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4530 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4531 		mqd->queue_state.cp_hqd_pq_control &=
4532 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4533 
4534 		mqd->queue_state.cp_hqd_pq_control |=
4535 			order_base_2(rdev->ring[idx].ring_size / 8);
4536 		mqd->queue_state.cp_hqd_pq_control |=
4537 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4538 #ifdef __BIG_ENDIAN
4539 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4540 #endif
4541 		mqd->queue_state.cp_hqd_pq_control &=
4542 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4543 		mqd->queue_state.cp_hqd_pq_control |=
4544 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4545 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4546 
4547 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4548 		if (i == 0)
4549 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4550 		else
4551 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4552 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4553 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4554 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4555 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4556 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4557 
4558 		/* set the wb address whether it's enabled or not */
4559 		if (i == 0)
4560 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4561 		else
4562 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4563 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4564 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4565 			upper_32_bits(wb_gpu_addr) & 0xffff;
4566 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4567 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4568 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4569 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4570 
4571 		/* enable the doorbell if requested */
4572 		if (use_doorbell) {
4573 			mqd->queue_state.cp_hqd_pq_doorbell_control =
4574 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4575 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4576 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4577 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4578 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4579 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4580 				~(DOORBELL_SOURCE | DOORBELL_HIT);
4581 
4582 		} else {
4583 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4584 		}
4585 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4586 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4587 
4588 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4589 		rdev->ring[idx].wptr = 0;
4590 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4591 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4592 		rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
4593 		mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
4594 
4595 		/* set the vmid for the queue */
4596 		mqd->queue_state.cp_hqd_vmid = 0;
4597 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4598 
4599 		/* activate the queue */
4600 		mqd->queue_state.cp_hqd_active = 1;
4601 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4602 
4603 		cik_srbm_select(rdev, 0, 0, 0, 0);
4604 		mutex_unlock(&rdev->srbm_mutex);
4605 
4606 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4607 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4608 
4609 		rdev->ring[idx].ready = true;
4610 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4611 		if (r)
4612 			rdev->ring[idx].ready = false;
4613 	}
4614 
4615 	return 0;
4616 }
4617 
4618 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4619 {
4620 	cik_cp_gfx_enable(rdev, enable);
4621 	cik_cp_compute_enable(rdev, enable);
4622 }
4623 
4624 static int cik_cp_load_microcode(struct radeon_device *rdev)
4625 {
4626 	int r;
4627 
4628 	r = cik_cp_gfx_load_microcode(rdev);
4629 	if (r)
4630 		return r;
4631 	r = cik_cp_compute_load_microcode(rdev);
4632 	if (r)
4633 		return r;
4634 
4635 	return 0;
4636 }
4637 
4638 static void cik_cp_fini(struct radeon_device *rdev)
4639 {
4640 	cik_cp_gfx_fini(rdev);
4641 	cik_cp_compute_fini(rdev);
4642 }
4643 
4644 static int cik_cp_resume(struct radeon_device *rdev)
4645 {
4646 	int r;
4647 
4648 	cik_enable_gui_idle_interrupt(rdev, false);
4649 
4650 	r = cik_cp_load_microcode(rdev);
4651 	if (r)
4652 		return r;
4653 
4654 	r = cik_cp_gfx_resume(rdev);
4655 	if (r)
4656 		return r;
4657 	r = cik_cp_compute_resume(rdev);
4658 	if (r)
4659 		return r;
4660 
4661 	cik_enable_gui_idle_interrupt(rdev, true);
4662 
4663 	return 0;
4664 }
4665 
4666 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4667 {
4668 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4669 		RREG32(GRBM_STATUS));
4670 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4671 		RREG32(GRBM_STATUS2));
4672 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4673 		RREG32(GRBM_STATUS_SE0));
4674 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4675 		RREG32(GRBM_STATUS_SE1));
4676 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4677 		RREG32(GRBM_STATUS_SE2));
4678 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4679 		RREG32(GRBM_STATUS_SE3));
4680 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4681 		RREG32(SRBM_STATUS));
4682 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4683 		RREG32(SRBM_STATUS2));
4684 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4685 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4686 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4687 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4688 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4689 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4690 		 RREG32(CP_STALLED_STAT1));
4691 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4692 		 RREG32(CP_STALLED_STAT2));
4693 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4694 		 RREG32(CP_STALLED_STAT3));
4695 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4696 		 RREG32(CP_CPF_BUSY_STAT));
4697 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4698 		 RREG32(CP_CPF_STALLED_STAT1));
4699 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4700 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4701 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4702 		 RREG32(CP_CPC_STALLED_STAT1));
4703 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4704 }
4705 
4706 /**
4707  * cik_gpu_check_soft_reset - check which blocks are busy
4708  *
4709  * @rdev: radeon_device pointer
4710  *
4711  * Check which blocks are busy and return the relevant reset
4712  * mask to be used by cik_gpu_soft_reset().
4713  * Returns a mask of the blocks to be reset.
4714  */
4715 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4716 {
4717 	u32 reset_mask = 0;
4718 	u32 tmp;
4719 
4720 	/* GRBM_STATUS */
4721 	tmp = RREG32(GRBM_STATUS);
4722 	if (tmp & (PA_BUSY | SC_BUSY |
4723 		   BCI_BUSY | SX_BUSY |
4724 		   TA_BUSY | VGT_BUSY |
4725 		   DB_BUSY | CB_BUSY |
4726 		   GDS_BUSY | SPI_BUSY |
4727 		   IA_BUSY | IA_BUSY_NO_DMA))
4728 		reset_mask |= RADEON_RESET_GFX;
4729 
4730 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4731 		reset_mask |= RADEON_RESET_CP;
4732 
4733 	/* GRBM_STATUS2 */
4734 	tmp = RREG32(GRBM_STATUS2);
4735 	if (tmp & RLC_BUSY)
4736 		reset_mask |= RADEON_RESET_RLC;
4737 
4738 	/* SDMA0_STATUS_REG */
4739 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4740 	if (!(tmp & SDMA_IDLE))
4741 		reset_mask |= RADEON_RESET_DMA;
4742 
4743 	/* SDMA1_STATUS_REG */
4744 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4745 	if (!(tmp & SDMA_IDLE))
4746 		reset_mask |= RADEON_RESET_DMA1;
4747 
4748 	/* SRBM_STATUS2 */
4749 	tmp = RREG32(SRBM_STATUS2);
4750 	if (tmp & SDMA_BUSY)
4751 		reset_mask |= RADEON_RESET_DMA;
4752 
4753 	if (tmp & SDMA1_BUSY)
4754 		reset_mask |= RADEON_RESET_DMA1;
4755 
4756 	/* SRBM_STATUS */
4757 	tmp = RREG32(SRBM_STATUS);
4758 
4759 	if (tmp & IH_BUSY)
4760 		reset_mask |= RADEON_RESET_IH;
4761 
4762 	if (tmp & SEM_BUSY)
4763 		reset_mask |= RADEON_RESET_SEM;
4764 
4765 	if (tmp & GRBM_RQ_PENDING)
4766 		reset_mask |= RADEON_RESET_GRBM;
4767 
4768 	if (tmp & VMC_BUSY)
4769 		reset_mask |= RADEON_RESET_VMC;
4770 
4771 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4772 		   MCC_BUSY | MCD_BUSY))
4773 		reset_mask |= RADEON_RESET_MC;
4774 
4775 	if (evergreen_is_display_hung(rdev))
4776 		reset_mask |= RADEON_RESET_DISPLAY;
4777 
4778 	/* Skip MC reset as it's most likely not hung, just busy */
4779 	if (reset_mask & RADEON_RESET_MC) {
4780 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4781 		reset_mask &= ~RADEON_RESET_MC;
4782 	}
4783 
4784 	return reset_mask;
4785 }
4786 
4787 /**
4788  * cik_gpu_soft_reset - soft reset GPU
4789  *
4790  * @rdev: radeon_device pointer
4791  * @reset_mask: mask of which blocks to reset
4792  *
4793  * Soft reset the blocks specified in @reset_mask.
4794  */
4795 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4796 {
4797 	struct evergreen_mc_save save;
4798 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4799 	u32 tmp;
4800 
4801 	if (reset_mask == 0)
4802 		return;
4803 
4804 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4805 
4806 	cik_print_gpu_status_regs(rdev);
4807 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4808 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4809 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4810 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4811 
4812 	/* disable CG/PG */
4813 	cik_fini_pg(rdev);
4814 	cik_fini_cg(rdev);
4815 
4816 	/* stop the rlc */
4817 	cik_rlc_stop(rdev);
4818 
4819 	/* Disable GFX parsing/prefetching */
4820 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4821 
4822 	/* Disable MEC parsing/prefetching */
4823 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4824 
4825 	if (reset_mask & RADEON_RESET_DMA) {
4826 		/* sdma0 */
4827 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4828 		tmp |= SDMA_HALT;
4829 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4830 	}
4831 	if (reset_mask & RADEON_RESET_DMA1) {
4832 		/* sdma1 */
4833 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4834 		tmp |= SDMA_HALT;
4835 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4836 	}
4837 
4838 	evergreen_mc_stop(rdev, &save);
4839 	if (evergreen_mc_wait_for_idle(rdev)) {
4840 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4841 	}
4842 
4843 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4844 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4845 
4846 	if (reset_mask & RADEON_RESET_CP) {
4847 		grbm_soft_reset |= SOFT_RESET_CP;
4848 
4849 		srbm_soft_reset |= SOFT_RESET_GRBM;
4850 	}
4851 
4852 	if (reset_mask & RADEON_RESET_DMA)
4853 		srbm_soft_reset |= SOFT_RESET_SDMA;
4854 
4855 	if (reset_mask & RADEON_RESET_DMA1)
4856 		srbm_soft_reset |= SOFT_RESET_SDMA1;
4857 
4858 	if (reset_mask & RADEON_RESET_DISPLAY)
4859 		srbm_soft_reset |= SOFT_RESET_DC;
4860 
4861 	if (reset_mask & RADEON_RESET_RLC)
4862 		grbm_soft_reset |= SOFT_RESET_RLC;
4863 
4864 	if (reset_mask & RADEON_RESET_SEM)
4865 		srbm_soft_reset |= SOFT_RESET_SEM;
4866 
4867 	if (reset_mask & RADEON_RESET_IH)
4868 		srbm_soft_reset |= SOFT_RESET_IH;
4869 
4870 	if (reset_mask & RADEON_RESET_GRBM)
4871 		srbm_soft_reset |= SOFT_RESET_GRBM;
4872 
4873 	if (reset_mask & RADEON_RESET_VMC)
4874 		srbm_soft_reset |= SOFT_RESET_VMC;
4875 
4876 	if (!(rdev->flags & RADEON_IS_IGP)) {
4877 		if (reset_mask & RADEON_RESET_MC)
4878 			srbm_soft_reset |= SOFT_RESET_MC;
4879 	}
4880 
4881 	if (grbm_soft_reset) {
4882 		tmp = RREG32(GRBM_SOFT_RESET);
4883 		tmp |= grbm_soft_reset;
4884 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4885 		WREG32(GRBM_SOFT_RESET, tmp);
4886 		tmp = RREG32(GRBM_SOFT_RESET);
4887 
4888 		udelay(50);
4889 
4890 		tmp &= ~grbm_soft_reset;
4891 		WREG32(GRBM_SOFT_RESET, tmp);
4892 		tmp = RREG32(GRBM_SOFT_RESET);
4893 	}
4894 
4895 	if (srbm_soft_reset) {
4896 		tmp = RREG32(SRBM_SOFT_RESET);
4897 		tmp |= srbm_soft_reset;
4898 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4899 		WREG32(SRBM_SOFT_RESET, tmp);
4900 		tmp = RREG32(SRBM_SOFT_RESET);
4901 
4902 		udelay(50);
4903 
4904 		tmp &= ~srbm_soft_reset;
4905 		WREG32(SRBM_SOFT_RESET, tmp);
4906 		tmp = RREG32(SRBM_SOFT_RESET);
4907 	}
4908 
4909 	/* Wait a little for things to settle down */
4910 	udelay(50);
4911 
4912 	evergreen_mc_resume(rdev, &save);
4913 	udelay(50);
4914 
4915 	cik_print_gpu_status_regs(rdev);
4916 }
4917 
4918 struct kv_reset_save_regs {
4919 	u32 gmcon_reng_execute;
4920 	u32 gmcon_misc;
4921 	u32 gmcon_misc3;
4922 };
4923 
4924 static void kv_save_regs_for_reset(struct radeon_device *rdev,
4925 				   struct kv_reset_save_regs *save)
4926 {
4927 	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
4928 	save->gmcon_misc = RREG32(GMCON_MISC);
4929 	save->gmcon_misc3 = RREG32(GMCON_MISC3);
4930 
4931 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
4932 	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
4933 						STCTRL_STUTTER_EN));
4934 }
4935 
4936 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
4937 				      struct kv_reset_save_regs *save)
4938 {
4939 	int i;
4940 
4941 	WREG32(GMCON_PGFSM_WRITE, 0);
4942 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
4943 
4944 	for (i = 0; i < 5; i++)
4945 		WREG32(GMCON_PGFSM_WRITE, 0);
4946 
4947 	WREG32(GMCON_PGFSM_WRITE, 0);
4948 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
4949 
4950 	for (i = 0; i < 5; i++)
4951 		WREG32(GMCON_PGFSM_WRITE, 0);
4952 
4953 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
4954 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
4955 
4956 	for (i = 0; i < 5; i++)
4957 		WREG32(GMCON_PGFSM_WRITE, 0);
4958 
4959 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
4960 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
4961 
4962 	for (i = 0; i < 5; i++)
4963 		WREG32(GMCON_PGFSM_WRITE, 0);
4964 
4965 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
4966 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
4967 
4968 	for (i = 0; i < 5; i++)
4969 		WREG32(GMCON_PGFSM_WRITE, 0);
4970 
4971 	WREG32(GMCON_PGFSM_WRITE, 0);
4972 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
4973 
4974 	for (i = 0; i < 5; i++)
4975 		WREG32(GMCON_PGFSM_WRITE, 0);
4976 
4977 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
4978 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
4979 
4980 	for (i = 0; i < 5; i++)
4981 		WREG32(GMCON_PGFSM_WRITE, 0);
4982 
4983 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
4984 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
4985 
4986 	for (i = 0; i < 5; i++)
4987 		WREG32(GMCON_PGFSM_WRITE, 0);
4988 
4989 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
4990 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
4991 
4992 	for (i = 0; i < 5; i++)
4993 		WREG32(GMCON_PGFSM_WRITE, 0);
4994 
4995 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
4996 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
4997 
4998 	for (i = 0; i < 5; i++)
4999 		WREG32(GMCON_PGFSM_WRITE, 0);
5000 
5001 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5002 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5003 
5004 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5005 	WREG32(GMCON_MISC, save->gmcon_misc);
5006 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5007 }
5008 
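/**
 * cik_gpu_pci_config_reset - reset the asic via the pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Disable clock/power gating, halt the CP, MEC, SDMA, and RLC
 * engines, stop memory access, then reset the asic through the
 * pci config space and wait for it to come back (CIK).
 */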
5009 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5010 {
5011 	struct evergreen_mc_save save;
5012 	struct kv_reset_save_regs kv_save = { 0 };
5013 	u32 tmp, i;
5014 
5015 	dev_info(rdev->dev, "GPU pci config reset\n");
5016 
5017 	/* disable dpm? */
5018 
5019 	/* disable cg/pg */
5020 	cik_fini_pg(rdev);
5021 	cik_fini_cg(rdev);
5022 
5023 	/* Disable GFX parsing/prefetching */
5024 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5025 
5026 	/* Disable MEC parsing/prefetching */
5027 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5028 
5029 	/* sdma0 */
5030 	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5031 	tmp |= SDMA_HALT;
5032 	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5033 	/* sdma1 */
5034 	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5035 	tmp |= SDMA_HALT;
5036 	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5037 	/* XXX other engines? */
5038 
5039 	/* halt the rlc, disable cp internal ints */
5040 	cik_rlc_stop(rdev);
5041 
5042 	udelay(50);
5043 
5044 	/* disable mem access */
5045 	evergreen_mc_stop(rdev, &save);
5046 	if (evergreen_mc_wait_for_idle(rdev)) {
5047 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5048 	}
5049 
5050 	if (rdev->flags & RADEON_IS_IGP)
5051 		kv_save_regs_for_reset(rdev, &kv_save);
5052 
5053 	/* disable BM */
5054 	pci_clear_master(rdev->pdev);
5055 	/* reset */
5056 	radeon_pci_config_reset(rdev);
5057 
5058 	udelay(100);
5059 
5060 	/* wait for asic to come out of reset */
5061 	for (i = 0; i < rdev->usec_timeout; i++) {
5062 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5063 			break;
5064 		udelay(1);
5065 	}
5066 
5067 	/* does asic init need to be run first??? */
5068 	if (rdev->flags & RADEON_IS_IGP)
5069 		kv_restore_regs_for_reset(rdev, &kv_save);
5070 }
5071 
5072 /**
5073  * cik_asic_reset - reset the GPU
5074  *
5075  * @rdev: radeon_device pointer
5076  *
5077  * Look up which blocks are hung and attempt to reset them,
5078  * first with a soft reset, then with a pci config reset if
5079  * blocks remain hung and radeon_hard_reset is set.  Returns 0.
5080  */
5081 int cik_asic_reset(struct radeon_device *rdev)
5082 {
5083 	u32 reset_mask;
5084 
5085 	reset_mask = cik_gpu_check_soft_reset(rdev);
5086 
5087 	if (reset_mask)
5088 		r600_set_bios_scratch_engine_hung(rdev, true);
5089 
5090 	/* try soft reset */
5091 	cik_gpu_soft_reset(rdev, reset_mask);
5092 
5093 	reset_mask = cik_gpu_check_soft_reset(rdev);
5094 
5095 	/* try pci config reset */
5096 	if (reset_mask && radeon_hard_reset)
5097 		cik_gpu_pci_config_reset(rdev);
5098 
5099 	reset_mask = cik_gpu_check_soft_reset(rdev);
5100 
5101 	if (!reset_mask)
5102 		r600_set_bios_scratch_engine_hung(rdev, false);
5103 
5104 	return 0;
5105 }
5106 
5107 /**
5108  * cik_gfx_is_lockup - check if the 3D engine is locked up
5109  *
5110  * @rdev: radeon_device pointer
5111  * @ring: radeon_ring structure holding ring information
5112  *
5113  * Check if the 3D engine is locked up (CIK).
5114  * Returns true if the engine is locked, false if not.
5115  */
5116 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5117 {
5118 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5119 
5120 	if (!(reset_mask & (RADEON_RESET_GFX |
5121 			    RADEON_RESET_COMPUTE |
5122 			    RADEON_RESET_CP))) {
5123 		radeon_ring_lockup_update(ring);
5124 		return false;
5125 	}
5126 	/* force CP activities */
5127 	radeon_ring_force_activity(rdev, ring);
5128 	return radeon_ring_test_lockup(rdev, ring);
5129 }
5130 
5131 /* MC */
5132 /**
5133  * cik_mc_program - program the GPU memory controller
5134  *
5135  * @rdev: radeon_device pointer
5136  *
5137  * Set the location of vram, gart, and AGP in the GPU's
5138  * physical address space (CIK).
5139  */
5140 static void cik_mc_program(struct radeon_device *rdev)
5141 {
5142 	struct evergreen_mc_save save;
5143 	u32 tmp;
5144 	int i, j;
5145 
5146 	/* Initialize HDP */
5147 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5148 		WREG32((0x2c14 + j), 0x00000000);
5149 		WREG32((0x2c18 + j), 0x00000000);
5150 		WREG32((0x2c1c + j), 0x00000000);
5151 		WREG32((0x2c20 + j), 0x00000000);
5152 		WREG32((0x2c24 + j), 0x00000000);
5153 	}
5154 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5155 
5156 	evergreen_mc_stop(rdev, &save);
5157 	if (radeon_mc_wait_for_idle(rdev)) {
5158 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5159 	}
5160 	/* Lockout access through VGA aperture*/
5161 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5162 	/* Update configuration */
5163 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5164 	       rdev->mc.vram_start >> 12);
5165 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5166 	       rdev->mc.vram_end >> 12);
5167 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5168 	       rdev->vram_scratch.gpu_addr >> 12);
5169 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5170 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5171 	WREG32(MC_VM_FB_LOCATION, tmp);
5172 	/* XXX double check these! */
5173 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5174 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5175 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5176 	WREG32(MC_VM_AGP_BASE, 0);
5177 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5178 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5179 	if (radeon_mc_wait_for_idle(rdev)) {
5180 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5181 	}
5182 	evergreen_mc_resume(rdev, &save);
5183 	/* we need to own VRAM, so turn off the VGA renderer here
5184 	 * to stop it from overwriting our objects */
5185 	rv515_vga_render_disable(rdev);
5186 }
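/* Worked example of the MC_VM_FB_LOCATION encoding above: both fields
 * are in 16MB units (address >> 24), the end in the upper 16 bits and
 * the start in the lower 16.  A 1GB framebuffer at GPU address 0 gives
 * vram_start >> 24 = 0x0000 and vram_end >> 24 = 0x003f, so
 * MC_VM_FB_LOCATION = (0x003f << 16) | 0x0000 = 0x003f0000.
 */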
5187 
5188 /**
5189  * cik_mc_init - initialize the memory controller driver params
5190  *
5191  * @rdev: radeon_device pointer
5192  *
5193  * Look up the amount of vram, vram width, and decide how to place
5194  * vram and gart within the GPU's physical address space (CIK).
5195  * Returns 0 for success.
5196  */
5197 static int cik_mc_init(struct radeon_device *rdev)
5198 {
5199 	u32 tmp;
5200 	int chansize, numchan;
5201 
5202 	/* Get VRAM information */
5203 	rdev->mc.vram_is_ddr = true;
5204 	tmp = RREG32(MC_ARB_RAMCFG);
5205 	if (tmp & CHANSIZE_MASK) {
5206 		chansize = 64;
5207 	} else {
5208 		chansize = 32;
5209 	}
5210 	tmp = RREG32(MC_SHARED_CHMAP);
5211 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5212 	case 0:
5213 	default:
5214 		numchan = 1;
5215 		break;
5216 	case 1:
5217 		numchan = 2;
5218 		break;
5219 	case 2:
5220 		numchan = 4;
5221 		break;
5222 	case 3:
5223 		numchan = 8;
5224 		break;
5225 	case 4:
5226 		numchan = 3;
5227 		break;
5228 	case 5:
5229 		numchan = 6;
5230 		break;
5231 	case 6:
5232 		numchan = 10;
5233 		break;
5234 	case 7:
5235 		numchan = 12;
5236 		break;
5237 	case 8:
5238 		numchan = 16;
5239 		break;
5240 	}
5241 	rdev->mc.vram_width = numchan * chansize;
5242 	/* Could aper size report 0? */
5243 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5244 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5245 	/* size in MB on CIK (same as on SI) */
5246 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5247 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5248 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5249 	si_vram_gtt_location(rdev, &rdev->mc);
5250 	radeon_update_bandwidth_info(rdev);
5251 
5252 	return 0;
5253 }
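/* Example of the width calculation above: MC_SHARED_CHMAP reporting
 * NOOFCHAN = 3 decodes to numchan = 8, and a set CHANSIZE bit gives
 * chansize = 64, so rdev->mc.vram_width = 8 * 64 = 512 bits.
 */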
5254 
5255 /*
5256  * GART
5257  * VMID 0 is the physical GPU addresses as used by the kernel.
5258  * VMIDs 1-15 are used for userspace clients and are handled
5259  * by the radeon vm/hsa code.
5260  */
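/* The per-VMID page table base registers are split into two banks of
 * eight at 4-byte strides: VMIDs 0-7 from VM_CONTEXT0_PAGE_TABLE_BASE_ADDR
 * and VMIDs 8-15 from VM_CONTEXT8_PAGE_TABLE_BASE_ADDR.  An illustrative
 * (unused) helper for the mapping that cik_pcie_gart_enable() and
 * cik_vm_flush() open code below:
 *
 *   static u32 cik_vm_pt_base_reg(unsigned int vmid)
 *   {
 *           if (vmid < 8)
 *                   return VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vmid << 2);
 *           return VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vmid - 8) << 2);
 *   }
 */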
5261 /**
5262  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5263  *
5264  * @rdev: radeon_device pointer
5265  *
5266  * Flush the TLB for the VMID 0 page table (CIK).
5267  */
5268 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5269 {
5270 	/* flush hdp cache */
5271 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5272 
5273 	/* bits 0-15 are the VM contexts0-15 */
5274 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5275 }
5276 
5277 /**
5278  * cik_pcie_gart_enable - gart enable
5279  *
5280  * @rdev: radeon_device pointer
5281  *
5282  * This sets up the TLBs, programs the page tables for VMID0,
5283  * sets up the hw for VMIDs 1-15 which are allocated on
5284  * demand, and sets up the global locations for the LDS, GDS,
5285  * and GPUVM for FSA64 clients (CIK).
5286  * Returns 0 for success, errors for failure.
5287  */
5288 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5289 {
5290 	int r, i;
5291 
5292 	if (rdev->gart.robj == NULL) {
5293 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5294 		return -EINVAL;
5295 	}
5296 	r = radeon_gart_table_vram_pin(rdev);
5297 	if (r)
5298 		return r;
5299 	radeon_gart_restore(rdev);
5300 	/* Setup TLB control */
5301 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5302 	       (0xA << 7) |
5303 	       ENABLE_L1_TLB |
5304 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5305 	       ENABLE_ADVANCED_DRIVER_MODEL |
5306 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5307 	/* Setup L2 cache */
5308 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5309 	       ENABLE_L2_FRAGMENT_PROCESSING |
5310 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5311 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5312 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5313 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5314 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5315 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5316 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5317 	/* setup context0 */
5318 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5319 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5320 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5321 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5322 			(u32)(rdev->dummy_page.addr >> 12));
5323 	WREG32(VM_CONTEXT0_CNTL2, 0);
5324 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5325 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5326 
5327 	WREG32(0x15D4, 0);
5328 	WREG32(0x15D8, 0);
5329 	WREG32(0x15DC, 0);
5330 
5331 	/* empty context1-15 */
5332 	/* FIXME start with 4G, once using 2 level pt switch to full
5333 	 * vm size space
5334 	 */
5335 	/* set vm size, must be a multiple of 4 */
5336 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5337 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5338 	for (i = 1; i < 16; i++) {
5339 		if (i < 8)
5340 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5341 			       rdev->gart.table_addr >> 12);
5342 		else
5343 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5344 			       rdev->gart.table_addr >> 12);
5345 	}
5346 
5347 	/* enable context1-15 */
5348 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5349 	       (u32)(rdev->dummy_page.addr >> 12));
5350 	WREG32(VM_CONTEXT1_CNTL2, 4);
5351 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5352 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5353 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5354 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5355 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5356 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5357 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5358 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5359 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5360 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5361 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5362 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5363 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5364 
5365 	if (rdev->family == CHIP_KAVERI) {
5366 		u32 tmp = RREG32(CHUB_CONTROL);
5367 		tmp &= ~BYPASS_VM;
5368 		WREG32(CHUB_CONTROL, tmp);
5369 	}
5370 
5371 	/* XXX SH_MEM regs */
5372 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5373 	mutex_lock(&rdev->srbm_mutex);
5374 	for (i = 0; i < 16; i++) {
5375 		cik_srbm_select(rdev, 0, 0, 0, i);
5376 		/* CP and shaders */
5377 		WREG32(SH_MEM_CONFIG, 0);
5378 		WREG32(SH_MEM_APE1_BASE, 1);
5379 		WREG32(SH_MEM_APE1_LIMIT, 0);
5380 		WREG32(SH_MEM_BASES, 0);
5381 		/* SDMA GFX */
5382 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5383 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5384 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5385 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5386 		/* XXX SDMA RLC - todo */
5387 	}
5388 	cik_srbm_select(rdev, 0, 0, 0, 0);
5389 	mutex_unlock(&rdev->srbm_mutex);
5390 
5391 	cik_pcie_gart_tlb_flush(rdev);
5392 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5393 		 (unsigned)(rdev->mc.gtt_size >> 20),
5394 		 (unsigned long long)rdev->gart.table_addr);
5395 	rdev->gart.ready = true;
5396 	return 0;
5397 }
5398 
5399 /**
5400  * cik_pcie_gart_disable - gart disable
5401  *
5402  * @rdev: radeon_device pointer
5403  *
5404  * This disables all VM page tables (CIK).
5405  */
5406 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5407 {
5408 	/* Disable all tables */
5409 	WREG32(VM_CONTEXT0_CNTL, 0);
5410 	WREG32(VM_CONTEXT1_CNTL, 0);
5411 	/* Setup TLB control */
5412 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5413 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5414 	/* Setup L2 cache */
5415 	WREG32(VM_L2_CNTL,
5416 	       ENABLE_L2_FRAGMENT_PROCESSING |
5417 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5418 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5419 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5420 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5421 	WREG32(VM_L2_CNTL2, 0);
5422 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5423 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5424 	radeon_gart_table_vram_unpin(rdev);
5425 }
5426 
5427 /**
5428  * cik_pcie_gart_fini - vm fini callback
5429  *
5430  * @rdev: radeon_device pointer
5431  *
5432  * Tears down the driver GART/VM setup (CIK).
5433  */
5434 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5435 {
5436 	cik_pcie_gart_disable(rdev);
5437 	radeon_gart_table_vram_free(rdev);
5438 	radeon_gart_fini(rdev);
5439 }
5440 
5441 /* vm parser */
5442 /**
5443  * cik_ib_parse - vm ib_parse callback
5444  *
5445  * @rdev: radeon_device pointer
5446  * @ib: indirect buffer pointer
5447  *
5448  * CIK uses hw IB checking so this is a nop (CIK).
5449  */
5450 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5451 {
5452 	return 0;
5453 }
5454 
5455 /*
5456  * vm
5457  * VMID 0 is the physical GPU addresses as used by the kernel.
5458  * VMIDs 1-15 are used for userspace clients and are handled
5459  * by the radeon vm/hsa code.
5460  */
5461 /**
5462  * cik_vm_init - cik vm init callback
5463  *
5464  * @rdev: radeon_device pointer
5465  *
5466  * Inits cik specific vm parameters (number of VMs, base of vram for
5467  * VMIDs 1-15) (CIK).
5468  * Returns 0 for success.
5469  */
5470 int cik_vm_init(struct radeon_device *rdev)
5471 {
5472 	/* number of VMs */
5473 	rdev->vm_manager.nvm = 16;
5474 	/* base offset of vram pages */
5475 	if (rdev->flags & RADEON_IS_IGP) {
5476 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5477 		tmp <<= 22;
5478 		rdev->vm_manager.vram_base_offset = tmp;
5479 	} else
5480 		rdev->vm_manager.vram_base_offset = 0;
5481 
5482 	return 0;
5483 }
5484 
5485 /**
5486  * cik_vm_fini - cik vm fini callback
5487  *
5488  * @rdev: radeon_device pointer
5489  *
5490  * Tear down any asic specific VM setup (CIK).
5491  */
5492 void cik_vm_fini(struct radeon_device *rdev)
5493 {
5494 }
5495 
5496 /**
5497  * cik_vm_decode_fault - print human readable fault info
5498  *
5499  * @rdev: radeon_device pointer
5500  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5501  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
      * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5502  *
5503  * Print human readable fault information (CIK).
5504  */
5505 static void cik_vm_decode_fault(struct radeon_device *rdev,
5506 				u32 status, u32 addr, u32 mc_client)
5507 {
5508 	u32 mc_id;
5509 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5510 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5511 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5512 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5513 
5514 	if (rdev->family == CHIP_HAWAII)
5515 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5516 	else
5517 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5518 
5519 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5520 	       protections, vmid, addr,
5521 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5522 	       block, mc_client, mc_id);
5523 }
5524 
5525 /**
5526  * cik_vm_flush - cik vm flush using the CP
5527  *
5528  * @rdev: radeon_device pointer
      * @ridx: radeon ring index
      * @vm: radeon_vm pointer
5529  *
5530  * Update the page table base and flush the VM TLB
5531  * using the CP (CIK).
5532  */
5533 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5534 {
5535 	struct radeon_ring *ring = &rdev->ring[ridx];
5536 
5537 	if (vm == NULL)
5538 		return;
5539 
5540 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5541 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5542 				 WRITE_DATA_DST_SEL(0)));
5543 	if (vm->id < 8) {
5544 		radeon_ring_write(ring,
5545 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5546 	} else {
5547 		radeon_ring_write(ring,
5548 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5549 	}
5550 	radeon_ring_write(ring, 0);
5551 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5552 
5553 	/* update SH_MEM_* regs */
5554 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5555 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5556 				 WRITE_DATA_DST_SEL(0)));
5557 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5558 	radeon_ring_write(ring, 0);
5559 	radeon_ring_write(ring, VMID(vm->id));
5560 
5561 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5562 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5563 				 WRITE_DATA_DST_SEL(0)));
5564 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5565 	radeon_ring_write(ring, 0);
5566 
5567 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5568 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5569 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5570 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5571 
5572 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5573 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5574 				 WRITE_DATA_DST_SEL(0)));
5575 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5576 	radeon_ring_write(ring, 0);
5577 	radeon_ring_write(ring, VMID(0));
5578 
5579 	/* HDP flush */
5580 	cik_hdp_flush_cp_ring_emit(rdev, ridx);
5581 
5582 	/* bits 0-15 are the VM contexts0-15 */
5583 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5584 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5585 				 WRITE_DATA_DST_SEL(0)));
5586 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5587 	radeon_ring_write(ring, 0);
5588 	radeon_ring_write(ring, 1 << vm->id);
5589 
5590 	/* compute doesn't have PFP */
5591 	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
5592 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5593 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5594 		radeon_ring_write(ring, 0x0);
5595 	}
5596 }
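/* For reference, each WRITE_DATA packet emitted above is laid out as
 * (one dword per line):
 *   PACKET3(PACKET3_WRITE_DATA, n)                    header, n+1 payload dwords
 *   WRITE_DATA_ENGINE_SEL(x) | WRITE_DATA_DST_SEL(0)  write a register via the ME
 *   <register dword offset>                           e.g. SRBM_GFX_CNTL >> 2
 *   0                                                 upper address bits (unused)
 *   <value>, ...                                      data to write
 */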
5597 
5598 /*
5599  * RLC
5600  * The RLC is a multi-purpose microengine that handles a
5601  * variety of functions, the most important of which is
5602  * the interrupt controller.
5603  */
5604 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5605 					  bool enable)
5606 {
5607 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5608 
5609 	if (enable)
5610 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5611 	else
5612 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5613 	WREG32(CP_INT_CNTL_RING0, tmp);
5614 }
5615 
5616 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5617 {
5618 	u32 tmp;
5619 
5620 	tmp = RREG32(RLC_LB_CNTL);
5621 	if (enable)
5622 		tmp |= LOAD_BALANCE_ENABLE;
5623 	else
5624 		tmp &= ~LOAD_BALANCE_ENABLE;
5625 	WREG32(RLC_LB_CNTL, tmp);
5626 }
5627 
5628 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5629 {
5630 	u32 i, j, k;
5631 	u32 mask;
5632 
5633 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5634 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5635 			cik_select_se_sh(rdev, i, j);
5636 			for (k = 0; k < rdev->usec_timeout; k++) {
5637 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5638 					break;
5639 				udelay(1);
5640 			}
5641 		}
5642 	}
5643 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5644 
5645 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5646 	for (k = 0; k < rdev->usec_timeout; k++) {
5647 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5648 			break;
5649 		udelay(1);
5650 	}
5651 }
5652 
5653 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5654 {
5655 	u32 tmp;
5656 
5657 	tmp = RREG32(RLC_CNTL);
5658 	if (tmp != rlc)
5659 		WREG32(RLC_CNTL, rlc);
5660 }
5661 
5662 static u32 cik_halt_rlc(struct radeon_device *rdev)
5663 {
5664 	u32 data, orig;
5665 
5666 	orig = data = RREG32(RLC_CNTL);
5667 
5668 	if (data & RLC_ENABLE) {
5669 		u32 i;
5670 
5671 		data &= ~RLC_ENABLE;
5672 		WREG32(RLC_CNTL, data);
5673 
5674 		for (i = 0; i < rdev->usec_timeout; i++) {
5675 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5676 				break;
5677 			udelay(1);
5678 		}
5679 
5680 		cik_wait_for_rlc_serdes(rdev);
5681 	}
5682 
5683 	return orig;
5684 }
5685 
5686 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5687 {
5688 	u32 tmp, i, mask;
5689 
5690 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5691 	WREG32(RLC_GPR_REG2, tmp);
5692 
5693 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5694 	for (i = 0; i < rdev->usec_timeout; i++) {
5695 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5696 			break;
5697 		udelay(1);
5698 	}
5699 
5700 	for (i = 0; i < rdev->usec_timeout; i++) {
5701 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5702 			break;
5703 		udelay(1);
5704 	}
5705 }
5706 
5707 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5708 {
5709 	u32 tmp;
5710 
5711 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5712 	WREG32(RLC_GPR_REG2, tmp);
5713 }
5714 
5715 /**
5716  * cik_rlc_stop - stop the RLC ME
5717  *
5718  * @rdev: radeon_device pointer
5719  *
5720  * Halt the RLC ME (MicroEngine) (CIK).
5721  */
5722 static void cik_rlc_stop(struct radeon_device *rdev)
5723 {
5724 	WREG32(RLC_CNTL, 0);
5725 
5726 	cik_enable_gui_idle_interrupt(rdev, false);
5727 
5728 	cik_wait_for_rlc_serdes(rdev);
5729 }
5730 
5731 /**
5732  * cik_rlc_start - start the RLC ME
5733  *
5734  * @rdev: radeon_device pointer
5735  *
5736  * Unhalt the RLC ME (MicroEngine) (CIK).
5737  */
5738 static void cik_rlc_start(struct radeon_device *rdev)
5739 {
5740 	WREG32(RLC_CNTL, RLC_ENABLE);
5741 
5742 	cik_enable_gui_idle_interrupt(rdev, true);
5743 
5744 	udelay(50);
5745 }
5746 
5747 /**
5748  * cik_rlc_resume - setup the RLC hw
5749  *
5750  * @rdev: radeon_device pointer
5751  *
5752  * Initialize the RLC registers, load the ucode,
5753  * and start the RLC (CIK).
5754  * Returns 0 for success, -EINVAL if the ucode is not available.
5755  */
5756 static int cik_rlc_resume(struct radeon_device *rdev)
5757 {
5758 	u32 i, size, tmp;
5759 	const __be32 *fw_data;
5760 
5761 	if (!rdev->rlc_fw)
5762 		return -EINVAL;
5763 
5764 	switch (rdev->family) {
5765 	case CHIP_BONAIRE:
5766 	case CHIP_HAWAII:
5767 	default:
5768 		size = BONAIRE_RLC_UCODE_SIZE;
5769 		break;
5770 	case CHIP_KAVERI:
5771 		size = KV_RLC_UCODE_SIZE;
5772 		break;
5773 	case CHIP_KABINI:
5774 		size = KB_RLC_UCODE_SIZE;
5775 		break;
5776 	}
5777 
5778 	cik_rlc_stop(rdev);
5779 
5780 	/* disable CG */
5781 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5782 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5783 
5784 	si_rlc_reset(rdev);
5785 
5786 	cik_init_pg(rdev);
5787 
5788 	cik_init_cg(rdev);
5789 
5790 	WREG32(RLC_LB_CNTR_INIT, 0);
5791 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5792 
5793 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5794 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5795 	WREG32(RLC_LB_PARAMS, 0x00600408);
5796 	WREG32(RLC_LB_CNTL, 0x80000004);
5797 
5798 	WREG32(RLC_MC_CNTL, 0);
5799 	WREG32(RLC_UCODE_CNTL, 0);
5800 
5801 	fw_data = (const __be32 *)rdev->rlc_fw->data;
5802 	WREG32(RLC_GPM_UCODE_ADDR, 0);
5803 	for (i = 0; i < size; i++)
5804 		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5805 	WREG32(RLC_GPM_UCODE_ADDR, 0);
5806 
5807 	/* XXX - find out what chips support lbpw */
5808 	cik_enable_lbpw(rdev, false);
5809 
5810 	if (rdev->family == CHIP_BONAIRE)
5811 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
5812 
5813 	cik_rlc_start(rdev);
5814 
5815 	return 0;
5816 }
5817 
5818 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5819 {
5820 	u32 data, orig, tmp, tmp2;
5821 
5822 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5823 
5824 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5825 		cik_enable_gui_idle_interrupt(rdev, true);
5826 
5827 		tmp = cik_halt_rlc(rdev);
5828 
5829 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5830 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5831 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5832 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5833 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
5834 
5835 		cik_update_rlc(rdev, tmp);
5836 
5837 		data |= CGCG_EN | CGLS_EN;
5838 	} else {
5839 		cik_enable_gui_idle_interrupt(rdev, false);
5840 
5841 		RREG32(CB_CGTT_SCLK_CTRL);
5842 		RREG32(CB_CGTT_SCLK_CTRL);
5843 		RREG32(CB_CGTT_SCLK_CTRL);
5844 		RREG32(CB_CGTT_SCLK_CTRL);
5845 
5846 		data &= ~(CGCG_EN | CGLS_EN);
5847 	}
5848 
5849 	if (orig != data)
5850 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5852 }
5853 
5854 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5855 {
5856 	u32 data, orig, tmp = 0;
5857 
5858 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5859 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
5860 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5861 				orig = data = RREG32(CP_MEM_SLP_CNTL);
5862 				data |= CP_MEM_LS_EN;
5863 				if (orig != data)
5864 					WREG32(CP_MEM_SLP_CNTL, data);
5865 			}
5866 		}
5867 
5868 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5869 		data &= 0xfffffffd;
5870 		if (orig != data)
5871 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5872 
5873 		tmp = cik_halt_rlc(rdev);
5874 
5875 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5876 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5877 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5878 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5879 		WREG32(RLC_SERDES_WR_CTRL, data);
5880 
5881 		cik_update_rlc(rdev, tmp);
5882 
5883 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
5884 			orig = data = RREG32(CGTS_SM_CTRL_REG);
5885 			data &= ~SM_MODE_MASK;
5886 			data |= SM_MODE(0x2);
5887 			data |= SM_MODE_ENABLE;
5888 			data &= ~CGTS_OVERRIDE;
5889 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
5890 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
5891 				data &= ~CGTS_LS_OVERRIDE;
5892 			data &= ~ON_MONITOR_ADD_MASK;
5893 			data |= ON_MONITOR_ADD_EN;
5894 			data |= ON_MONITOR_ADD(0x96);
5895 			if (orig != data)
5896 				WREG32(CGTS_SM_CTRL_REG, data);
5897 		}
5898 	} else {
5899 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5900 		data |= 0x00000002;
5901 		if (orig != data)
5902 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5903 
5904 		data = RREG32(RLC_MEM_SLP_CNTL);
5905 		if (data & RLC_MEM_LS_EN) {
5906 			data &= ~RLC_MEM_LS_EN;
5907 			WREG32(RLC_MEM_SLP_CNTL, data);
5908 		}
5909 
5910 		data = RREG32(CP_MEM_SLP_CNTL);
5911 		if (data & CP_MEM_LS_EN) {
5912 			data &= ~CP_MEM_LS_EN;
5913 			WREG32(CP_MEM_SLP_CNTL, data);
5914 		}
5915 
5916 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5917 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
5918 		if (orig != data)
5919 			WREG32(CGTS_SM_CTRL_REG, data);
5920 
5921 		tmp = cik_halt_rlc(rdev);
5922 
5923 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5924 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5925 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5926 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
5927 		WREG32(RLC_SERDES_WR_CTRL, data);
5928 
5929 		cik_update_rlc(rdev, tmp);
5930 	}
5931 }
5932 
5933 static const u32 mc_cg_registers[] =
5934 {
5935 	MC_HUB_MISC_HUB_CG,
5936 	MC_HUB_MISC_SIP_CG,
5937 	MC_HUB_MISC_VM_CG,
5938 	MC_XPB_CLK_GAT,
5939 	ATC_MISC_CG,
5940 	MC_CITF_MISC_WR_CG,
5941 	MC_CITF_MISC_RD_CG,
5942 	MC_CITF_MISC_VM_CG,
5943 	VM_L2_CG,
5944 };
5945 
5946 static void cik_enable_mc_ls(struct radeon_device *rdev,
5947 			     bool enable)
5948 {
5949 	int i;
5950 	u32 orig, data;
5951 
5952 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5953 		orig = data = RREG32(mc_cg_registers[i]);
5954 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5955 			data |= MC_LS_ENABLE;
5956 		else
5957 			data &= ~MC_LS_ENABLE;
5958 		if (data != orig)
5959 			WREG32(mc_cg_registers[i], data);
5960 	}
5961 }
5962 
5963 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5964 			       bool enable)
5965 {
5966 	int i;
5967 	u32 orig, data;
5968 
5969 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5970 		orig = data = RREG32(mc_cg_registers[i]);
5971 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5972 			data |= MC_CG_ENABLE;
5973 		else
5974 			data &= ~MC_CG_ENABLE;
5975 		if (data != orig)
5976 			WREG32(mc_cg_registers[i], data);
5977 	}
5978 }
5979 
5980 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5981 				 bool enable)
5982 {
5983 	u32 orig, data;
5984 
5985 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5986 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5987 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5988 	} else {
5989 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5990 		data |= 0xff000000;
5991 		if (data != orig)
5992 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5993 
5994 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5995 		data |= 0xff000000;
5996 		if (data != orig)
5997 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5998 	}
5999 }
6000 
6001 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6002 				 bool enable)
6003 {
6004 	u32 orig, data;
6005 
6006 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6007 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6008 		data |= 0x100;
6009 		if (orig != data)
6010 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6011 
6012 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6013 		data |= 0x100;
6014 		if (orig != data)
6015 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6016 	} else {
6017 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6018 		data &= ~0x100;
6019 		if (orig != data)
6020 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6021 
6022 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6023 		data &= ~0x100;
6024 		if (orig != data)
6025 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6026 	}
6027 }
6028 
6029 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6030 				bool enable)
6031 {
6032 	u32 orig, data;
6033 
6034 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6035 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6036 		data = 0xfff; /* XXX enables all bits, overriding the value just read */
6037 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6038 
6039 		orig = data = RREG32(UVD_CGC_CTRL);
6040 		data |= DCM;
6041 		if (orig != data)
6042 			WREG32(UVD_CGC_CTRL, data);
6043 	} else {
6044 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6045 		data &= ~0xfff;
6046 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6047 
6048 		orig = data = RREG32(UVD_CGC_CTRL);
6049 		data &= ~DCM;
6050 		if (orig != data)
6051 			WREG32(UVD_CGC_CTRL, data);
6052 	}
6053 }
6054 
6055 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6056 			       bool enable)
6057 {
6058 	u32 orig, data;
6059 
6060 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6061 
6062 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6063 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6064 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6065 	else
6066 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6067 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6068 
6069 	if (orig != data)
6070 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6071 }
6072 
6073 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6074 				bool enable)
6075 {
6076 	u32 orig, data;
6077 
6078 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6079 
6080 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6081 		data &= ~CLOCK_GATING_DIS;
6082 	else
6083 		data |= CLOCK_GATING_DIS;
6084 
6085 	if (orig != data)
6086 		WREG32(HDP_HOST_PATH_CNTL, data);
6087 }
6088 
6089 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6090 			      bool enable)
6091 {
6092 	u32 orig, data;
6093 
6094 	orig = data = RREG32(HDP_MEM_POWER_LS);
6095 
6096 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6097 		data |= HDP_LS_ENABLE;
6098 	else
6099 		data &= ~HDP_LS_ENABLE;
6100 
6101 	if (orig != data)
6102 		WREG32(HDP_MEM_POWER_LS, data);
6103 }
6104 
6105 void cik_update_cg(struct radeon_device *rdev,
6106 		   u32 block, bool enable)
6107 {
6109 	if (block & RADEON_CG_BLOCK_GFX) {
6110 		cik_enable_gui_idle_interrupt(rdev, false);
6111 		/* order matters! */
6112 		if (enable) {
6113 			cik_enable_mgcg(rdev, true);
6114 			cik_enable_cgcg(rdev, true);
6115 		} else {
6116 			cik_enable_cgcg(rdev, false);
6117 			cik_enable_mgcg(rdev, false);
6118 		}
6119 		cik_enable_gui_idle_interrupt(rdev, true);
6120 	}
6121 
6122 	if (block & RADEON_CG_BLOCK_MC) {
6123 		if (!(rdev->flags & RADEON_IS_IGP)) {
6124 			cik_enable_mc_mgcg(rdev, enable);
6125 			cik_enable_mc_ls(rdev, enable);
6126 		}
6127 	}
6128 
6129 	if (block & RADEON_CG_BLOCK_SDMA) {
6130 		cik_enable_sdma_mgcg(rdev, enable);
6131 		cik_enable_sdma_mgls(rdev, enable);
6132 	}
6133 
6134 	if (block & RADEON_CG_BLOCK_BIF) {
6135 		cik_enable_bif_mgls(rdev, enable);
6136 	}
6137 
6138 	if (block & RADEON_CG_BLOCK_UVD) {
6139 		if (rdev->has_uvd)
6140 			cik_enable_uvd_mgcg(rdev, enable);
6141 	}
6142 
6143 	if (block & RADEON_CG_BLOCK_HDP) {
6144 		cik_enable_hdp_mgcg(rdev, enable);
6145 		cik_enable_hdp_ls(rdev, enable);
6146 	}
6147 }
6148 
6149 static void cik_init_cg(struct radeon_device *rdev)
6150 {
6152 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6153 
6154 	if (rdev->has_uvd)
6155 		si_init_uvd_internal_cg(rdev);
6156 
6157 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6158 			     RADEON_CG_BLOCK_SDMA |
6159 			     RADEON_CG_BLOCK_BIF |
6160 			     RADEON_CG_BLOCK_UVD |
6161 			     RADEON_CG_BLOCK_HDP), true);
6162 }
6163 
6164 static void cik_fini_cg(struct radeon_device *rdev)
6165 {
6166 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6167 			     RADEON_CG_BLOCK_SDMA |
6168 			     RADEON_CG_BLOCK_BIF |
6169 			     RADEON_CG_BLOCK_UVD |
6170 			     RADEON_CG_BLOCK_HDP), false);
6171 
6172 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6173 }
6174 
6175 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6176 					  bool enable)
6177 {
6178 	u32 data, orig;
6179 
6180 	orig = data = RREG32(RLC_PG_CNTL);
6181 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6182 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6183 	else
6184 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6185 	if (orig != data)
6186 		WREG32(RLC_PG_CNTL, data);
6187 }
6188 
6189 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6190 					  bool enable)
6191 {
6192 	u32 data, orig;
6193 
6194 	orig = data = RREG32(RLC_PG_CNTL);
6195 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6196 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6197 	else
6198 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6199 	if (orig != data)
6200 		WREG32(RLC_PG_CNTL, data);
6201 }
6202 
6203 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6204 {
6205 	u32 data, orig;
6206 
6207 	orig = data = RREG32(RLC_PG_CNTL);
6208 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6209 		data &= ~DISABLE_CP_PG;
6210 	else
6211 		data |= DISABLE_CP_PG;
6212 	if (orig != data)
6213 		WREG32(RLC_PG_CNTL, data);
6214 }
6215 
6216 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6217 {
6218 	u32 data, orig;
6219 
6220 	orig = data = RREG32(RLC_PG_CNTL);
6221 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6222 		data &= ~DISABLE_GDS_PG;
6223 	else
6224 		data |= DISABLE_GDS_PG;
6225 	if (orig != data)
6226 		WREG32(RLC_PG_CNTL, data);
6227 }
6228 
6229 #define CP_ME_TABLE_SIZE    96
6230 #define CP_ME_TABLE_OFFSET  2048
6231 #define CP_MEC_TABLE_OFFSET 4096
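/* The resulting cp_table BO holds one CP_ME_TABLE_SIZE-dword block per
 * microengine, copied from dword offset CP_ME_TABLE_OFFSET of the CE,
 * PFP, and ME images and CP_MEC_TABLE_OFFSET of the MEC image:
 *
 *   dwords [  0.. 95]  ce_fw  [2048..2143]
 *   dwords [ 96..191]  pfp_fw [2048..2143]
 *   dwords [192..287]  me_fw  [2048..2143]
 *   dwords [288..383]  mec_fw [4096..4191]
 *   dwords [384..479]  mec_fw [4096..4191] (KAVERI only, max_me = 5)
 */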
6232 
6233 void cik_init_cp_pg_table(struct radeon_device *rdev)
6234 {
6235 	const __be32 *fw_data;
6236 	volatile u32 *dst_ptr;
6237 	int me, i, max_me = 4;
6238 	u32 bo_offset = 0;
6239 	u32 table_offset;
6240 
6241 	if (rdev->family == CHIP_KAVERI)
6242 		max_me = 5;
6243 
6244 	if (rdev->rlc.cp_table_ptr == NULL)
6245 		return;
6246 
6247 	/* write the cp table buffer */
6248 	dst_ptr = rdev->rlc.cp_table_ptr;
6249 	for (me = 0; me < max_me; me++) {
6250 		if (me == 0) {
6251 			fw_data = (const __be32 *)rdev->ce_fw->data;
6252 			table_offset = CP_ME_TABLE_OFFSET;
6253 		} else if (me == 1) {
6254 			fw_data = (const __be32 *)rdev->pfp_fw->data;
6255 			table_offset = CP_ME_TABLE_OFFSET;
6256 		} else if (me == 2) {
6257 			fw_data = (const __be32 *)rdev->me_fw->data;
6258 			table_offset = CP_ME_TABLE_OFFSET;
6259 		} else {
6260 			fw_data = (const __be32 *)rdev->mec_fw->data;
6261 			table_offset = CP_MEC_TABLE_OFFSET;
6262 		}
6263 
6264 		for (i = 0; i < CP_ME_TABLE_SIZE; i++) {
6265 			dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6266 		}
6267 		bo_offset += CP_ME_TABLE_SIZE;
6268 	}
6269 }
6270 
6271 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6272 				bool enable)
6273 {
6274 	u32 data, orig;
6275 
6276 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6277 		orig = data = RREG32(RLC_PG_CNTL);
6278 		data |= GFX_PG_ENABLE;
6279 		if (orig != data)
6280 			WREG32(RLC_PG_CNTL, data);
6281 
6282 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6283 		data |= AUTO_PG_EN;
6284 		if (orig != data)
6285 			WREG32(RLC_AUTO_PG_CTRL, data);
6286 	} else {
6287 		orig = data = RREG32(RLC_PG_CNTL);
6288 		data &= ~GFX_PG_ENABLE;
6289 		if (orig != data)
6290 			WREG32(RLC_PG_CNTL, data);
6291 
6292 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6293 		data &= ~AUTO_PG_EN;
6294 		if (orig != data)
6295 			WREG32(RLC_AUTO_PG_CTRL, data);
6296 
6297 		data = RREG32(DB_RENDER_CONTROL);
6298 	}
6299 }
6300 
6301 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6302 {
6303 	u32 mask = 0, tmp, tmp1;
6304 	int i;
6305 
6306 	cik_select_se_sh(rdev, se, sh);
6307 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6308 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6309 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6310 
6311 	tmp &= 0xffff0000;
6312 
6313 	tmp |= tmp1;
6314 	tmp >>= 16;
6315 
6316 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6317 		mask <<= 1;
6318 		mask |= 1;
6319 	}
6320 
6321 	return (~tmp) & mask;
6322 }
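/* Example: with max_cu_per_sh = 7 the loop above builds mask = 0x7f, so
 * a combined inactive-CU field of 0x18 (CUs 3 and 4 fused off) returns
 * (~0x18) & 0x7f = 0x67, i.e. CUs 0-2 and 5-6 active.
 */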
6323 
6324 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6325 {
6326 	u32 i, j, k, active_cu_number = 0;
6327 	u32 mask, counter, cu_bitmap;
6328 	u32 tmp = 0;
6329 
6330 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6331 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6332 			mask = 1;
6333 			cu_bitmap = 0;
6334 			counter = 0;
6335 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6336 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6337 					if (counter < 2)
6338 						cu_bitmap |= mask;
6339 						counter++;
6340 				}
6341 				mask <<= 1;
6342 			}
6343 
6344 			active_cu_number += counter;
6345 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6346 		}
6347 	}
6348 
6349 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6350 
6351 	tmp = RREG32(RLC_MAX_PG_CU);
6352 	tmp &= ~MAX_PU_CU_MASK;
6353 	tmp |= MAX_PU_CU(active_cu_number);
6354 	WREG32(RLC_MAX_PG_CU, tmp);
6355 }
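/* The RLC_PG_AO_CU_MASK packing above places each SH's always-on bitmap
 * at bit (se * 16 + sh * 8): SE0/SH0 in bits 0-7, SE0/SH1 in bits 8-15,
 * SE1/SH0 in bits 16-23, etc., keeping at most two CUs always-on per SH.
 */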
6356 
6357 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6358 				       bool enable)
6359 {
6360 	u32 data, orig;
6361 
6362 	orig = data = RREG32(RLC_PG_CNTL);
6363 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6364 		data |= STATIC_PER_CU_PG_ENABLE;
6365 	else
6366 		data &= ~STATIC_PER_CU_PG_ENABLE;
6367 	if (orig != data)
6368 		WREG32(RLC_PG_CNTL, data);
6369 }
6370 
6371 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6372 					bool enable)
6373 {
6374 	u32 data, orig;
6375 
6376 	orig = data = RREG32(RLC_PG_CNTL);
6377 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6378 		data |= DYN_PER_CU_PG_ENABLE;
6379 	else
6380 		data &= ~DYN_PER_CU_PG_ENABLE;
6381 	if (orig != data)
6382 		WREG32(RLC_PG_CNTL, data);
6383 }
6384 
6385 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6386 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6387 
6388 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6389 {
6390 	u32 data, orig;
6391 	u32 i;
6392 
6393 	if (rdev->rlc.cs_data) {
6394 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6395 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6396 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6397 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6398 	} else {
6399 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6400 		for (i = 0; i < 3; i++)
6401 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6402 	}
6403 	if (rdev->rlc.reg_list) {
6404 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6405 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6406 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6407 	}
6408 
6409 	orig = data = RREG32(RLC_PG_CNTL);
6410 	data |= GFX_PG_SRC;
6411 	if (orig != data)
6412 		WREG32(RLC_PG_CNTL, data);
6413 
6414 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6415 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6416 
6417 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6418 	data &= ~IDLE_POLL_COUNT_MASK;
6419 	data |= IDLE_POLL_COUNT(0x60);
6420 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6421 
6422 	data = 0x10101010;
6423 	WREG32(RLC_PG_DELAY, data);
6424 
6425 	data = RREG32(RLC_PG_DELAY_2);
6426 	data &= ~0xff;
6427 	data |= 0x3;
6428 	WREG32(RLC_PG_DELAY_2, data);
6429 
6430 	data = RREG32(RLC_AUTO_PG_CTRL);
6431 	data &= ~GRBM_REG_SGIT_MASK;
6432 	data |= GRBM_REG_SGIT(0x700);
6433 	WREG32(RLC_AUTO_PG_CTRL, data);
6435 }
6436 
6437 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6438 {
6439 	cik_enable_gfx_cgpg(rdev, enable);
6440 	cik_enable_gfx_static_mgpg(rdev, enable);
6441 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
6442 }
6443 
6444 u32 cik_get_csb_size(struct radeon_device *rdev)
6445 {
6446 	u32 count = 0;
6447 	const struct cs_section_def *sect = NULL;
6448 	const struct cs_extent_def *ext = NULL;
6449 
6450 	if (rdev->rlc.cs_data == NULL)
6451 		return 0;
6452 
6453 	/* begin clear state */
6454 	count += 2;
6455 	/* context control state */
6456 	count += 3;
6457 
6458 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6459 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6460 			if (sect->id == SECT_CONTEXT)
6461 				count += 2 + ext->reg_count;
6462 			else
6463 				return 0;
6464 		}
6465 	}
6466 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6467 	count += 4;
6468 	/* end clear state */
6469 	count += 2;
6470 	/* clear state */
6471 	count += 2;
6472 
6473 	return count;
6474 }
6475 
6476 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6477 {
6478 	u32 count = 0, i;
6479 	const struct cs_section_def *sect = NULL;
6480 	const struct cs_extent_def *ext = NULL;
6481 
6482 	if (rdev->rlc.cs_data == NULL)
6483 		return;
6484 	if (buffer == NULL)
6485 		return;
6486 
6487 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6488 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6489 
6490 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6491 	buffer[count++] = cpu_to_le32(0x80000000);
6492 	buffer[count++] = cpu_to_le32(0x80000000);
6493 
6494 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6495 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6496 			if (sect->id == SECT_CONTEXT) {
6497 				buffer[count++] =
6498 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6499 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6500 				for (i = 0; i < ext->reg_count; i++)
6501 					buffer[count++] = cpu_to_le32(ext->extent[i]);
6502 			} else {
6503 				return;
6504 			}
6505 		}
6506 	}
6507 
6508 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6509 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6510 	switch (rdev->family) {
6511 	case CHIP_BONAIRE:
6512 		buffer[count++] = cpu_to_le32(0x16000012);
6513 		buffer[count++] = cpu_to_le32(0x00000000);
6514 		break;
6515 	case CHIP_KAVERI:
6516 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6517 		buffer[count++] = cpu_to_le32(0x00000000);
6518 		break;
6519 	case CHIP_KABINI:
6520 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6521 		buffer[count++] = cpu_to_le32(0x00000000);
6522 		break;
6523 	case CHIP_HAWAII:
6524 		buffer[count++] = 0x3a00161a;
6525 		buffer[count++] = 0x0000002e;
6526 		break;
6527 	default:
6528 		buffer[count++] = cpu_to_le32(0x00000000);
6529 		buffer[count++] = cpu_to_le32(0x00000000);
6530 		break;
6531 	}
6532 
6533 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6534 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6535 
6536 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6537 	buffer[count++] = cpu_to_le32(0);
6538 }
6539 
6540 static void cik_init_pg(struct radeon_device *rdev)
6541 {
6542 	if (rdev->pg_flags) {
6543 		cik_enable_sck_slowdown_on_pu(rdev, true);
6544 		cik_enable_sck_slowdown_on_pd(rdev, true);
6545 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6546 			cik_init_gfx_cgpg(rdev);
6547 			cik_enable_cp_pg(rdev, true);
6548 			cik_enable_gds_pg(rdev, true);
6549 		}
6550 		cik_init_ao_cu_mask(rdev);
6551 		cik_update_gfx_pg(rdev, true);
6552 	}
6553 }
6554 
6555 static void cik_fini_pg(struct radeon_device *rdev)
6556 {
6557 	if (rdev->pg_flags) {
6558 		cik_update_gfx_pg(rdev, false);
6559 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6560 			cik_enable_cp_pg(rdev, false);
6561 			cik_enable_gds_pg(rdev, false);
6562 		}
6563 	}
6564 }
6565 
6566 /*
6567  * Interrupts
6568  * Starting with r6xx, interrupts are handled via a ring buffer.
6569  * Ring buffers are areas of GPU accessible memory that the GPU
6570  * writes interrupt vectors into and the host reads vectors out of.
6571  * There is a rptr (read pointer) that determines where the
6572  * host is currently reading, and a wptr (write pointer)
6573  * which determines where the GPU has written.  When the
6574  * pointers are equal, the ring is idle.  When the GPU
6575  * writes vectors to the ring buffer, it increments the
6576  * wptr.  When there is an interrupt, the host then starts
6577  * fetching commands and processing them until the pointers are
6578  * equal again at which point it updates the rptr.
6579  */
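/* A minimal sketch of the consumption side (the real handler,
 * cik_irq_process(), lives elsewhere in this file), assuming the
 * 16-byte vectors and writeback-provided wptr used since r6xx:
 *
 *   wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET / 4]);
 *   rptr = rdev->ih.rptr;
 *   while (rptr != wptr) {
 *           u32 src_id   = le32_to_cpu(rdev->ih.ring[rptr / 4]) & 0xff;
 *           u32 src_data = le32_to_cpu(rdev->ih.ring[rptr / 4 + 1]) & 0xfffffff;
 *           ... dispatch on src_id/src_data ...
 *           rptr = (rptr + 16) & rdev->ih.ptr_mask;
 *   }
 *   rdev->ih.rptr = rptr;
 *   WREG32(IH_RB_RPTR, rptr);
 */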
6580 
6581 /**
6582  * cik_enable_interrupts - Enable the interrupt ring buffer
6583  *
6584  * @rdev: radeon_device pointer
6585  *
6586  * Enable the interrupt ring buffer (CIK).
6587  */
6588 static void cik_enable_interrupts(struct radeon_device *rdev)
6589 {
6590 	u32 ih_cntl = RREG32(IH_CNTL);
6591 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6592 
6593 	ih_cntl |= ENABLE_INTR;
6594 	ih_rb_cntl |= IH_RB_ENABLE;
6595 	WREG32(IH_CNTL, ih_cntl);
6596 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6597 	rdev->ih.enabled = true;
6598 }
6599 
6600 /**
6601  * cik_disable_interrupts - Disable the interrupt ring buffer
6602  *
6603  * @rdev: radeon_device pointer
6604  *
6605  * Disable the interrupt ring buffer (CIK).
6606  */
6607 static void cik_disable_interrupts(struct radeon_device *rdev)
6608 {
6609 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6610 	u32 ih_cntl = RREG32(IH_CNTL);
6611 
6612 	ih_rb_cntl &= ~IH_RB_ENABLE;
6613 	ih_cntl &= ~ENABLE_INTR;
6614 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6615 	WREG32(IH_CNTL, ih_cntl);
6616 	/* set rptr, wptr to 0 */
6617 	WREG32(IH_RB_RPTR, 0);
6618 	WREG32(IH_RB_WPTR, 0);
6619 	rdev->ih.enabled = false;
6620 	rdev->ih.rptr = 0;
6621 }
6622 
6623 /**
6624  * cik_disable_interrupt_state - Disable all interrupt sources
6625  *
6626  * @rdev: radeon_device pointer
6627  *
6628  * Clear all interrupt enable bits used by the driver (CIK).
6629  */
6630 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6631 {
6632 	u32 tmp;
6633 
6634 	/* gfx ring */
6635 	tmp = RREG32(CP_INT_CNTL_RING0) &
6636 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6637 	WREG32(CP_INT_CNTL_RING0, tmp);
6638 	/* sdma */
6639 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6640 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6641 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6642 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6643 	/* compute queues */
6644 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6645 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6646 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6647 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6648 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6649 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6650 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6651 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6652 	/* grbm */
6653 	WREG32(GRBM_INT_CNTL, 0);
6654 	/* vline/vblank, etc. */
6655 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6656 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6657 	if (rdev->num_crtc >= 4) {
6658 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6659 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6660 	}
6661 	if (rdev->num_crtc >= 6) {
6662 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6663 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6664 	}
6665 
6666 	/* dac hotplug */
6667 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6668 
6669 	/* digital hotplug */
6670 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6671 	WREG32(DC_HPD1_INT_CONTROL, tmp);
6672 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6673 	WREG32(DC_HPD2_INT_CONTROL, tmp);
6674 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6675 	WREG32(DC_HPD3_INT_CONTROL, tmp);
6676 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6677 	WREG32(DC_HPD4_INT_CONTROL, tmp);
6678 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6679 	WREG32(DC_HPD5_INT_CONTROL, tmp);
6680 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6681 	WREG32(DC_HPD6_INT_CONTROL, tmp);
6683 }
6684 
6685 /**
6686  * cik_irq_init - init and enable the interrupt ring
6687  *
6688  * @rdev: radeon_device pointer
6689  *
6690  * Allocate a ring buffer for the interrupt controller,
6691  * enable the RLC, disable interrupts, enable the IH
6692  * ring buffer and enable it (CIK).
6693  * Called at device load and resume.
6694  * Returns 0 for success, errors for failure.
6695  */
6696 static int cik_irq_init(struct radeon_device *rdev)
6697 {
6698 	int ret = 0;
6699 	int rb_bufsz;
6700 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6701 
6702 	/* allocate ring */
6703 	ret = r600_ih_ring_alloc(rdev);
6704 	if (ret)
6705 		return ret;
6706 
6707 	/* disable irqs */
6708 	cik_disable_interrupts(rdev);
6709 
6710 	/* init rlc */
6711 	ret = cik_rlc_resume(rdev);
6712 	if (ret) {
6713 		r600_ih_ring_fini(rdev);
6714 		return ret;
6715 	}
6716 
6717 	/* setup interrupt control */
6718 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
6719 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6720 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6721 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6722 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6723 	 */
6724 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6725 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6726 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6727 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6728 
6729 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6730 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6731 
6732 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6733 		      IH_WPTR_OVERFLOW_CLEAR |
6734 		      (rb_bufsz << 1));
6735 
6736 	if (rdev->wb.enabled)
6737 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6738 
6739 	/* set the writeback address whether it's enabled or not */
6740 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6741 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6742 
6743 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6744 
6745 	/* set rptr, wptr to 0 */
6746 	WREG32(IH_RB_RPTR, 0);
6747 	WREG32(IH_RB_WPTR, 0);
6748 
6749 	/* Default settings for IH_CNTL (disabled at first) */
6750 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6751 	/* RPTR_REARM only works if msi's are enabled */
6752 	if (rdev->msi_enabled)
6753 		ih_cntl |= RPTR_REARM;
6754 	WREG32(IH_CNTL, ih_cntl);
6755 
6756 	/* force the active interrupt state to all disabled */
6757 	cik_disable_interrupt_state(rdev);
6758 
6759 	pci_set_master(rdev->pdev);
6760 
6761 	/* enable irqs */
6762 	cik_enable_interrupts(rdev);
6763 
6764 	return ret;
6765 }
6766 
6767 /**
6768  * cik_irq_set - enable/disable interrupt sources
6769  *
6770  * @rdev: radeon_device pointer
6771  *
6772  * Enable interrupt sources on the GPU (vblanks, hpd,
6773  * etc.) (CIK).
6774  * Returns 0 for success, errors for failure.
6775  */
6776 int cik_irq_set(struct radeon_device *rdev)
6777 {
6778 	u32 cp_int_cntl;
6779 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6780 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6781 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6782 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6783 	u32 grbm_int_cntl = 0;
6784 	u32 dma_cntl, dma_cntl1;
6785 	u32 thermal_int;
6786 
6787 	if (!rdev->irq.installed) {
6788 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6789 		return -EINVAL;
6790 	}
6791 	/* don't enable anything if the ih is disabled */
6792 	if (!rdev->ih.enabled) {
6793 		cik_disable_interrupts(rdev);
6794 		/* force the active interrupt state to all disabled */
6795 		cik_disable_interrupt_state(rdev);
6796 		return 0;
6797 	}
6798 
6799 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6800 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6801 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6802 
6803 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6804 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6805 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6806 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6807 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6808 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6809 
6810 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6811 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6812 
6813 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6814 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6815 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6816 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6817 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6818 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6819 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6820 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6821 
6822 	if (rdev->flags & RADEON_IS_IGP)
6823 		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6824 			~(THERM_INTH_MASK | THERM_INTL_MASK);
6825 	else
6826 		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6827 			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6828 
6829 	/* enable CP interrupts on all rings */
6830 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6831 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
6832 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6833 	}
6834 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6835 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6836 		DRM_DEBUG("cik_irq_set: sw int cp1\n");
6837 		if (ring->me == 1) {
6838 			switch (ring->pipe) {
6839 			case 0:
6840 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6841 				break;
6842 			case 1:
6843 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6844 				break;
6845 			case 2:
6846 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6847 				break;
6848 			case 3:
6849 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6850 				break;
6851 			default:
6852 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6853 				break;
6854 			}
6855 		} else if (ring->me == 2) {
6856 			switch (ring->pipe) {
6857 			case 0:
6858 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6859 				break;
6860 			case 1:
6861 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6862 				break;
6863 			case 2:
6864 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6865 				break;
6866 			case 3:
6867 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6868 				break;
6869 			default:
6870 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6871 				break;
6872 			}
6873 		} else {
6874 			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
6875 		}
6876 	}
6877 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6878 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6879 		DRM_DEBUG("cik_irq_set: sw int cp2\n");
6880 		if (ring->me == 1) {
6881 			switch (ring->pipe) {
6882 			case 0:
6883 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6884 				break;
6885 			case 1:
6886 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6887 				break;
6888 			case 2:
6889 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6890 				break;
6891 			case 3:
6892 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6893 				break;
6894 			default:
6895 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6896 				break;
6897 			}
6898 		} else if (ring->me == 2) {
6899 			switch (ring->pipe) {
6900 			case 0:
6901 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6902 				break;
6903 			case 1:
6904 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6905 				break;
6906 			case 2:
6907 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6908 				break;
6909 			case 3:
6910 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6911 				break;
6912 			default:
6913 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6914 				break;
6915 			}
6916 		} else {
6917 			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
6918 		}
6919 	}
6920 
6921 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6922 		DRM_DEBUG("cik_irq_set: sw int dma\n");
6923 		dma_cntl |= TRAP_ENABLE;
6924 	}
6925 
6926 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6927 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
6928 		dma_cntl1 |= TRAP_ENABLE;
6929 	}
6930 
6931 	if (rdev->irq.crtc_vblank_int[0] ||
6932 	    atomic_read(&rdev->irq.pflip[0])) {
6933 		DRM_DEBUG("cik_irq_set: vblank 0\n");
6934 		crtc1 |= VBLANK_INTERRUPT_MASK;
6935 	}
6936 	if (rdev->irq.crtc_vblank_int[1] ||
6937 	    atomic_read(&rdev->irq.pflip[1])) {
6938 		DRM_DEBUG("cik_irq_set: vblank 1\n");
6939 		crtc2 |= VBLANK_INTERRUPT_MASK;
6940 	}
6941 	if (rdev->irq.crtc_vblank_int[2] ||
6942 	    atomic_read(&rdev->irq.pflip[2])) {
6943 		DRM_DEBUG("cik_irq_set: vblank 2\n");
6944 		crtc3 |= VBLANK_INTERRUPT_MASK;
6945 	}
6946 	if (rdev->irq.crtc_vblank_int[3] ||
6947 	    atomic_read(&rdev->irq.pflip[3])) {
6948 		DRM_DEBUG("cik_irq_set: vblank 3\n");
6949 		crtc4 |= VBLANK_INTERRUPT_MASK;
6950 	}
6951 	if (rdev->irq.crtc_vblank_int[4] ||
6952 	    atomic_read(&rdev->irq.pflip[4])) {
6953 		DRM_DEBUG("cik_irq_set: vblank 4\n");
6954 		crtc5 |= VBLANK_INTERRUPT_MASK;
6955 	}
6956 	if (rdev->irq.crtc_vblank_int[5] ||
6957 	    atomic_read(&rdev->irq.pflip[5])) {
6958 		DRM_DEBUG("cik_irq_set: vblank 5\n");
6959 		crtc6 |= VBLANK_INTERRUPT_MASK;
6960 	}
6961 	if (rdev->irq.hpd[0]) {
6962 		DRM_DEBUG("cik_irq_set: hpd 1\n");
6963 		hpd1 |= DC_HPDx_INT_EN;
6964 	}
6965 	if (rdev->irq.hpd[1]) {
6966 		DRM_DEBUG("cik_irq_set: hpd 2\n");
6967 		hpd2 |= DC_HPDx_INT_EN;
6968 	}
6969 	if (rdev->irq.hpd[2]) {
6970 		DRM_DEBUG("cik_irq_set: hpd 3\n");
6971 		hpd3 |= DC_HPDx_INT_EN;
6972 	}
6973 	if (rdev->irq.hpd[3]) {
6974 		DRM_DEBUG("cik_irq_set: hpd 4\n");
6975 		hpd4 |= DC_HPDx_INT_EN;
6976 	}
6977 	if (rdev->irq.hpd[4]) {
6978 		DRM_DEBUG("cik_irq_set: hpd 5\n");
6979 		hpd5 |= DC_HPDx_INT_EN;
6980 	}
6981 	if (rdev->irq.hpd[5]) {
6982 		DRM_DEBUG("cik_irq_set: hpd 6\n");
6983 		hpd6 |= DC_HPDx_INT_EN;
6984 	}
6985 
6986 	if (rdev->irq.dpm_thermal) {
6987 		DRM_DEBUG("dpm thermal\n");
6988 		if (rdev->flags & RADEON_IS_IGP)
6989 			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6990 		else
6991 			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6992 	}
6993 
6994 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6995 
6996 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6997 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6998 
6999 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7000 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7001 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7002 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7003 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7004 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7005 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7006 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7007 
7008 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7009 
7010 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7011 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7012 	if (rdev->num_crtc >= 4) {
7013 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7014 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7015 	}
7016 	if (rdev->num_crtc >= 6) {
7017 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7018 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7019 	}
7020 
7021 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7022 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7023 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7024 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7025 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7026 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7027 
7028 	if (rdev->flags & RADEON_IS_IGP)
7029 		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7030 	else
7031 		WREG32_SMC(CG_THERMAL_INT, thermal_int);
7032 
7033 	return 0;
7034 }
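
/*
 * Illustrative sketch (not part of the driver): map a compute ring's
 * ME/pipe pair to the per-pipe interrupt control register that
 * cik_irq_set() programs above.  The register names are the ones used
 * in this file; the helper itself is hypothetical and only mirrors
 * the switch statements in cik_irq_set().
 */
static inline u32 cik_compute_pipe_int_cntl_reg(u32 me, u32 pipe)
{
	static const u32 regs[2][4] = {
		{ CP_ME1_PIPE0_INT_CNTL, CP_ME1_PIPE1_INT_CNTL,
		  CP_ME1_PIPE2_INT_CNTL, CP_ME1_PIPE3_INT_CNTL },
		{ CP_ME2_PIPE0_INT_CNTL, CP_ME2_PIPE1_INT_CNTL,
		  CP_ME2_PIPE2_INT_CNTL, CP_ME2_PIPE3_INT_CNTL },
	};

	if (me < 1 || me > 2 || pipe > 3)
		return 0; /* invalid; cik_irq_set() logs and ignores these */
	return regs[me - 1][pipe];
}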
7035 
7036 /**
7037  * cik_irq_ack - ack interrupt sources
7038  *
7039  * @rdev: radeon_device pointer
7040  *
7041  * Ack interrupt sources on the GPU (vblanks, hpd,
7042  * etc.) (CIK).  Certain interrupt sources are sw
7043  * generated and do not require an explicit ack.
7044  */
7045 static inline void cik_irq_ack(struct radeon_device *rdev)
7046 {
7047 	u32 tmp;
7048 
7049 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7050 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7051 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7052 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7053 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7054 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7055 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7056 
7057 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7058 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7059 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7060 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7061 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7062 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7063 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7064 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7065 
7066 	if (rdev->num_crtc >= 4) {
7067 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7068 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7069 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7070 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7071 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7072 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7073 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7074 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7075 	}
7076 
7077 	if (rdev->num_crtc >= 6) {
7078 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7079 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7080 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7081 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7082 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7083 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7084 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7085 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7086 	}
7087 
7088 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7089 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7090 		tmp |= DC_HPDx_INT_ACK;
7091 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7092 	}
7093 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7094 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7095 		tmp |= DC_HPDx_INT_ACK;
7096 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7097 	}
7098 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7099 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7100 		tmp |= DC_HPDx_INT_ACK;
7101 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7102 	}
7103 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7104 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7105 		tmp |= DC_HPDx_INT_ACK;
7106 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7107 	}
7108 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7109 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7110 		tmp |= DC_HPDx_INT_ACK;
7111 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7112 	}
7113 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7114 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7115 		tmp |= DC_HPDx_INT_ACK;
7116 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7117 	}
7118 }
7119 
7120 /**
7121  * cik_irq_disable - disable interrupts
7122  *
7123  * @rdev: radeon_device pointer
7124  *
7125  * Disable interrupts on the hw (CIK).
7126  */
7127 static void cik_irq_disable(struct radeon_device *rdev)
7128 {
7129 	cik_disable_interrupts(rdev);
7130 	/* Wait and acknowledge irq */
7131 	mdelay(1);
7132 	cik_irq_ack(rdev);
7133 	cik_disable_interrupt_state(rdev);
7134 }
7135 
7136 /**
7137  * cik_irq_suspend - disable interrupts for suspend
7138  *
7139  * @rdev: radeon_device pointer
7140  *
7141  * Disable interrupts and stop the RLC (CIK).
7142  * Used for suspend.
7143  */
7144 static void cik_irq_suspend(struct radeon_device *rdev)
7145 {
7146 	cik_irq_disable(rdev);
7147 	cik_rlc_stop(rdev);
7148 }
7149 
7150 /**
7151  * cik_irq_fini - tear down interrupt support
7152  *
7153  * @rdev: radeon_device pointer
7154  *
7155  * Disable interrupts on the hw and free the IH ring
7156  * buffer (CIK).
7157  * Used for driver unload.
7158  */
7159 static void cik_irq_fini(struct radeon_device *rdev)
7160 {
7161 	cik_irq_suspend(rdev);
7162 	r600_ih_ring_fini(rdev);
7163 }
7164 
7165 /**
7166  * cik_get_ih_wptr - get the IH ring buffer wptr
7167  *
7168  * @rdev: radeon_device pointer
7169  *
7170  * Get the IH ring buffer wptr from either the register
7171  * or the writeback memory buffer (CIK).  Also check for
7172  * ring buffer overflow and deal with it.
7173  * Used by cik_irq_process().
7174  * Returns the value of the wptr.
7175  */
7176 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7177 {
7178 	u32 wptr, tmp;
7179 
7180 	if (rdev->wb.enabled)
7181 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7182 	else
7183 		wptr = RREG32(IH_RB_WPTR);
7184 
7185 	if (wptr & RB_OVERFLOW) {
7186 		/* When a ring buffer overflow happens, start parsing interrupts
7187 		 * from the last vector not yet overwritten (wptr + 16).
7188 		 * Hopefully this should allow us to catch up.
7189 		 */
7190 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
7191 			wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7192 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7193 		tmp = RREG32(IH_RB_CNTL);
7194 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7195 		WREG32(IH_RB_CNTL, tmp);
7196 	}
7197 	return (wptr & rdev->ih.ptr_mask);
7198 }
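
/*
 * Worked example, assuming the 64K IH ring allocated in cik_init()
 * below (ptr_mask would then be 0xffff): with rptr at the last entry
 * (0xfff0), rptr + 16 = 0x10000 and the mask wraps it back to the
 * start of the ring, (0x10000 & 0xffff) == 0.  The same masking
 * bounds the raw wptr returned above.
 */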
7199 
7200 /*        CIK IV Ring
7201  * Each IV ring entry is 128 bits:
7202  * [7:0]    - interrupt source id
7203  * [31:8]   - reserved
7204  * [59:32]  - interrupt source data
7205  * [63:60]  - reserved
7206  * [71:64]  - RINGID
7207  *            CP:
7208  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7209  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7210  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7211  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7212  *            PIPE_ID - ME0 0=3D
7213  *                    - ME1&2 compute dispatcher (4 pipes each)
7214  *            SDMA:
7215  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7216  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7217  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7218  * [79:72]  - VMID
7219  * [95:80]  - PASID
7220  * [127:96] - reserved
7221  */
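
/*
 * Illustrative sketch of how the 128-bit IV entry layout above is
 * unpacked.  cik_irq_process() below open-codes the same shifts and
 * masks; the struct and helper here are hypothetical and exist only
 * to make the bit layout concrete.
 */
struct cik_iv_entry {
	u32 src_id;   /* [7:0] */
	u32 src_data; /* [59:32], low 28 bits of the second dword */
	u32 ring_id;  /* [71:64] */
	u32 vmid;     /* [79:72] */
	u32 pasid;    /* [95:80] */
};

static inline void cik_decode_iv_entry(const volatile u32 *ring, u32 rptr,
				       struct cik_iv_entry *e)
{
	u32 i = rptr / 4; /* rptr is in bytes, each entry is 4 dwords */

	e->src_id   = le32_to_cpu(ring[i + 0]) & 0xff;
	e->src_data = le32_to_cpu(ring[i + 1]) & 0xfffffff;
	e->ring_id  = le32_to_cpu(ring[i + 2]) & 0xff;
	e->vmid     = (le32_to_cpu(ring[i + 2]) >> 8) & 0xff;
	e->pasid    = (le32_to_cpu(ring[i + 2]) >> 16) & 0xffff;
}
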
7222 /**
7223  * cik_irq_process - interrupt handler
7224  *
7225  * @rdev: radeon_device pointer
7226  *
7227  * Interrupt handler (CIK).  Walk the IH ring,
7228  * ack interrupts and schedule work to handle
7229  * interrupt events.
7230  * Returns irq process return code.
7231  */
7232 int cik_irq_process(struct radeon_device *rdev)
7233 {
7234 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7235 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7236 	u32 wptr;
7237 	u32 rptr;
7238 	u32 src_id, src_data, ring_id;
7239 	u8 me_id, pipe_id, queue_id;
7240 	u32 ring_index;
7241 	bool queue_hotplug = false;
7242 	bool queue_reset = false;
7243 	u32 addr, status, mc_client;
7244 	bool queue_thermal = false;
7245 
7246 	if (!rdev->ih.enabled || rdev->shutdown)
7247 		return IRQ_NONE;
7248 
7249 	wptr = cik_get_ih_wptr(rdev);
7250 
7251 restart_ih:
7252 	/* is somebody else already processing irqs? */
7253 	if (atomic_xchg(&rdev->ih.lock, 1))
7254 		return IRQ_NONE;
7255 
7256 	rptr = rdev->ih.rptr;
7257 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7258 
7259 	/* Order reading of wptr vs. reading of IH ring data */
7260 	rmb();
7261 
7262 	/* display interrupts */
7263 	cik_irq_ack(rdev);
7264 
7265 	while (rptr != wptr) {
7266 		/* wptr/rptr are in bytes! */
7267 		ring_index = rptr / 4;
7268 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7269 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7270 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7271 
7272 		switch (src_id) {
7273 		case 1: /* D1 vblank/vline */
7274 			switch (src_data) {
7275 			case 0: /* D1 vblank */
7276 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7277 					if (rdev->irq.crtc_vblank_int[0]) {
7278 						drm_handle_vblank(rdev->ddev, 0);
7279 						rdev->pm.vblank_sync = true;
7280 						wake_up(&rdev->irq.vblank_queue);
7281 					}
7282 					if (atomic_read(&rdev->irq.pflip[0]))
7283 						radeon_crtc_handle_flip(rdev, 0);
7284 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7285 					DRM_DEBUG("IH: D1 vblank\n");
7286 				}
7287 				break;
7288 			case 1: /* D1 vline */
7289 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7290 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7291 					DRM_DEBUG("IH: D1 vline\n");
7292 				}
7293 				break;
7294 			default:
7295 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7296 				break;
7297 			}
7298 			break;
7299 		case 2: /* D2 vblank/vline */
7300 			switch (src_data) {
7301 			case 0: /* D2 vblank */
7302 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7303 					if (rdev->irq.crtc_vblank_int[1]) {
7304 						drm_handle_vblank(rdev->ddev, 1);
7305 						rdev->pm.vblank_sync = true;
7306 						wake_up(&rdev->irq.vblank_queue);
7307 					}
7308 					if (atomic_read(&rdev->irq.pflip[1]))
7309 						radeon_crtc_handle_flip(rdev, 1);
7310 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7311 					DRM_DEBUG("IH: D2 vblank\n");
7312 				}
7313 				break;
7314 			case 1: /* D2 vline */
7315 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7316 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7317 					DRM_DEBUG("IH: D2 vline\n");
7318 				}
7319 				break;
7320 			default:
7321 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7322 				break;
7323 			}
7324 			break;
7325 		case 3: /* D3 vblank/vline */
7326 			switch (src_data) {
7327 			case 0: /* D3 vblank */
7328 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7329 					if (rdev->irq.crtc_vblank_int[2]) {
7330 						drm_handle_vblank(rdev->ddev, 2);
7331 						rdev->pm.vblank_sync = true;
7332 						wake_up(&rdev->irq.vblank_queue);
7333 					}
7334 					if (atomic_read(&rdev->irq.pflip[2]))
7335 						radeon_crtc_handle_flip(rdev, 2);
7336 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7337 					DRM_DEBUG("IH: D3 vblank\n");
7338 				}
7339 				break;
7340 			case 1: /* D3 vline */
7341 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7342 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7343 					DRM_DEBUG("IH: D3 vline\n");
7344 				}
7345 				break;
7346 			default:
7347 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7348 				break;
7349 			}
7350 			break;
7351 		case 4: /* D4 vblank/vline */
7352 			switch (src_data) {
7353 			case 0: /* D4 vblank */
7354 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7355 					if (rdev->irq.crtc_vblank_int[3]) {
7356 						drm_handle_vblank(rdev->ddev, 3);
7357 						rdev->pm.vblank_sync = true;
7358 						wake_up(&rdev->irq.vblank_queue);
7359 					}
7360 					if (atomic_read(&rdev->irq.pflip[3]))
7361 						radeon_crtc_handle_flip(rdev, 3);
7362 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7363 					DRM_DEBUG("IH: D4 vblank\n");
7364 				}
7365 				break;
7366 			case 1: /* D4 vline */
7367 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7368 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7369 					DRM_DEBUG("IH: D4 vline\n");
7370 				}
7371 				break;
7372 			default:
7373 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7374 				break;
7375 			}
7376 			break;
7377 		case 5: /* D5 vblank/vline */
7378 			switch (src_data) {
7379 			case 0: /* D5 vblank */
7380 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7381 					if (rdev->irq.crtc_vblank_int[4]) {
7382 						drm_handle_vblank(rdev->ddev, 4);
7383 						rdev->pm.vblank_sync = true;
7384 						wake_up(&rdev->irq.vblank_queue);
7385 					}
7386 					if (atomic_read(&rdev->irq.pflip[4]))
7387 						radeon_crtc_handle_flip(rdev, 4);
7388 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7389 					DRM_DEBUG("IH: D5 vblank\n");
7390 				}
7391 				break;
7392 			case 1: /* D5 vline */
7393 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7394 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7395 					DRM_DEBUG("IH: D5 vline\n");
7396 				}
7397 				break;
7398 			default:
7399 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7400 				break;
7401 			}
7402 			break;
7403 		case 6: /* D6 vblank/vline */
7404 			switch (src_data) {
7405 			case 0: /* D6 vblank */
7406 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7407 					if (rdev->irq.crtc_vblank_int[5]) {
7408 						drm_handle_vblank(rdev->ddev, 5);
7409 						rdev->pm.vblank_sync = true;
7410 						wake_up(&rdev->irq.vblank_queue);
7411 					}
7412 					if (atomic_read(&rdev->irq.pflip[5]))
7413 						radeon_crtc_handle_flip(rdev, 5);
7414 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7415 					DRM_DEBUG("IH: D6 vblank\n");
7416 				}
7417 				break;
7418 			case 1: /* D6 vline */
7419 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7420 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7421 					DRM_DEBUG("IH: D6 vline\n");
7422 				}
7423 				break;
7424 			default:
7425 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7426 				break;
7427 			}
7428 			break;
7429 		case 42: /* HPD hotplug */
7430 			switch (src_data) {
7431 			case 0:
7432 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7433 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7434 					queue_hotplug = true;
7435 					DRM_DEBUG("IH: HPD1\n");
7436 				}
7437 				break;
7438 			case 1:
7439 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7440 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7441 					queue_hotplug = true;
7442 					DRM_DEBUG("IH: HPD2\n");
7443 				}
7444 				break;
7445 			case 2:
7446 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7447 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7448 					queue_hotplug = true;
7449 					DRM_DEBUG("IH: HPD3\n");
7450 				}
7451 				break;
7452 			case 3:
7453 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7454 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7455 					queue_hotplug = true;
7456 					DRM_DEBUG("IH: HPD4\n");
7457 				}
7458 				break;
7459 			case 4:
7460 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7461 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7462 					queue_hotplug = true;
7463 					DRM_DEBUG("IH: HPD5\n");
7464 				}
7465 				break;
7466 			case 5:
7467 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7468 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7469 					queue_hotplug = true;
7470 					DRM_DEBUG("IH: HPD6\n");
7471 				}
7472 				break;
7473 			default:
7474 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7475 				break;
7476 			}
7477 			break;
7478 		case 124: /* UVD */
7479 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7480 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7481 			break;
7482 		case 146:
7483 		case 147:
7484 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7485 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7486 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7487 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7488 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7489 				addr);
7490 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7491 				status);
7492 			cik_vm_decode_fault(rdev, status, addr, mc_client);
7493 			/* reset addr and status */
7494 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7495 			break;
7496 		case 176: /* GFX RB CP_INT */
7497 		case 177: /* GFX IB CP_INT */
7498 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7499 			break;
7500 		case 181: /* CP EOP event */
7501 			DRM_DEBUG("IH: CP EOP\n");
7502 			/* XXX check the bitfield order! */
7503 			me_id = (ring_id & 0x60) >> 5;
7504 			pipe_id = (ring_id & 0x18) >> 3;
7505 			queue_id = (ring_id & 0x7) >> 0;
7506 			switch (me_id) {
7507 			case 0:
7508 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7509 				break;
7510 			case 1:
7511 			case 2:
7512 				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
7513 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7514 				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
7515 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7516 				break;
7517 			}
7518 			break;
7519 		case 184: /* CP Privileged reg access */
7520 			DRM_ERROR("Illegal register access in command stream\n");
7521 			/* XXX check the bitfield order! */
7522 			me_id = (ring_id & 0x60) >> 5;
7523 			pipe_id = (ring_id & 0x18) >> 3;
7524 			queue_id = (ring_id & 0x7) >> 0;
7525 			switch (me_id) {
7526 			case 0:
7527 				/* This results in a full GPU reset, but all we need to do is soft
7528 				 * reset the CP for gfx
7529 				 */
7530 				queue_reset = true;
7531 				break;
7532 			case 1:
7533 				/* XXX compute */
7534 				queue_reset = true;
7535 				break;
7536 			case 2:
7537 				/* XXX compute */
7538 				queue_reset = true;
7539 				break;
7540 			}
7541 			break;
7542 		case 185: /* CP Privileged inst */
7543 			DRM_ERROR("Illegal instruction in command stream\n");
7544 			/* XXX check the bitfield order! */
7545 			me_id = (ring_id & 0x60) >> 5;
7546 			pipe_id = (ring_id & 0x18) >> 3;
7547 			queue_id = (ring_id & 0x7) >> 0;
7548 			switch (me_id) {
7549 			case 0:
7550 				/* This results in a full GPU reset, but all we need to do is soft
7551 				 * reset the CP for gfx
7552 				 */
7553 				queue_reset = true;
7554 				break;
7555 			case 1:
7556 				/* XXX compute */
7557 				queue_reset = true;
7558 				break;
7559 			case 2:
7560 				/* XXX compute */
7561 				queue_reset = true;
7562 				break;
7563 			}
7564 			break;
7565 		case 224: /* SDMA trap event */
7566 			/* XXX check the bitfield order! */
7567 			me_id = (ring_id & 0x3) >> 0;
7568 			queue_id = (ring_id & 0xc) >> 2;
7569 			DRM_DEBUG("IH: SDMA trap\n");
7570 			switch (me_id) {
7571 			case 0:
7572 				switch (queue_id) {
7573 				case 0:
7574 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7575 					break;
7576 				case 1:
7577 					/* XXX compute */
7578 					break;
7579 				case 2:
7580 					/* XXX compute */
7581 					break;
7582 				}
7583 				break;
7584 			case 1:
7585 				switch (queue_id) {
7586 				case 0:
7587 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7588 					break;
7589 				case 1:
7590 					/* XXX compute */
7591 					break;
7592 				case 2:
7593 					/* XXX compute */
7594 					break;
7595 				}
7596 				break;
7597 			}
7598 			break;
7599 		case 230: /* thermal low to high */
7600 			DRM_DEBUG("IH: thermal low to high\n");
7601 			rdev->pm.dpm.thermal.high_to_low = false;
7602 			queue_thermal = true;
7603 			break;
7604 		case 231: /* thermal high to low */
7605 			DRM_DEBUG("IH: thermal high to low\n");
7606 			rdev->pm.dpm.thermal.high_to_low = true;
7607 			queue_thermal = true;
7608 			break;
7609 		case 233: /* GUI IDLE */
7610 			DRM_DEBUG("IH: GUI idle\n");
7611 			break;
7612 		case 241: /* SDMA Privileged inst */
7613 		case 247: /* SDMA Privileged inst */
7614 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
7615 			/* XXX check the bitfield order! */
7616 			me_id = (ring_id & 0x3) >> 0;
7617 			queue_id = (ring_id & 0xc) >> 2;
7618 			switch (me_id) {
7619 			case 0:
7620 				switch (queue_id) {
7621 				case 0:
7622 					queue_reset = true;
7623 					break;
7624 				case 1:
7625 					/* XXX compute */
7626 					queue_reset = true;
7627 					break;
7628 				case 2:
7629 					/* XXX compute */
7630 					queue_reset = true;
7631 					break;
7632 				}
7633 				break;
7634 			case 1:
7635 				switch (queue_id) {
7636 				case 0:
7637 					queue_reset = true;
7638 					break;
7639 				case 1:
7640 					/* XXX compute */
7641 					queue_reset = true;
7642 					break;
7643 				case 2:
7644 					/* XXX compute */
7645 					queue_reset = true;
7646 					break;
7647 				}
7648 				break;
7649 			}
7650 			break;
7651 		default:
7652 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7653 			break;
7654 		}
7655 
7656 		/* wptr/rptr are in bytes! */
7657 		rptr += 16;
7658 		rptr &= rdev->ih.ptr_mask;
7659 	}
7660 	if (queue_hotplug)
7661 		schedule_work(&rdev->hotplug_work);
7662 	if (queue_reset)
7663 		schedule_work(&rdev->reset_work);
7664 	if (queue_thermal)
7665 		schedule_work(&rdev->pm.dpm.thermal.work);
7666 	rdev->ih.rptr = rptr;
7667 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
7668 	atomic_set(&rdev->ih.lock, 0);
7669 
7670 	/* make sure wptr hasn't changed while processing */
7671 	wptr = cik_get_ih_wptr(rdev);
7672 	if (wptr != rptr)
7673 		goto restart_ih;
7674 
7675 	return IRQ_HANDLED;
7676 }
7677 
7678 /*
7679  * startup/shutdown callbacks
7680  */
7681 /**
7682  * cik_startup - program the asic to a functional state
7683  *
7684  * @rdev: radeon_device pointer
7685  *
7686  * Programs the asic to a functional state (CIK).
7687  * Called by cik_init() and cik_resume().
7688  * Returns 0 for success, error for failure.
7689  */
7690 static int cik_startup(struct radeon_device *rdev)
7691 {
7692 	struct radeon_ring *ring;
7693 	int r;
7694 
7695 	/* enable pcie gen2/3 link */
7696 	cik_pcie_gen3_enable(rdev);
7697 	/* enable aspm */
7698 	cik_program_aspm(rdev);
7699 
7700 	/* scratch needs to be initialized before MC */
7701 	r = r600_vram_scratch_init(rdev);
7702 	if (r)
7703 		return r;
7704 
7705 	cik_mc_program(rdev);
7706 
7707 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
7708 		r = ci_mc_load_microcode(rdev);
7709 		if (r) {
7710 			DRM_ERROR("Failed to load MC firmware!\n");
7711 			return r;
7712 		}
7713 	}
7714 
7715 	r = cik_pcie_gart_enable(rdev);
7716 	if (r)
7717 		return r;
7718 	cik_gpu_init(rdev);
7719 
7720 	/* allocate rlc buffers */
7721 	if (rdev->flags & RADEON_IS_IGP) {
7722 		if (rdev->family == CHIP_KAVERI) {
7723 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7724 			rdev->rlc.reg_list_size =
7725 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7726 		} else {
7727 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7728 			rdev->rlc.reg_list_size =
7729 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7730 		}
7731 	}
7732 	rdev->rlc.cs_data = ci_cs_data;
7733 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7734 	r = sumo_rlc_init(rdev);
7735 	if (r) {
7736 		DRM_ERROR("Failed to init rlc BOs!\n");
7737 		return r;
7738 	}
7739 
7740 	/* allocate wb buffer */
7741 	r = radeon_wb_init(rdev);
7742 	if (r)
7743 		return r;
7744 
7745 	/* allocate mec buffers */
7746 	r = cik_mec_init(rdev);
7747 	if (r) {
7748 		DRM_ERROR("Failed to init MEC BOs!\n");
7749 		return r;
7750 	}
7751 
7752 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7753 	if (r) {
7754 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7755 		return r;
7756 	}
7757 
7758 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7759 	if (r) {
7760 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7761 		return r;
7762 	}
7763 
7764 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7765 	if (r) {
7766 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7767 		return r;
7768 	}
7769 
7770 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7771 	if (r) {
7772 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7773 		return r;
7774 	}
7775 
7776 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7777 	if (r) {
7778 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7779 		return r;
7780 	}
7781 
7782 	r = radeon_uvd_resume(rdev);
7783 	if (!r) {
7784 		r = uvd_v4_2_resume(rdev);
7785 		if (!r) {
7786 			r = radeon_fence_driver_start_ring(rdev,
7787 							   R600_RING_TYPE_UVD_INDEX);
7788 			if (r)
7789 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7790 		}
7791 	}
7792 	if (r)
7793 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7794 
7795 	/* Enable IRQ */
7796 	if (!rdev->irq.installed) {
7797 		r = radeon_irq_kms_init(rdev);
7798 		if (r)
7799 			return r;
7800 	}
7801 
7802 	r = cik_irq_init(rdev);
7803 	if (r) {
7804 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
7805 		radeon_irq_kms_fini(rdev);
7806 		return r;
7807 	}
7808 	cik_irq_set(rdev);
7809 
7810 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7811 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7812 			     PACKET3(PACKET3_NOP, 0x3FFF));
7813 	if (r)
7814 		return r;
7815 
7816 	/* set up the compute queues */
7817 	/* type-2 packets are deprecated on MEC, use type-3 instead */
7818 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7819 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7820 			     PACKET3(PACKET3_NOP, 0x3FFF));
7821 	if (r)
7822 		return r;
7823 	ring->me = 1; /* first MEC */
7824 	ring->pipe = 0; /* first pipe */
7825 	ring->queue = 0; /* first queue */
7826 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7827 
7828 	/* type-2 packets are deprecated on MEC, use type-3 instead */
7829 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7830 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7831 			     PACKET3(PACKET3_NOP, 0x3FFF));
7832 	if (r)
7833 		return r;
7834 	/* dGPU only have 1 MEC */
7835 	ring->me = 1; /* first MEC */
7836 	ring->pipe = 0; /* first pipe */
7837 	ring->queue = 1; /* second queue */
7838 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
7839 
7840 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7841 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7842 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7843 	if (r)
7844 		return r;
7845 
7846 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7847 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7848 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7849 	if (r)
7850 		return r;
7851 
7852 	r = cik_cp_resume(rdev);
7853 	if (r)
7854 		return r;
7855 
7856 	r = cik_sdma_resume(rdev);
7857 	if (r)
7858 		return r;
7859 
7860 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7861 	if (ring->ring_size) {
7862 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7863 				     RADEON_CP_PACKET2);
7864 		if (!r)
7865 			r = uvd_v1_0_init(rdev);
7866 		if (r)
7867 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7868 	}
7869 
7870 	r = radeon_ib_pool_init(rdev);
7871 	if (r) {
7872 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7873 		return r;
7874 	}
7875 
7876 	r = radeon_vm_manager_init(rdev);
7877 	if (r) {
7878 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7879 		return r;
7880 	}
7881 
7882 	r = dce6_audio_init(rdev);
7883 	if (r)
7884 		return r;
7885 
7886 	return 0;
7887 }
7888 
7889 /**
7890  * cik_resume - resume the asic to a functional state
7891  *
7892  * @rdev: radeon_device pointer
7893  *
7894  * Programs the asic to a functional state (CIK).
7895  * Called at resume.
7896  * Returns 0 for success, error for failure.
7897  */
7898 int cik_resume(struct radeon_device *rdev)
7899 {
7900 	int r;
7901 
7902 	/* post card */
7903 	atom_asic_init(rdev->mode_info.atom_context);
7904 
7905 	/* init golden registers */
7906 	cik_init_golden_registers(rdev);
7907 
7908 	if (rdev->pm.pm_method == PM_METHOD_DPM)
7909 		radeon_pm_resume(rdev);
7910 
7911 	rdev->accel_working = true;
7912 	r = cik_startup(rdev);
7913 	if (r) {
7914 		DRM_ERROR("cik startup failed on resume\n");
7915 		rdev->accel_working = false;
7916 		return r;
7917 	}
7918 
7919 	return r;
7920 
7921 }
7922 
7923 /**
7924  * cik_suspend - suspend the asic
7925  *
7926  * @rdev: radeon_device pointer
7927  *
7928  * Bring the chip into a state suitable for suspend (CIK).
7929  * Called at suspend.
7930  * Returns 0 for success.
7931  */
7932 int cik_suspend(struct radeon_device *rdev)
7933 {
7934 	radeon_pm_suspend(rdev);
7935 	dce6_audio_fini(rdev);
7936 	radeon_vm_manager_fini(rdev);
7937 	cik_cp_enable(rdev, false);
7938 	cik_sdma_enable(rdev, false);
7939 	uvd_v1_0_fini(rdev);
7940 	radeon_uvd_suspend(rdev);
7941 	cik_fini_pg(rdev);
7942 	cik_fini_cg(rdev);
7943 	cik_irq_suspend(rdev);
7944 	radeon_wb_disable(rdev);
7945 	cik_pcie_gart_disable(rdev);
7946 	return 0;
7947 }
7948 
7949 /* The plan is to move initialization into this function and use
7950  * helper functions so that radeon_device_init pretty much
7951  * does nothing more than call asic specific functions. This
7952  * should also allow us to remove a bunch of callback functions
7953  * like vram_info.
7954  */
7955 /**
7956  * cik_init - asic specific driver and hw init
7957  *
7958  * @rdev: radeon_device pointer
7959  *
7960  * Setup asic specific driver variables and program the hw
7961  * to a functional state (CIK).
7962  * Called at driver startup.
7963  * Returns 0 for success, errors for failure.
7964  */
7965 int cik_init(struct radeon_device *rdev)
7966 {
7967 	struct radeon_ring *ring;
7968 	int r;
7969 
7970 	/* Read BIOS */
7971 	if (!radeon_get_bios(rdev)) {
7972 		if (ASIC_IS_AVIVO(rdev))
7973 			return -EINVAL;
7974 	}
7975 	/* Must be an ATOMBIOS */
7976 	if (!rdev->is_atom_bios) {
7977 		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
7978 		return -EINVAL;
7979 	}
7980 	r = radeon_atombios_init(rdev);
7981 	if (r)
7982 		return r;
7983 
7984 	/* Post card if necessary */
7985 	if (!radeon_card_posted(rdev)) {
7986 		if (!rdev->bios) {
7987 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7988 			return -EINVAL;
7989 		}
7990 		DRM_INFO("GPU not posted. posting now...\n");
7991 		atom_asic_init(rdev->mode_info.atom_context);
7992 	}
7993 	/* init golden registers */
7994 	cik_init_golden_registers(rdev);
7995 	/* Initialize scratch registers */
7996 	cik_scratch_init(rdev);
7997 	/* Initialize surface registers */
7998 	radeon_surface_init(rdev);
7999 	/* Initialize clocks */
8000 	radeon_get_clock_info(rdev->ddev);
8001 
8002 	/* Fence driver */
8003 	r = radeon_fence_driver_init(rdev);
8004 	if (r)
8005 		return r;
8006 
8007 	/* initialize memory controller */
8008 	r = cik_mc_init(rdev);
8009 	if (r)
8010 		return r;
8011 	/* Memory manager */
8012 	r = radeon_bo_init(rdev);
8013 	if (r)
8014 		return r;
8015 
8016 	if (rdev->flags & RADEON_IS_IGP) {
8017 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8018 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8019 			r = cik_init_microcode(rdev);
8020 			if (r) {
8021 				DRM_ERROR("Failed to load firmware!\n");
8022 				return r;
8023 			}
8024 		}
8025 	} else {
8026 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8027 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8028 		    !rdev->mc_fw) {
8029 			r = cik_init_microcode(rdev);
8030 			if (r) {
8031 				DRM_ERROR("Failed to load firmware!\n");
8032 				return r;
8033 			}
8034 		}
8035 	}
8036 
8037 	/* Initialize power management */
8038 	radeon_pm_init(rdev);
8039 
8040 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8041 	ring->ring_obj = NULL;
8042 	r600_ring_init(rdev, ring, 1024 * 1024);
8043 
8044 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8045 	ring->ring_obj = NULL;
8046 	r600_ring_init(rdev, ring, 1024 * 1024);
8047 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8048 	if (r)
8049 		return r;
8050 
8051 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8052 	ring->ring_obj = NULL;
8053 	r600_ring_init(rdev, ring, 1024 * 1024);
8054 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8055 	if (r)
8056 		return r;
8057 
8058 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8059 	ring->ring_obj = NULL;
8060 	r600_ring_init(rdev, ring, 256 * 1024);
8061 
8062 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8063 	ring->ring_obj = NULL;
8064 	r600_ring_init(rdev, ring, 256 * 1024);
8065 
8066 	r = radeon_uvd_init(rdev);
8067 	if (!r) {
8068 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8069 		ring->ring_obj = NULL;
8070 		r600_ring_init(rdev, ring, 4096);
8071 	}
8072 
8073 	rdev->ih.ring_obj = NULL;
8074 	r600_ih_ring_init(rdev, 64 * 1024);
8075 
8076 	r = r600_pcie_gart_init(rdev);
8077 	if (r)
8078 		return r;
8079 
8080 	rdev->accel_working = true;
8081 	r = cik_startup(rdev);
8082 	if (r) {
8083 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8084 		cik_cp_fini(rdev);
8085 		cik_sdma_fini(rdev);
8086 		cik_irq_fini(rdev);
8087 		sumo_rlc_fini(rdev);
8088 		cik_mec_fini(rdev);
8089 		radeon_wb_fini(rdev);
8090 		radeon_ib_pool_fini(rdev);
8091 		radeon_vm_manager_fini(rdev);
8092 		radeon_irq_kms_fini(rdev);
8093 		cik_pcie_gart_fini(rdev);
8094 		rdev->accel_working = false;
8095 	}
8096 
8097 	/* Don't start up if the MC ucode is missing.
8098 	 * The default clocks and voltages before the MC ucode
8099 	 * is loaded are not sufficient for advanced operations.
8100 	 */
8101 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8102 		DRM_ERROR("radeon: MC ucode required for CIK parts.\n");
8103 		return -EINVAL;
8104 	}
8105 
8106 	return 0;
8107 }
8108 
8109 /**
8110  * cik_fini - asic specific driver and hw fini
8111  *
8112  * @rdev: radeon_device pointer
8113  *
8114  * Tear down the asic specific driver variables and program the hw
8115  * to an idle state (CIK).
8116  * Called at driver unload.
8117  */
8118 void cik_fini(struct radeon_device *rdev)
8119 {
8120 	radeon_pm_fini(rdev);
8121 	cik_cp_fini(rdev);
8122 	cik_sdma_fini(rdev);
8123 	cik_fini_pg(rdev);
8124 	cik_fini_cg(rdev);
8125 	cik_irq_fini(rdev);
8126 	sumo_rlc_fini(rdev);
8127 	cik_mec_fini(rdev);
8128 	radeon_wb_fini(rdev);
8129 	radeon_vm_manager_fini(rdev);
8130 	radeon_ib_pool_fini(rdev);
8131 	radeon_irq_kms_fini(rdev);
8132 	uvd_v1_0_fini(rdev);
8133 	radeon_uvd_fini(rdev);
8134 	cik_pcie_gart_fini(rdev);
8135 	r600_vram_scratch_fini(rdev);
8136 	radeon_gem_fini(rdev);
8137 	radeon_fence_driver_fini(rdev);
8138 	radeon_bo_fini(rdev);
8139 	radeon_atombios_fini(rdev);
8140 	kfree(rdev->bios);
8141 	rdev->bios = NULL;
8142 }
8143 
8144 void dce8_program_fmt(struct drm_encoder *encoder)
8145 {
8146 	struct drm_device *dev = encoder->dev;
8147 	struct radeon_device *rdev = dev->dev_private;
8148 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8149 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8150 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8151 	int bpc = 0;
8152 	u32 tmp = 0;
8153 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8154 
8155 	if (connector) {
8156 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8157 		bpc = radeon_get_monitor_bpc(connector);
8158 		dither = radeon_connector->dither;
8159 	}
8160 
8161 	/* LVDS/eDP FMT is set up by atom */
8162 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8163 		return;
8164 
8165 	/* not needed for analog */
8166 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8167 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8168 		return;
8169 
8170 	if (bpc == 0)
8171 		return;
8172 
8173 	switch (bpc) {
8174 	case 6:
8175 		if (dither == RADEON_FMT_DITHER_ENABLE)
8176 			/* XXX sort out optimal dither settings */
8177 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8178 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8179 		else
8180 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8181 		break;
8182 	case 8:
8183 		if (dither == RADEON_FMT_DITHER_ENABLE)
8184 			/* XXX sort out optimal dither settings */
8185 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8186 				FMT_RGB_RANDOM_ENABLE |
8187 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8188 		else
8189 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8190 		break;
8191 	case 10:
8192 		if (dither == RADEON_FMT_DITHER_ENABLE)
8193 			/* XXX sort out optimal dither settings */
8194 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8195 				FMT_RGB_RANDOM_ENABLE |
8196 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8197 		else
8198 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8199 		break;
8200 	default:
8201 		/* not needed */
8202 		break;
8203 	}
8204 
8205 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8206 }
8207 
8208 /* display watermark setup */
8209 /**
8210  * dce8_line_buffer_adjust - Set up the line buffer
8211  *
8212  * @rdev: radeon_device pointer
8213  * @radeon_crtc: the selected display controller
8214  * @mode: the current display mode on the selected display
8215  * controller
8216  *
8217  * Set up the line buffer allocation for
8218  * the selected display controller (CIK).
8219  * Returns the line buffer size in pixels.
8220  */
8221 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8222 				   struct radeon_crtc *radeon_crtc,
8223 				   struct drm_display_mode *mode)
8224 {
8225 	u32 tmp, buffer_alloc, i;
8226 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8227 	/*
8228 	 * Line Buffer Setup
8229 	 * There are 6 line buffers, one for each display controller.
8230 	 * There are 3 partitions per LB. Select the number of partitions
8231 	 * to enable based on the display width.  For display widths larger
8232 	 * than 4096, you need to use 2 display controllers and combine
8233 	 * them using the stereo blender.
8234 	 */
8235 	if (radeon_crtc->base.enabled && mode) {
8236 		if (mode->crtc_hdisplay < 1920) {
8237 			tmp = 1;
8238 			buffer_alloc = 2;
8239 		} else if (mode->crtc_hdisplay < 2560) {
8240 			tmp = 2;
8241 			buffer_alloc = 2;
8242 		} else if (mode->crtc_hdisplay < 4096) {
8243 			tmp = 0;
8244 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8245 		} else {
8246 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8247 			tmp = 0;
8248 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8249 		}
8250 	} else {
8251 		tmp = 1;
8252 		buffer_alloc = 0;
8253 	}
8254 
8255 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8256 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8257 
8258 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8259 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8260 	for (i = 0; i < rdev->usec_timeout; i++) {
8261 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8262 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8263 			break;
8264 		udelay(1);
8265 	}
8266 
8267 	if (radeon_crtc->base.enabled && mode) {
8268 		switch (tmp) {
8269 		case 0:
8270 		default:
8271 			return 4096 * 2;
8272 		case 1:
8273 			return 1920 * 2;
8274 		case 2:
8275 			return 2560 * 2;
8276 		}
8277 	}
8278 
8279 	/* controller not enabled, so no lb used */
8280 	return 0;
8281 }
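
/*
 * Worked example with assumed numbers: a 1920-wide mode falls through
 * to the "< 2560" branch above, so tmp = 2 and buffer_alloc = 2; the
 * CRTC gets LB_MEMORY_CONFIG(2) and the function returns 2560 * 2 =
 * 5120 pixels of line buffer.
 */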
8282 
8283 /**
8284  * cik_get_number_of_dram_channels - get the number of dram channels
8285  *
8286  * @rdev: radeon_device pointer
8287  *
8288  * Look up the number of video ram channels (CIK).
8289  * Used for display watermark bandwidth calculations
8290  * Returns the number of dram channels
8291  */
8292 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8293 {
8294 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8295 
8296 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8297 	case 0:
8298 	default:
8299 		return 1;
8300 	case 1:
8301 		return 2;
8302 	case 2:
8303 		return 4;
8304 	case 3:
8305 		return 8;
8306 	case 4:
8307 		return 3;
8308 	case 5:
8309 		return 6;
8310 	case 6:
8311 		return 10;
8312 	case 7:
8313 		return 12;
8314 	case 8:
8315 		return 16;
8316 	}
8317 }
8318 
8319 struct dce8_wm_params {
8320 	u32 dram_channels; /* number of dram channels */
8321 	u32 yclk;          /* bandwidth per dram data pin in kHz */
8322 	u32 sclk;          /* engine clock in kHz */
8323 	u32 disp_clk;      /* display clock in kHz */
8324 	u32 src_width;     /* viewport width */
8325 	u32 active_time;   /* active display time in ns */
8326 	u32 blank_time;    /* blank time in ns */
8327 	bool interlaced;    /* mode is interlaced */
8328 	fixed20_12 vsc;    /* vertical scale ratio */
8329 	u32 num_heads;     /* number of active crtcs */
8330 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8331 	u32 lb_size;       /* line buffer allocated to pipe */
8332 	u32 vtaps;         /* vertical scaler taps */
8333 };
8334 
8335 /**
8336  * dce8_dram_bandwidth - get the dram bandwidth
8337  *
8338  * @wm: watermark calculation data
8339  *
8340  * Calculate the raw dram bandwidth (CIK).
8341  * Used for display watermark bandwidth calculations
8342  * Returns the dram bandwidth in MBytes/s
8343  */
8344 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8345 {
8346 	/* Calculate raw DRAM Bandwidth */
8347 	fixed20_12 dram_efficiency; /* 0.7 */
8348 	fixed20_12 yclk, dram_channels, bandwidth;
8349 	fixed20_12 a;
8350 
8351 	a.full = dfixed_const(1000);
8352 	yclk.full = dfixed_const(wm->yclk);
8353 	yclk.full = dfixed_div(yclk, a);
8354 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8355 	a.full = dfixed_const(10);
8356 	dram_efficiency.full = dfixed_const(7);
8357 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8358 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8359 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8360 
8361 	return dfixed_trunc(bandwidth);
8362 }
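
/*
 * Worked example with assumed numbers: yclk = 1000000 kHz (1 GHz per
 * pin) and 2 dram channels gives (1000000 / 1000) * (2 * 4) * 0.7 =
 * 5600 MBytes/s of raw dram bandwidth.
 */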
8363 
8364 /**
8365  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8366  *
8367  * @wm: watermark calculation data
8368  *
8369  * Calculate the dram bandwidth used for display (CIK).
8370  * Used for display watermark bandwidth calculations
8371  * Returns the dram bandwidth for display in MBytes/s
8372  */
8373 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8374 {
8375 	/* Calculate DRAM Bandwidth and the part allocated to display. */
8376 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8377 	fixed20_12 yclk, dram_channels, bandwidth;
8378 	fixed20_12 a;
8379 
8380 	a.full = dfixed_const(1000);
8381 	yclk.full = dfixed_const(wm->yclk);
8382 	yclk.full = dfixed_div(yclk, a);
8383 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8384 	a.full = dfixed_const(10);
8385 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
8386 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8387 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8388 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8389 
8390 	return dfixed_trunc(bandwidth);
8391 }
8392 
8393 /**
8394  * dce8_data_return_bandwidth - get the data return bandwidth
8395  *
8396  * @wm: watermark calculation data
8397  *
8398  * Calculate the data return bandwidth used for display (CIK).
8399  * Used for display watermark bandwidth calculations
8400  * Returns the data return bandwidth in MBytes/s
8401  */
8402 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8403 {
8404 	/* Calculate the display Data return Bandwidth */
8405 	fixed20_12 return_efficiency; /* 0.8 */
8406 	fixed20_12 sclk, bandwidth;
8407 	fixed20_12 a;
8408 
8409 	a.full = dfixed_const(1000);
8410 	sclk.full = dfixed_const(wm->sclk);
8411 	sclk.full = dfixed_div(sclk, a);
8412 	a.full = dfixed_const(10);
8413 	return_efficiency.full = dfixed_const(8);
8414 	return_efficiency.full = dfixed_div(return_efficiency, a);
8415 	a.full = dfixed_const(32);
8416 	bandwidth.full = dfixed_mul(a, sclk);
8417 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8418 
8419 	return dfixed_trunc(bandwidth);
8420 }
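
/*
 * Worked example with an assumed sclk of 800000 kHz:
 * (800000 / 1000) * 32 * 0.8 = 20480 MBytes/s of data return
 * bandwidth.
 */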
8421 
8422 /**
8423  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8424  *
8425  * @wm: watermark calculation data
8426  *
8427  * Calculate the dmif bandwidth used for display (CIK).
8428  * Used for display watermark bandwidth calculations
8429  * Returns the dmif bandwidth in MBytes/s
8430  */
8431 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8432 {
8433 	/* Calculate the DMIF Request Bandwidth */
8434 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8435 	fixed20_12 disp_clk, bandwidth;
8436 	fixed20_12 a, b;
8437 
8438 	a.full = dfixed_const(1000);
8439 	disp_clk.full = dfixed_const(wm->disp_clk);
8440 	disp_clk.full = dfixed_div(disp_clk, a);
8441 	a.full = dfixed_const(32);
8442 	b.full = dfixed_mul(a, disp_clk);
8443 
8444 	a.full = dfixed_const(10);
8445 	disp_clk_request_efficiency.full = dfixed_const(8);
8446 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8447 
8448 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8449 
8450 	return dfixed_trunc(bandwidth);
8451 }
8452 
8453 /**
8454  * dce8_available_bandwidth - get the min available bandwidth
8455  *
8456  * @wm: watermark calculation data
8457  *
8458  * Calculate the min available bandwidth used for display (CIK).
8459  * Used for display watermark bandwidth calculations
8460  * Returns the min available bandwidth in MBytes/s
8461  */
8462 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8463 {
8464 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
8465 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8466 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8467 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8468 
8469 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8470 }
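
/*
 * Continuing the assumed numbers above, plus disp_clk = 600000 kHz
 * (dmif term (600000 / 1000) * 32 * 0.8 = 15360): the available
 * bandwidth is min(5600, 20480, 15360) = 5600 MBytes/s, i.e. dram
 * bandwidth is the limiting factor in this example.
 */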
8471 
8472 /**
8473  * dce8_average_bandwidth - get the average available bandwidth
8474  *
8475  * @wm: watermark calculation data
8476  *
8477  * Calculate the average available bandwidth used for display (CIK).
8478  * Used for display watermark bandwidth calculations
8479  * Returns the average available bandwidth in MBytes/s
8480  */
8481 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8482 {
8483 	/* Calculate the display mode Average Bandwidth
8484 	 * DisplayMode should contain the source and destination dimensions,
8485 	 * timing, etc.
8486 	 */
8487 	fixed20_12 bpp;
8488 	fixed20_12 line_time;
8489 	fixed20_12 src_width;
8490 	fixed20_12 bandwidth;
8491 	fixed20_12 a;
8492 
8493 	a.full = dfixed_const(1000);
8494 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8495 	line_time.full = dfixed_div(line_time, a);
8496 	bpp.full = dfixed_const(wm->bytes_per_pixel);
8497 	src_width.full = dfixed_const(wm->src_width);
8498 	bandwidth.full = dfixed_mul(src_width, bpp);
8499 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8500 	bandwidth.full = dfixed_div(bandwidth, line_time);
8501 
8502 	return dfixed_trunc(bandwidth);
8503 }
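
/*
 * Worked example with assumed numbers: src_width = 1920, 4 bytes per
 * pixel, vsc = 1.0 and active_time + blank_time = 14815 ns (roughly a
 * 1080p60 line) gives a line time of ~14.8 us and an average
 * bandwidth of (1920 * 4 * 1.0) / 14.8 = ~519 MBytes/s.
 */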
8504 
8505 /**
8506  * dce8_latency_watermark - get the latency watermark
8507  *
8508  * @wm: watermark calculation data
8509  *
8510  * Calculate the latency watermark (CIK).
8511  * Used for display watermark bandwidth calculations
8512  * Returns the latency watermark in ns
8513  */
8514 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8515 {
8516 	/* First calculate the latency in ns */
8517 	u32 mc_latency = 2000; /* 2000 ns. */
8518 	u32 available_bandwidth = dce8_available_bandwidth(wm);
8519 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8520 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8521 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8522 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8523 		(wm->num_heads * cursor_line_pair_return_time);
8524 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8525 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8526 	u32 tmp, dmif_size = 12288;
8527 	fixed20_12 a, b, c;
8528 
8529 	if (wm->num_heads == 0)
8530 		return 0;
8531 
8532 	a.full = dfixed_const(2);
8533 	b.full = dfixed_const(1);
8534 	if ((wm->vsc.full > a.full) ||
8535 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8536 	    (wm->vtaps >= 5) ||
8537 	    ((wm->vsc.full >= a.full) && wm->interlaced))
8538 		max_src_lines_per_dst_line = 4;
8539 	else
8540 		max_src_lines_per_dst_line = 2;
8541 
8542 	a.full = dfixed_const(available_bandwidth);
8543 	b.full = dfixed_const(wm->num_heads);
8544 	a.full = dfixed_div(a, b);
8545 
8546 	b.full = dfixed_const(mc_latency + 512);
8547 	c.full = dfixed_const(wm->disp_clk);
8548 	b.full = dfixed_div(b, c);
8549 
8550 	c.full = dfixed_const(dmif_size);
8551 	b.full = dfixed_div(c, b);
8552 
8553 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8554 
8555 	b.full = dfixed_const(1000);
8556 	c.full = dfixed_const(wm->disp_clk);
8557 	b.full = dfixed_div(c, b);
8558 	c.full = dfixed_const(wm->bytes_per_pixel);
8559 	b.full = dfixed_mul(b, c);
8560 
8561 	lb_fill_bw = min(tmp, dfixed_trunc(b));
8562 
8563 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8564 	b.full = dfixed_const(1000);
8565 	c.full = dfixed_const(lb_fill_bw);
8566 	b.full = dfixed_div(c, b);
8567 	a.full = dfixed_div(a, b);
8568 	line_fill_time = dfixed_trunc(a);
8569 
8570 	if (line_fill_time < wm->active_time)
8571 		return latency;
8572 	else
8573 		return latency + (line_fill_time - wm->active_time);
8575 }
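/*
 * Worked example (illustrative only; all values assumed): with
 * 10000 MB/s of available bandwidth, two heads and a 148.5 MHz
 * display clock the terms above come out roughly as:
 *   worst_chunk_return_time      = 512 * 8 * 1000 / 10000 = ~409 ns
 *   cursor_line_pair_return_time = 128 * 4 * 1000 / 10000 = ~51 ns
 *   other_heads_data_return_time = 3 * 409 + 2 * 51       = ~1329 ns
 *   dc_latency                   = 40000000 / 148500      = ~269 ns
 *   latency                      = 2000 + 1329 + 269      = ~3598 ns
 */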
8576 
8577 /**
8578  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8579  * the average bandwidth against the available dram bandwidth
8580  *
8581  * @wm: watermark calculation data
8582  *
8583  * Check if the display average bandwidth fits in the display
8584  * dram bandwidth (CIK).
8585  * Used for display watermark bandwidth calculations
8586  * Returns true if the display fits, false if not.
8587  */
8588 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8589 {
8590 	return dce8_average_bandwidth(wm) <=
8591 		(dce8_dram_bandwidth_for_display(wm) / wm->num_heads);
8595 }
8596 
8597 /**
8598  * dce8_average_bandwidth_vs_available_bandwidth - check
8599  * the average bandwidth against the available display bandwidth
8600  *
8601  * @wm: watermark calculation data
8602  *
8603  * Check if the display average bandwidth fits in the display
8604  * available bandwidth (CIK).
8605  * Used for display watermark bandwidth calculations
8606  * Returns true if the display fits, false if not.
8607  */
8608 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8609 {
8610 	return dce8_average_bandwidth(wm) <=
8611 		(dce8_available_bandwidth(wm) / wm->num_heads);
8615 }
8616 
8617 /**
8618  * dce8_check_latency_hiding - check latency hiding
8619  *
8620  * @wm: watermark calculation data
8621  *
8622  * Check whether the line buffer can hide the latency watermark (CIK).
8623  * Used for display watermark bandwidth calculations
8624  * Returns true if the display fits, false if not.
8625  */
8626 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8627 {
8628 	u32 lb_partitions = wm->lb_size / wm->src_width;
8629 	u32 line_time = wm->active_time + wm->blank_time;
8630 	u32 latency_tolerant_lines;
8631 	u32 latency_hiding;
8632 	fixed20_12 a;
8633 
8634 	a.full = dfixed_const(1);
8635 	if (wm->vsc.full > a.full)
8636 		latency_tolerant_lines = 1;
8637 	else {
8638 		if (lb_partitions <= (wm->vtaps + 1))
8639 			latency_tolerant_lines = 1;
8640 		else
8641 			latency_tolerant_lines = 2;
8642 	}
8643 
8644 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8645 
8646 	return dce8_latency_watermark(wm) <= latency_hiding;
8650 }
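/*
 * Worked example (illustrative only; values assumed, continuing the
 * numbers above): lb_size = 2 * 1920 and src_width = 1920 give two
 * line buffer partitions; with vsc = 1.0 and vtaps = 1 that allows one
 * latency tolerant line, so latency_hiding = 1 * 13200 + 1680 =
 * ~14880 ns, which comfortably hides a ~3598 ns latency watermark.
 */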
8651 
8652 /**
8653  * dce8_program_watermarks - program display watermarks
8654  *
8655  * @rdev: radeon_device pointer
8656  * @radeon_crtc: the selected display controller
8657  * @lb_size: line buffer size
8658  * @num_heads: number of display controllers in use
8659  *
8660  * Calculate and program the display watermarks for the
8661  * selected display controller (CIK).
8662  */
8663 static void dce8_program_watermarks(struct radeon_device *rdev,
8664 				    struct radeon_crtc *radeon_crtc,
8665 				    u32 lb_size, u32 num_heads)
8666 {
8667 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
8668 	struct dce8_wm_params wm_low, wm_high;
8669 	u32 pixel_period;
8670 	u32 line_time = 0;
8671 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
8672 	u32 tmp, wm_mask;
8673 
8674 	if (radeon_crtc->base.enabled && num_heads && mode) {
8675 		pixel_period = 1000000 / (u32)mode->clock;
8676 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8677 
8678 		/* watermark for high clocks */
8679 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8680 		    rdev->pm.dpm_enabled) {
8681 			wm_high.yclk =
8682 				radeon_dpm_get_mclk(rdev, false) * 10;
8683 			wm_high.sclk =
8684 				radeon_dpm_get_sclk(rdev, false) * 10;
8685 		} else {
8686 			wm_high.yclk = rdev->pm.current_mclk * 10;
8687 			wm_high.sclk = rdev->pm.current_sclk * 10;
8688 		}
8689 
8690 		wm_high.disp_clk = mode->clock;
8691 		wm_high.src_width = mode->crtc_hdisplay;
8692 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8693 		wm_high.blank_time = line_time - wm_high.active_time;
8694 		wm_high.interlaced = false;
8695 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8696 			wm_high.interlaced = true;
8697 		wm_high.vsc = radeon_crtc->vsc;
8698 		wm_high.vtaps = 1;
8699 		if (radeon_crtc->rmx_type != RMX_OFF)
8700 			wm_high.vtaps = 2;
8701 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8702 		wm_high.lb_size = lb_size;
8703 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8704 		wm_high.num_heads = num_heads;
8705 
8706 		/* set for high clocks */
8707 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8708 
8709 		/* possibly force display priority to high */
8710 		/* should really do this at mode validation time... */
8711 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8712 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8713 		    !dce8_check_latency_hiding(&wm_high) ||
8714 		    (rdev->disp_priority == 2)) {
8715 			DRM_DEBUG_KMS("force priority to high\n");
8716 		}
8717 
8718 		/* watermark for low clocks */
8719 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8720 		    rdev->pm.dpm_enabled) {
8721 			wm_low.yclk =
8722 				radeon_dpm_get_mclk(rdev, true) * 10;
8723 			wm_low.sclk =
8724 				radeon_dpm_get_sclk(rdev, true) * 10;
8725 		} else {
8726 			wm_low.yclk = rdev->pm.current_mclk * 10;
8727 			wm_low.sclk = rdev->pm.current_sclk * 10;
8728 		}
8729 
8730 		wm_low.disp_clk = mode->clock;
8731 		wm_low.src_width = mode->crtc_hdisplay;
8732 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8733 		wm_low.blank_time = line_time - wm_low.active_time;
8734 		wm_low.interlaced = false;
8735 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8736 			wm_low.interlaced = true;
8737 		wm_low.vsc = radeon_crtc->vsc;
8738 		wm_low.vtaps = 1;
8739 		if (radeon_crtc->rmx_type != RMX_OFF)
8740 			wm_low.vtaps = 2;
8741 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8742 		wm_low.lb_size = lb_size;
8743 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8744 		wm_low.num_heads = num_heads;
8745 
8746 		/* set for low clocks */
8747 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
8748 
8749 		/* possibly force display priority to high */
8750 		/* should really do this at mode validation time... */
8751 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
8752 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
8753 		    !dce8_check_latency_hiding(&wm_low) ||
8754 		    (rdev->disp_priority == 2)) {
8755 			DRM_DEBUG_KMS("force priority to high\n");
8756 		}
8757 	}
8758 
8759 	/* select wm A */
8760 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8761 	tmp = wm_mask;
8762 	tmp &= ~LATENCY_WATERMARK_MASK(3);
8763 	tmp |= LATENCY_WATERMARK_MASK(1);
8764 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8765 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8766 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
8767 		LATENCY_HIGH_WATERMARK(line_time)));
8768 	/* select wm B */
8769 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8770 	tmp &= ~LATENCY_WATERMARK_MASK(3);
8771 	tmp |= LATENCY_WATERMARK_MASK(2);
8772 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8773 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8774 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
8775 		LATENCY_HIGH_WATERMARK(line_time)));
8776 	/* restore original selection */
8777 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
8778 
8779 	/* save values for DPM */
8780 	radeon_crtc->line_time = line_time;
8781 	radeon_crtc->wm_high = latency_watermark_a;
8782 	radeon_crtc->wm_low = latency_watermark_b;
8783 }
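/*
 * Minimal sketch (hypothetical helper, not part of the driver) of the
 * select/program/restore pattern used above: pick watermark set A or B
 * via LATENCY_WATERMARK_MASK, program the pipe latency registers for
 * that set, then put the original mask back.
 */
#if 0
static void dce8_write_wm_set(struct radeon_device *rdev, u32 crtc_offset,
			      u32 set, u32 latency_wm, u32 line_time)
{
	u32 mask = RREG32(DPG_WATERMARK_MASK_CONTROL + crtc_offset);
	u32 tmp = (mask & ~LATENCY_WATERMARK_MASK(3)) |
		  LATENCY_WATERMARK_MASK(set);

	WREG32(DPG_WATERMARK_MASK_CONTROL + crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_wm) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore the originally selected watermark set */
	WREG32(DPG_WATERMARK_MASK_CONTROL + crtc_offset, mask);
}
#endif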
8784 
8785 /**
8786  * dce8_bandwidth_update - program display watermarks
8787  *
8788  * @rdev: radeon_device pointer
8789  *
8790  * Calculate and program the display watermarks and line
8791  * buffer allocation (CIK).
8792  */
8793 void dce8_bandwidth_update(struct radeon_device *rdev)
8794 {
8795 	struct drm_display_mode *mode = NULL;
8796 	u32 num_heads = 0, lb_size;
8797 	int i;
8798 
8799 	radeon_update_display_priority(rdev);
8800 
8801 	for (i = 0; i < rdev->num_crtc; i++) {
8802 		if (rdev->mode_info.crtcs[i]->base.enabled)
8803 			num_heads++;
8804 	}
8805 	for (i = 0; i < rdev->num_crtc; i++) {
8806 		mode = &rdev->mode_info.crtcs[i]->base.mode;
8807 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8808 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8809 	}
8810 }
8811 
8812 /**
8813  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8814  *
8815  * @rdev: radeon_device pointer
8816  *
8817  * Fetches a GPU clock counter snapshot (CIK).
8818  * Returns the 64 bit clock counter snapshot.
8819  */
8820 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8821 {
8822 	uint64_t clock;
8823 
8824 	mutex_lock(&rdev->gpu_clock_mutex);
8825 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8826 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8827 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32);
8828 	mutex_unlock(&rdev->gpu_clock_mutex);
8829 	return clock;
8830 }
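/*
 * Usage sketch (illustrative only): sample the counter twice to
 * measure elapsed GPU clocks across an operation.
 */
#if 0
	uint64_t start = cik_get_gpu_clock_counter(rdev);
	/* ... submit work and wait for it ... */
	uint64_t elapsed = cik_get_gpu_clock_counter(rdev) - start;
#endif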
8831 
8832 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8833                               u32 cntl_reg, u32 status_reg)
8834 {
8835 	int r, i;
8836 	struct atom_clock_dividers dividers;
8837 	uint32_t tmp;
8838 
8839 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8840 					   clock, false, &dividers);
8841 	if (r)
8842 		return r;
8843 
8844 	tmp = RREG32_SMC(cntl_reg);
8845 	tmp &= ~(DCLK_DIR_CNTL_EN | DCLK_DIVIDER_MASK);
8846 	tmp |= dividers.post_divider;
8847 	WREG32_SMC(cntl_reg, tmp);
8848 
8849 	for (i = 0; i < 100; i++) {
8850 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
8851 			break;
8852 		mdelay(10);
8853 	}
8854 	if (i == 100)
8855 		return -ETIMEDOUT;
8856 
8857 	return 0;
8858 }
8859 
8860 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8861 {
8862 	int r = 0;
8863 
8864 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8865 	if (r)
8866 		return r;
8867 
8868 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8869 	return r;
8870 }
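/*
 * Caller sketch (illustrative only; clock values assumed): program new
 * UVD clocks and propagate a timeout from the divider status poll in
 * cik_set_uvd_clock().
 */
#if 0
	int r = cik_set_uvd_clocks(rdev, 54000, 40000);
	if (r == -ETIMEDOUT)
		DRM_ERROR("UVD clock change did not complete\n");
#endif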
8871 
8872 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
8873 {
8874 	struct pci_dev *root = rdev->pdev->bus->self;
8875 	int bridge_pos, gpu_pos;
8876 	u32 speed_cntl, mask, current_data_rate;
8877 	int ret, i;
8878 	u16 tmp16;
8879 
8880 	if (radeon_pcie_gen2 == 0)
8881 		return;
8882 
8883 	if (rdev->flags & RADEON_IS_IGP)
8884 		return;
8885 
8886 	if (!(rdev->flags & RADEON_IS_PCIE))
8887 		return;
8888 
8889 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
8890 	if (ret != 0)
8891 		return;
8892 
8893 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
8894 		return;
8895 
8896 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8897 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
8898 		LC_CURRENT_DATA_RATE_SHIFT;
8899 	if (mask & DRM_PCIE_SPEED_80) {
8900 		if (current_data_rate == 2) {
8901 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
8902 			return;
8903 		}
8904 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
8905 	} else if (mask & DRM_PCIE_SPEED_50) {
8906 		if (current_data_rate == 1) {
8907 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
8908 			return;
8909 		}
8910 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
8911 	}
8912 
8913 	bridge_pos = pci_pcie_cap(root);
8914 	if (!bridge_pos)
8915 		return;
8916 
8917 	gpu_pos = pci_pcie_cap(rdev->pdev);
8918 	if (!gpu_pos)
8919 		return;
8920 
8921 	if (mask & DRM_PCIE_SPEED_80) {
8922 		/* re-try equalization if gen3 is not already enabled */
8923 		if (current_data_rate != 2) {
8924 			u16 bridge_cfg, gpu_cfg;
8925 			u16 bridge_cfg2, gpu_cfg2;
8926 			u32 max_lw, current_lw, tmp;
8927 
8928 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8929 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8930 
8931 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
8932 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8933 
8934 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
8935 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8936 
8937 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8938 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
8939 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
8940 
8941 			if (current_lw < max_lw) {
8942 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8943 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
8944 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
8945 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
8946 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
8947 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
8948 				}
8949 			}
8950 
8951 			for (i = 0; i < 10; i++) {
8952 				/* check status */
8953 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
8954 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
8955 					break;
8956 
8957 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8958 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8959 
8960 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
8961 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
8962 
8963 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8964 				tmp |= LC_SET_QUIESCE;
8965 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8966 
8967 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8968 				tmp |= LC_REDO_EQ;
8969 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8970 
8971 				mdelay(100);
8972 
8973 				/* linkctl */
8974 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
8975 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8976 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
8977 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8978 
8979 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
8980 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8981 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
8982 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8983 
8984 				/* linkctl2 */
8985 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
8986 				tmp16 &= ~((1 << 4) | (7 << 9));
8987 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
8988 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
8989 
8990 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8991 				tmp16 &= ~((1 << 4) | (7 << 9));
8992 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
8993 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8994 
8995 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8996 				tmp &= ~LC_SET_QUIESCE;
8997 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8998 			}
8999 		}
9000 	}
9001 
9002 	/* set the link speed */
9003 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9004 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9005 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9006 
9007 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9008 	tmp16 &= ~0xf;
9009 	if (mask & DRM_PCIE_SPEED_80)
9010 		tmp16 |= 3; /* gen3 */
9011 	else if (mask & DRM_PCIE_SPEED_50)
9012 		tmp16 |= 2; /* gen2 */
9013 	else
9014 		tmp16 |= 1; /* gen1 */
9015 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9016 
9017 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9018 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9019 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9020 
9021 	for (i = 0; i < rdev->usec_timeout; i++) {
9022 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9023 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9024 			break;
9025 		udelay(1);
9026 	}
9027 }
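/*
 * Summary of the sequence above (descriptive only):
 * 1) bail out unless a gen2/gen3 capable PCIE link is present,
 * 2) for gen3, optionally redo equalization via the LNKCTL/LNKCTL2
 *    save-quiesce-redo-restore loop,
 * 3) write the target link speed into the gpu's LNKCTL2 register,
 * 4) set LC_INITIATE_LINK_SPEED_CHANGE and poll until the hardware
 *    clears it.
 */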
9028 
9029 static void cik_program_aspm(struct radeon_device *rdev)
9030 {
9031 	u32 data, orig;
9032 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9033 	bool disable_clkreq = false;
9034 
9035 	if (radeon_aspm == 0)
9036 		return;
9037 
9038 	/* XXX double check IGPs */
9039 	if (rdev->flags & RADEON_IS_IGP)
9040 		return;
9041 
9042 	if (!(rdev->flags & RADEON_IS_PCIE))
9043 		return;
9044 
9045 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9046 	data &= ~LC_XMIT_N_FTS_MASK;
9047 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9048 	if (orig != data)
9049 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9050 
9051 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9052 	data |= LC_GO_TO_RECOVERY;
9053 	if (orig != data)
9054 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9055 
9056 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9057 	data |= P_IGNORE_EDB_ERR;
9058 	if (orig != data)
9059 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9060 
9061 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9062 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9063 	data |= LC_PMI_TO_L1_DIS;
9064 	if (!disable_l0s)
9065 		data |= LC_L0S_INACTIVITY(7);
9066 
9067 	if (!disable_l1) {
9068 		data |= LC_L1_INACTIVITY(7);
9069 		data &= ~LC_PMI_TO_L1_DIS;
9070 		if (orig != data)
9071 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9072 
9073 		if (!disable_plloff_in_l1) {
9074 			bool clk_req_support;
9075 
9076 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9077 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9078 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9079 			if (orig != data)
9080 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9081 
9082 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9083 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9084 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9085 			if (orig != data)
9086 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9087 
9088 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9089 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9090 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9091 			if (orig != data)
9092 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9093 
9094 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9095 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9096 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9097 			if (orig != data)
9098 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9099 
9100 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9101 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9102 			data |= LC_DYN_LANES_PWR_STATE(3);
9103 			if (orig != data)
9104 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9105 
9106 			if (!disable_clkreq) {
9107 				struct pci_dev *root = rdev->pdev->bus->self;
9108 				u32 lnkcap;
9109 
9110 				clk_req_support = false;
9111 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9112 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9113 					clk_req_support = true;
9114 			} else {
9115 				clk_req_support = false;
9116 			}
9117 
9118 			if (clk_req_support) {
9119 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9120 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9121 				if (orig != data)
9122 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9123 
9124 				orig = data = RREG32_SMC(THM_CLK_CNTL);
9125 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9126 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9127 				if (orig != data)
9128 					WREG32_SMC(THM_CLK_CNTL, data);
9129 
9130 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9131 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9132 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9133 				if (orig != data)
9134 					WREG32_SMC(MISC_CLK_CTRL, data);
9135 
9136 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9137 				data &= ~BCLK_AS_XCLK;
9138 				if (orig != data)
9139 					WREG32_SMC(CG_CLKPIN_CNTL, data);
9140 
9141 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9142 				data &= ~FORCE_BIF_REFCLK_EN;
9143 				if (orig != data)
9144 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9145 
9146 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9147 				data &= ~MPLL_CLKOUT_SEL_MASK;
9148 				data |= MPLL_CLKOUT_SEL(4);
9149 				if (orig != data)
9150 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9151 			}
9152 		}
9153 	} else {
9154 		if (orig != data)
9155 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9156 	}
9157 
9158 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9159 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9160 	if (orig != data)
9161 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9162 
9163 	if (!disable_l0s) {
9164 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9165 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9166 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9167 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9168 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9169 				data &= ~LC_L0S_INACTIVITY_MASK;
9170 				if (orig != data)
9171 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9172 			}
9173 		}
9174 	}
9175 }
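/*
 * Sketch (illustrative only) of the CLKREQ gating used above: dynamic
 * power-down in L1/L23 is only enabled when the upstream bridge
 * advertises clock power management in its link capabilities.
 */
#if 0
	u32 lnkcap;
	bool clkreq_ok = false;

	pcie_capability_read_dword(rdev->pdev->bus->self, PCI_EXP_LNKCAP,
				   &lnkcap);
	if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
		clkreq_ok = true;
#endif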
9176