/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_audio.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"
#include "radeon_kfd.h"

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");

MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");

MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

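/*
 * Thermal sensor helpers.  The field semantics below are inferred from the
 * masks used: the CTF (critical temperature fault) reading is a 9-bit value
 * in degrees C, with bit 9 apparently flagging an over-range sample that is
 * clamped to 255 C; the KV/KB variant reads a raw SMC register and rescales
 * it instead.
 */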
/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/*
 * Indirect register accessors
 */
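/*
 * The PCIe port registers sit behind an index/data pair: the register
 * offset is written to PCIE_INDEX and the payload is then accessed through
 * PCIE_DATA.  The discarded RREG32() calls below read back the register
 * just written so the posted write is flushed before the next access.
 */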
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}

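/*
 * RLC save/restore register lists.  The layout is inferred from how the
 * lists are built: each entry is a pair of dwords, the first combining a
 * GRBM_GFX_INDEX-style instance/broadcast select in the upper 16 bits with
 * the register's dword offset (byte offset >> 2) in the lower 16 bits, the
 * second a 0x00000000 placeholder for the saved value.  The bare literals
 * (0x3, 0x5) appear to introduce sub-lists with their own layout.
 */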
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

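/*
 * "Golden" register settings.  Each table row is a triplet of
 * (register offset, and_mask, or_mask) consumed by
 * radeon_program_register_sequence(): the register is read, the and_mask
 * bits are cleared, (or_mask & and_mask) is OR'd back in and the result is
 * written out; an and_mask of 0xffffffff writes or_mask verbatim.
 */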
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
	mutex_lock(&rdev->grbm_idx_mutex);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
	mutex_unlock(&rdev->grbm_idx_mutex);
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
{
	if (index < rdev->doorbell.num_doorbells) {
		return readl(rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
{
	if (index < rdev->doorbell.num_doorbells) {
		writel(v, rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

#define BONAIRE_IO_MC_REGS_SIZE 36

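/*
 * MC io debug register tables used with the older, monolithic firmware
 * images.  Each row is an (index, data) pair programmed through
 * MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA while the MC ucode is loaded;
 * see the register loop in ci_mc_load_microcode() below.
 */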
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

#define HAWAII_IO_MC_REGS_SIZE 22

static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};

1778 /**
1779  * cik_srbm_select - select specific register instances
1780  *
1781  * @rdev: radeon_device pointer
1782  * @me: selected ME (micro engine)
1783  * @pipe: pipe
1784  * @queue: queue
1785  * @vmid: VMID
1786  *
1787  * Switches the currently active register instances.  Some
1788  * registers are instanced per VMID, others are instanced per
1789  * me/pipe/queue combination.
1790  */
1791 static void cik_srbm_select(struct radeon_device *rdev,
1792 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1793 {
1794 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1795 			     MEID(me & 0x3) |
1796 			     VMID(vmid & 0xf) |
1797 			     QUEUEID(queue & 0x7));
1798 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1799 }
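/*
 * Usage sketch (illustrative): users of instanced registers are expected
 * to hold rdev->srbm_mutex around the select and to restore the default
 * instance when done, e.g.:
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, 0);
 *	... program the per-queue registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */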
1800 
1801 /* ucode loading */
1802 /**
1803  * ci_mc_load_microcode - load MC ucode into the hw
1804  *
1805  * @rdev: radeon_device pointer
1806  *
1807  * Load the GDDR MC ucode into the hw (CIK).
1808  * Returns 0 on success, error on failure.
1809  */
1810 int ci_mc_load_microcode(struct radeon_device *rdev)
1811 {
1812 	const __be32 *fw_data = NULL;
1813 	const __le32 *new_fw_data = NULL;
1814 	u32 running, tmp;
1815 	u32 *io_mc_regs = NULL;
1816 	const __le32 *new_io_mc_regs = NULL;
1817 	int i, regs_size, ucode_size;
1818 
1819 	if (!rdev->mc_fw)
1820 		return -EINVAL;
1821 
1822 	if (rdev->new_fw) {
1823 		const struct mc_firmware_header_v1_0 *hdr =
1824 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1825 
1826 		radeon_ucode_print_mc_hdr(&hdr->header);
1827 
1828 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1829 		new_io_mc_regs = (const __le32 *)
1830 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1831 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1832 		new_fw_data = (const __le32 *)
1833 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1834 	} else {
1835 		ucode_size = rdev->mc_fw->size / 4;
1836 
1837 		switch (rdev->family) {
1838 		case CHIP_BONAIRE:
1839 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1840 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1841 			break;
1842 		case CHIP_HAWAII:
1843 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1844 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1845 			break;
1846 		default:
1847 			return -EINVAL;
1848 		}
1849 		fw_data = (const __be32 *)rdev->mc_fw->data;
1850 	}
1851 
1852 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1853 
1854 	if (running == 0) {
1859 
1860 		/* reset the engine and set to writable */
1861 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1862 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1863 
1864 		/* load mc io regs */
1865 		for (i = 0; i < regs_size; i++) {
1866 			if (rdev->new_fw) {
1867 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1868 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1869 			} else {
1870 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1871 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1872 			}
1873 		}
1874 
1875 		tmp = RREG32(MC_SEQ_MISC0);
1876 		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1877 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1878 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1879 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1880 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1881 		}
1882 
1883 		/* load the MC ucode */
1884 		for (i = 0; i < ucode_size; i++) {
1885 			if (rdev->new_fw)
1886 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1887 			else
1888 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1889 		}
1890 
1891 		/* put the engine back into the active state */
1892 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1893 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1894 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1895 
1896 		/* wait for training to complete */
1897 		for (i = 0; i < rdev->usec_timeout; i++) {
1898 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1899 				break;
1900 			udelay(1);
1901 		}
1902 		for (i = 0; i < rdev->usec_timeout; i++) {
1903 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1904 				break;
1905 			udelay(1);
1906 		}
1910 	}
1911 
1912 	return 0;
1913 }
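/*
 * Call-site sketch (illustrative, assuming the cik_startup() flow): on
 * discrete boards the MC ucode is (re)loaded before the MC is programmed
 * and a failure aborts startup:
 *
 *	r = ci_mc_load_microcode(rdev);
 *	if (r) {
 *		DRM_ERROR("Failed to load MC firmware!\n");
 *		return r;
 *	}
 */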
1914 
1915 /**
1916  * cik_init_microcode - load ucode images from disk
1917  *
1918  * @rdev: radeon_device pointer
1919  *
1920  * Use the firmware interface to load the ucode images into
1921  * the driver (not loaded into hw).
1922  * Returns 0 on success, error on failure.
1923  */
1924 static int cik_init_microcode(struct radeon_device *rdev)
1925 {
1926 	const char *chip_name;
1927 	const char *new_chip_name;
1928 	size_t pfp_req_size, me_req_size, ce_req_size,
1929 		mec_req_size, rlc_req_size, mc_req_size = 0,
1930 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1931 	char fw_name[30];
1932 	int new_fw = 0;
1933 	int err;
1934 	int num_fw;
1935 
1936 	DRM_DEBUG("\n");
1937 
1938 	switch (rdev->family) {
1939 	case CHIP_BONAIRE:
1940 		chip_name = "BONAIRE";
1941 		new_chip_name = "bonaire";
1942 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1943 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1944 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1945 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1946 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1947 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1948 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1949 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1950 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1951 		num_fw = 8;
1952 		break;
1953 	case CHIP_HAWAII:
1954 		chip_name = "HAWAII";
1955 		new_chip_name = "hawaii";
1956 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1957 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1958 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1959 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1960 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1961 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1962 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1963 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1964 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1965 		num_fw = 8;
1966 		break;
1967 	case CHIP_KAVERI:
1968 		chip_name = "KAVERI";
1969 		new_chip_name = "kaveri";
1970 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1971 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1972 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1973 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1974 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1975 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1976 		num_fw = 7;
1977 		break;
1978 	case CHIP_KABINI:
1979 		chip_name = "KABINI";
1980 		new_chip_name = "kabini";
1981 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1982 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1983 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1984 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1985 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1986 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1987 		num_fw = 6;
1988 		break;
1989 	case CHIP_MULLINS:
1990 		chip_name = "MULLINS";
1991 		new_chip_name = "mullins";
1992 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1993 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1994 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1995 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1996 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
1997 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1998 		num_fw = 6;
1999 		break;
2000 	default:
		BUG();
2001 	}
2002 
2003 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2004 
2005 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2006 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2007 	if (err) {
2008 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2009 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2010 		if (err)
2011 			goto out;
2012 		if (rdev->pfp_fw->size != pfp_req_size) {
2013 			printk(KERN_ERR
2014 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2015 			       rdev->pfp_fw->size, fw_name);
2016 			err = -EINVAL;
2017 			goto out;
2018 		}
2019 	} else {
2020 		err = radeon_ucode_validate(rdev->pfp_fw);
2021 		if (err) {
2022 			printk(KERN_ERR
2023 			       "cik_fw: validation failed for firmware \"%s\"\n",
2024 			       fw_name);
2025 			goto out;
2026 		} else {
2027 			new_fw++;
2028 		}
2029 	}
2030 
2031 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2032 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2033 	if (err) {
2034 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2035 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2036 		if (err)
2037 			goto out;
2038 		if (rdev->me_fw->size != me_req_size) {
2039 			printk(KERN_ERR
2040 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2041 			       rdev->me_fw->size, fw_name);
2042 			err = -EINVAL;
2043 		}
2044 	} else {
2045 		err = radeon_ucode_validate(rdev->me_fw);
2046 		if (err) {
2047 			printk(KERN_ERR
2048 			       "cik_fw: validation failed for firmware \"%s\"\n",
2049 			       fw_name);
2050 			goto out;
2051 		} else {
2052 			new_fw++;
2053 		}
2054 	}
2055 
2056 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2057 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2058 	if (err) {
2059 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2060 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2061 		if (err)
2062 			goto out;
2063 		if (rdev->ce_fw->size != ce_req_size) {
2064 			printk(KERN_ERR
2065 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2066 			       rdev->ce_fw->size, fw_name);
2067 			err = -EINVAL;
2068 		}
2069 	} else {
2070 		err = radeon_ucode_validate(rdev->ce_fw);
2071 		if (err) {
2072 			printk(KERN_ERR
2073 			       "cik_fw: validation failed for firmware \"%s\"\n",
2074 			       fw_name);
2075 			goto out;
2076 		} else {
2077 			new_fw++;
2078 		}
2079 	}
2080 
2081 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2082 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2083 	if (err) {
2084 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2085 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2086 		if (err)
2087 			goto out;
2088 		if (rdev->mec_fw->size != mec_req_size) {
2089 			printk(KERN_ERR
2090 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2091 			       rdev->mec_fw->size, fw_name);
2092 			err = -EINVAL;
2093 		}
2094 	} else {
2095 		err = radeon_ucode_validate(rdev->mec_fw);
2096 		if (err) {
2097 			printk(KERN_ERR
2098 			       "cik_fw: validation failed for firmware \"%s\"\n",
2099 			       fw_name);
2100 			goto out;
2101 		} else {
2102 			new_fw++;
2103 		}
2104 	}
2105 
2106 	if (rdev->family == CHIP_KAVERI) {
2107 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2108 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2109 		if (err) {
2110 			goto out;
2111 		} else {
2112 			err = radeon_ucode_validate(rdev->mec2_fw);
2113 			if (err) {
2114 				goto out;
2115 			} else {
2116 				new_fw++;
2117 			}
2118 		}
2119 	}
2120 
2121 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2122 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2123 	if (err) {
2124 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2125 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2126 		if (err)
2127 			goto out;
2128 		if (rdev->rlc_fw->size != rlc_req_size) {
2129 			printk(KERN_ERR
2130 			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2131 			       rdev->rlc_fw->size, fw_name);
2132 			err = -EINVAL;
2133 		}
2134 	} else {
2135 		err = radeon_ucode_validate(rdev->rlc_fw);
2136 		if (err) {
2137 			printk(KERN_ERR
2138 			       "cik_fw: validation failed for firmware \"%s\"\n",
2139 			       fw_name);
2140 			goto out;
2141 		} else {
2142 			new_fw++;
2143 		}
2144 	}
2145 
2146 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2147 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2148 	if (err) {
2149 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2150 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2151 		if (err)
2152 			goto out;
2153 		if (rdev->sdma_fw->size != sdma_req_size) {
2154 			printk(KERN_ERR
2155 			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2156 			       rdev->sdma_fw->size, fw_name);
2157 			err = -EINVAL;
2158 		}
2159 	} else {
2160 		err = radeon_ucode_validate(rdev->sdma_fw);
2161 		if (err) {
2162 			printk(KERN_ERR
2163 			       "cik_fw: validation failed for firmware \"%s\"\n",
2164 			       fw_name);
2165 			goto out;
2166 		} else {
2167 			new_fw++;
2168 		}
2169 	}
2170 
2171 	/* No SMC, MC ucode on APUs */
2172 	if (!(rdev->flags & RADEON_IS_IGP)) {
2173 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2174 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2175 		if (err) {
2176 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2177 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2178 			if (err) {
2179 				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2180 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2181 				if (err)
2182 					goto out;
2183 			}
2184 			if ((rdev->mc_fw->size != mc_req_size) &&
2185 			    (rdev->mc_fw->size != mc2_req_size)) {
2186 				printk(KERN_ERR
2187 				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2188 				       rdev->mc_fw->size, fw_name);
2189 				err = -EINVAL;
2190 			}
2191 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2192 		} else {
2193 			err = radeon_ucode_validate(rdev->mc_fw);
2194 			if (err) {
2195 				printk(KERN_ERR
2196 				       "cik_fw: validation failed for firmware \"%s\"\n",
2197 				       fw_name);
2198 				goto out;
2199 			} else {
2200 				new_fw++;
2201 			}
2202 		}
2203 
2204 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2205 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2206 		if (err) {
2207 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2208 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2209 			if (err) {
2210 				printk(KERN_ERR
2211 				       "smc: error loading firmware \"%s\"\n",
2212 				       fw_name);
2213 				release_firmware(rdev->smc_fw);
2214 				rdev->smc_fw = NULL;
2215 				err = 0;
2216 			} else if (rdev->smc_fw->size != smc_req_size) {
2217 				printk(KERN_ERR
2218 				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2219 				       rdev->smc_fw->size, fw_name);
2220 				err = -EINVAL;
2221 			}
2222 		} else {
2223 			err = radeon_ucode_validate(rdev->smc_fw);
2224 			if (err) {
2225 				printk(KERN_ERR
2226 				       "cik_fw: validation failed for firmware \"%s\"\n",
2227 				       fw_name);
2228 				goto out;
2229 			} else {
2230 				new_fw++;
2231 			}
2232 		}
2233 	}
2234 
2235 	if (new_fw == 0) {
2236 		rdev->new_fw = false;
2237 	} else if (new_fw < num_fw) {
2238 		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2239 		err = -EINVAL;
2240 	} else {
2241 		rdev->new_fw = true;
2242 	}
2243 
2244 out:
2245 	if (err) {
2246 		if (err != -EINVAL)
2247 			printk(KERN_ERR
2248 			       "cik_cp: Failed to load firmware \"%s\"\n",
2249 			       fw_name);
2250 		release_firmware(rdev->pfp_fw);
2251 		rdev->pfp_fw = NULL;
2252 		release_firmware(rdev->me_fw);
2253 		rdev->me_fw = NULL;
2254 		release_firmware(rdev->ce_fw);
2255 		rdev->ce_fw = NULL;
2256 		release_firmware(rdev->mec_fw);
2257 		rdev->mec_fw = NULL;
2258 		release_firmware(rdev->mec2_fw);
2259 		rdev->mec2_fw = NULL;
2260 		release_firmware(rdev->rlc_fw);
2261 		rdev->rlc_fw = NULL;
2262 		release_firmware(rdev->sdma_fw);
2263 		rdev->sdma_fw = NULL;
2264 		release_firmware(rdev->mc_fw);
2265 		rdev->mc_fw = NULL;
2266 		release_firmware(rdev->smc_fw);
2267 		rdev->smc_fw = NULL;
2268 	}
2269 	return err;
2270 }
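/*
 * Note on the fallback pattern above: each request first tries the
 * new-style lowercase name (e.g. "radeon/bonaire_pfp.bin", which carries
 * a header that radeon_ucode_validate() checks) and only then the legacy
 * uppercase name (e.g. "radeon/BONAIRE_pfp.bin", validated by size
 * alone).  The new_fw count then rejects any mix of the two styles.
 */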
2271 
2272 /*
2273  * Core functions
2274  */
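/*
 * Consumer sketch (illustrative, with assumptions): the tables cached in
 * rdev->config.cik.tile_mode_array[] and macrotile_mode_array[] by
 * cik_tiling_mode_table_init() below are assumed to be what userspace
 * retrieves through the RADEON_INFO ioctl, so mesa can select a tiling
 * index rather than specifying raw per-surface parameters.
 */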
2275 /**
2276  * cik_tiling_mode_table_init - init the hw tiling table
2277  *
2278  * @rdev: radeon_device pointer
2279  *
2280  * Starting with SI, the tiling setup is done globally in a
2281  * set of 32 tiling modes.  Rather than selecting each set of
2282  * parameters per surface as on older asics, we just select
2283  * which index in the tiling table we want to use, and the
2284  * surface uses those parameters (CIK).
2285  */
2286 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2287 {
2288 	const u32 num_tile_mode_states = 32;
2289 	const u32 num_secondary_tile_mode_states = 16;
2290 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2291 	u32 num_pipe_configs;
2292 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2293 		rdev->config.cik.max_shader_engines;
2294 
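	/* map the DRAM row size to the matching TILE_SPLIT encoding */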
2295 	switch (rdev->config.cik.mem_row_size_in_kb) {
2296 	case 1:
2297 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2298 		break;
2299 	case 2:
2300 	default:
2301 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2302 		break;
2303 	case 4:
2304 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2305 		break;
2306 	}
2307 
2308 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2309 	if (num_pipe_configs > 8)
2310 		num_pipe_configs = 16;
2311 
2312 	if (num_pipe_configs == 16) {
2313 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2314 			switch (reg_offset) {
2315 			case 0:
2316 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2317 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2318 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2319 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2320 				break;
2321 			case 1:
2322 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2323 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2324 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2326 				break;
2327 			case 2:
2328 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2329 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2330 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2331 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2332 				break;
2333 			case 3:
2334 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2335 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2336 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2337 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2338 				break;
2339 			case 4:
2340 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2341 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2342 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2343 						 TILE_SPLIT(split_equal_to_row_size));
2344 				break;
2345 			case 5:
2346 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2347 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2348 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2349 				break;
2350 			case 6:
2351 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2352 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2353 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2355 				break;
2356 			case 7:
2357 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2358 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2359 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2360 						 TILE_SPLIT(split_equal_to_row_size));
2361 				break;
2362 			case 8:
2363 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2364 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2365 				break;
2366 			case 9:
2367 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2368 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2369 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2370 				break;
2371 			case 10:
2372 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2373 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2374 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2375 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2376 				break;
2377 			case 11:
2378 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2379 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2380 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2381 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2382 				break;
2383 			case 12:
2384 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2385 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2386 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2388 				break;
2389 			case 13:
2390 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2391 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2393 				break;
2394 			case 14:
2395 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2396 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2397 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2398 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2399 				break;
2400 			case 16:
2401 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2402 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2403 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2404 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2405 				break;
2406 			case 17:
2407 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2408 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2409 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2410 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2411 				break;
2412 			case 27:
2413 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2414 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2416 				break;
2417 			case 28:
2418 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2419 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2420 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2421 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2422 				break;
2423 			case 29:
2424 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2425 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2426 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2427 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2428 				break;
2429 			case 30:
2430 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2431 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2432 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434 				break;
2435 			default:
2436 				gb_tile_moden = 0;
2437 				break;
2438 			}
2439 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2440 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2441 		}
2442 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2443 			switch (reg_offset) {
2444 			case 0:
2445 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2447 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2448 						 NUM_BANKS(ADDR_SURF_16_BANK));
2449 				break;
2450 			case 1:
2451 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2452 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2453 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2454 						 NUM_BANKS(ADDR_SURF_16_BANK));
2455 				break;
2456 			case 2:
2457 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2459 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2460 						 NUM_BANKS(ADDR_SURF_16_BANK));
2461 				break;
2462 			case 3:
2463 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2464 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2465 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2466 						 NUM_BANKS(ADDR_SURF_16_BANK));
2467 				break;
2468 			case 4:
2469 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2472 						 NUM_BANKS(ADDR_SURF_8_BANK));
2473 				break;
2474 			case 5:
2475 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2476 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2477 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2478 						 NUM_BANKS(ADDR_SURF_4_BANK));
2479 				break;
2480 			case 6:
2481 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2483 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2484 						 NUM_BANKS(ADDR_SURF_2_BANK));
2485 				break;
2486 			case 8:
2487 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2488 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2489 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2490 						 NUM_BANKS(ADDR_SURF_16_BANK));
2491 				break;
2492 			case 9:
2493 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2495 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2496 						 NUM_BANKS(ADDR_SURF_16_BANK));
2497 				break;
2498 			case 10:
2499 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2500 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2501 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2502 						 NUM_BANKS(ADDR_SURF_16_BANK));
2503 				break;
2504 			case 11:
2505 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2506 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2507 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2508 						 NUM_BANKS(ADDR_SURF_8_BANK));
2509 				break;
2510 			case 12:
2511 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2512 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2513 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2514 						 NUM_BANKS(ADDR_SURF_4_BANK));
2515 				break;
2516 			case 13:
2517 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2518 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2519 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2520 						 NUM_BANKS(ADDR_SURF_2_BANK));
2521 				break;
2522 			case 14:
2523 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2525 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2526 						 NUM_BANKS(ADDR_SURF_2_BANK));
2527 				break;
2528 			default:
2529 				gb_tile_moden = 0;
2530 				break;
2531 			}
2532 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2533 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2534 		}
2535 	} else if (num_pipe_configs == 8) {
2536 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2537 			switch (reg_offset) {
2538 			case 0:
2539 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2540 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2541 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2542 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2543 				break;
2544 			case 1:
2545 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2546 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2547 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2549 				break;
2550 			case 2:
2551 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2552 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2553 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2555 				break;
2556 			case 3:
2557 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2558 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2559 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2561 				break;
2562 			case 4:
2563 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2564 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2565 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2566 						 TILE_SPLIT(split_equal_to_row_size));
2567 				break;
2568 			case 5:
2569 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2570 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2572 				break;
2573 			case 6:
2574 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2575 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2576 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2577 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2578 				break;
2579 			case 7:
2580 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2581 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2582 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2583 						 TILE_SPLIT(split_equal_to_row_size));
2584 				break;
2585 			case 8:
2586 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2587 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2588 				break;
2589 			case 9:
2590 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2591 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2592 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2593 				break;
2594 			case 10:
2595 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2596 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2597 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2598 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2599 				break;
2600 			case 11:
2601 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2602 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2603 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2604 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2605 				break;
2606 			case 12:
2607 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2608 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2609 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2610 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2611 				break;
2612 			case 13:
2613 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2614 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2615 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2616 				break;
2617 			case 14:
2618 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2619 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2620 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2621 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2622 				break;
2623 			case 16:
2624 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2625 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2626 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2627 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2628 				break;
2629 			case 17:
2630 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2631 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2632 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2633 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2634 				break;
2635 			case 27:
2636 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2637 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2638 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2639 				break;
2640 			case 28:
2641 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2642 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2643 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2644 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2645 				break;
2646 			case 29:
2647 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2648 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2649 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2650 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2651 				break;
2652 			case 30:
2653 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2654 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2655 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2656 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2657 				break;
2658 			default:
2659 				gb_tile_moden = 0;
2660 				break;
2661 			}
2662 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2663 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2664 		}
2665 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2666 			switch (reg_offset) {
2667 			case 0:
2668 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2669 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2670 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2671 						 NUM_BANKS(ADDR_SURF_16_BANK));
2672 				break;
2673 			case 1:
2674 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2675 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2676 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2677 						 NUM_BANKS(ADDR_SURF_16_BANK));
2678 				break;
2679 			case 2:
2680 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2681 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2682 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2683 						 NUM_BANKS(ADDR_SURF_16_BANK));
2684 				break;
2685 			case 3:
2686 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2687 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2688 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2689 						 NUM_BANKS(ADDR_SURF_16_BANK));
2690 				break;
2691 			case 4:
2692 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2693 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2694 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2695 						 NUM_BANKS(ADDR_SURF_8_BANK));
2696 				break;
2697 			case 5:
2698 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2699 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2700 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2701 						 NUM_BANKS(ADDR_SURF_4_BANK));
2702 				break;
2703 			case 6:
2704 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2705 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2706 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2707 						 NUM_BANKS(ADDR_SURF_2_BANK));
2708 				break;
2709 			case 8:
2710 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2711 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2712 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2713 						 NUM_BANKS(ADDR_SURF_16_BANK));
2714 				break;
2715 			case 9:
2716 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2717 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2718 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2719 						 NUM_BANKS(ADDR_SURF_16_BANK));
2720 				break;
2721 			case 10:
2722 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2723 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2724 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2725 						 NUM_BANKS(ADDR_SURF_16_BANK));
2726 				break;
2727 			case 11:
2728 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2729 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2730 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2731 						 NUM_BANKS(ADDR_SURF_16_BANK));
2732 				break;
2733 			case 12:
2734 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2735 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2736 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2737 						 NUM_BANKS(ADDR_SURF_8_BANK));
2738 				break;
2739 			case 13:
2740 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2741 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2742 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2743 						 NUM_BANKS(ADDR_SURF_4_BANK));
2744 				break;
2745 			case 14:
2746 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2747 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2748 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2749 						 NUM_BANKS(ADDR_SURF_2_BANK));
2750 				break;
2751 			default:
2752 				gb_tile_moden = 0;
2753 				break;
2754 			}
2755 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2756 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2757 		}
2758 	} else if (num_pipe_configs == 4) {
2759 		if (num_rbs == 4) {
2760 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2761 				switch (reg_offset) {
2762 				case 0:
2763 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2764 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2765 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2766 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2767 					break;
2768 				case 1:
2769 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2770 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2771 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2772 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2773 					break;
2774 				case 2:
2775 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2776 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2777 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2778 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2779 					break;
2780 				case 3:
2781 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2782 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2783 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2784 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2785 					break;
2786 				case 4:
2787 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2788 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2789 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2790 							 TILE_SPLIT(split_equal_to_row_size));
2791 					break;
2792 				case 5:
2793 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2794 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2795 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2796 					break;
2797 				case 6:
2798 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2799 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2800 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2802 					break;
2803 				case 7:
2804 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2805 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2806 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2807 							 TILE_SPLIT(split_equal_to_row_size));
2808 					break;
2809 				case 8:
2810 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2811 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2812 					break;
2813 				case 9:
2814 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2815 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2816 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2817 					break;
2818 				case 10:
2819 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2820 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2821 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2822 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2823 					break;
2824 				case 11:
2825 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2826 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2827 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2828 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2829 					break;
2830 				case 12:
2831 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2832 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2833 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2834 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2835 					break;
2836 				case 13:
2837 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2838 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2839 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2840 					break;
2841 				case 14:
2842 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2843 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2844 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2845 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2846 					break;
2847 				case 16:
2848 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2849 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2850 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2851 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2852 					break;
2853 				case 17:
2854 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2855 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2856 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2857 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2858 					break;
2859 				case 27:
2860 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2861 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2862 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2863 					break;
2864 				case 28:
2865 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2866 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2867 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2868 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2869 					break;
2870 				case 29:
2871 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2872 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2873 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2874 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2875 					break;
2876 				case 30:
2877 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2878 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2879 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2880 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2881 					break;
2882 				default:
2883 					gb_tile_moden = 0;
2884 					break;
2885 				}
2886 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2887 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2888 			}
2889 		} else if (num_rbs < 4) {
2890 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2891 				switch (reg_offset) {
2892 				case 0:
2893 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2894 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2895 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2896 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2897 					break;
2898 				case 1:
2899 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2900 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2901 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2902 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2903 					break;
2904 				case 2:
2905 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2906 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2907 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2908 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2909 					break;
2910 				case 3:
2911 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2912 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2913 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2914 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2915 					break;
2916 				case 4:
2917 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2919 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2920 							 TILE_SPLIT(split_equal_to_row_size));
2921 					break;
2922 				case 5:
2923 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2924 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2925 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2926 					break;
2927 				case 6:
2928 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2929 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2930 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2931 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2932 					break;
2933 				case 7:
2934 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2935 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2936 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2937 							 TILE_SPLIT(split_equal_to_row_size));
2938 					break;
2939 				case 8:
2940 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2941 							 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2942 					break;
2943 				case 9:
2944 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2945 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2946 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2947 					break;
2948 				case 10:
2949 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2950 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2951 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2952 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2953 					break;
2954 				case 11:
2955 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2956 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2957 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2958 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959 					break;
2960 				case 12:
2961 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2962 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2963 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2964 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2965 					break;
2966 				case 13:
2967 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2968 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2969 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2970 					break;
2971 				case 14:
2972 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2973 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2974 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2975 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2976 					break;
2977 				case 16:
2978 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2979 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2980 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2981 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2982 					break;
2983 				case 17:
2984 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2985 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2986 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2987 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2988 					break;
2989 				case 27:
2990 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2991 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2992 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2993 					break;
2994 				case 28:
2995 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2996 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2997 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2998 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2999 					break;
3000 				case 29:
3001 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3002 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3003 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3004 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3005 					break;
3006 				case 30:
3007 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3008 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3009 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3010 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3011 					break;
3012 				default:
3013 					gb_tile_moden = 0;
3014 					break;
3015 				}
3016 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3017 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3018 			}
3019 		}
3020 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3021 			switch (reg_offset) {
3022 			case 0:
3023 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3024 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3025 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3026 						 NUM_BANKS(ADDR_SURF_16_BANK));
3027 				break;
3028 			case 1:
3029 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3030 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3031 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3032 						 NUM_BANKS(ADDR_SURF_16_BANK));
3033 				break;
3034 			case 2:
3035 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3036 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3037 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3038 						 NUM_BANKS(ADDR_SURF_16_BANK));
3039 				break;
3040 			case 3:
3041 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3042 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3043 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3044 						 NUM_BANKS(ADDR_SURF_16_BANK));
3045 				break;
3046 			case 4:
3047 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3048 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3049 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3050 						 NUM_BANKS(ADDR_SURF_16_BANK));
3051 				break;
3052 			case 5:
3053 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3054 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3055 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3056 						 NUM_BANKS(ADDR_SURF_8_BANK));
3057 				break;
3058 			case 6:
3059 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3060 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3061 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3062 						 NUM_BANKS(ADDR_SURF_4_BANK));
3063 				break;
3064 			case 8:
3065 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3066 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3067 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3068 						 NUM_BANKS(ADDR_SURF_16_BANK));
3069 				break;
3070 			case 9:
3071 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3072 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3073 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3074 						 NUM_BANKS(ADDR_SURF_16_BANK));
3075 				break;
3076 			case 10:
3077 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3078 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3079 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3080 						 NUM_BANKS(ADDR_SURF_16_BANK));
3081 				break;
3082 			case 11:
3083 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3084 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3085 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3086 						 NUM_BANKS(ADDR_SURF_16_BANK));
3087 				break;
3088 			case 12:
3089 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3090 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3091 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3092 						 NUM_BANKS(ADDR_SURF_16_BANK));
3093 				break;
3094 			case 13:
3095 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3096 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3097 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3098 						 NUM_BANKS(ADDR_SURF_8_BANK));
3099 				break;
3100 			case 14:
3101 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3102 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3103 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3104 						 NUM_BANKS(ADDR_SURF_4_BANK));
3105 				break;
3106 			default:
3107 				gb_tile_moden = 0;
3108 				break;
3109 			}
3110 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3111 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3112 		}
3113 	} else if (num_pipe_configs == 2) {
3114 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3115 			switch (reg_offset) {
3116 			case 0:
3117 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3118 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3119 						 PIPE_CONFIG(ADDR_SURF_P2) |
3120 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3121 				break;
3122 			case 1:
3123 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3124 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3125 						 PIPE_CONFIG(ADDR_SURF_P2) |
3126 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3127 				break;
3128 			case 2:
3129 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3130 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3131 						 PIPE_CONFIG(ADDR_SURF_P2) |
3132 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3133 				break;
3134 			case 3:
3135 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3136 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3137 						 PIPE_CONFIG(ADDR_SURF_P2) |
3138 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3139 				break;
3140 			case 4:
3141 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3142 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3143 						 PIPE_CONFIG(ADDR_SURF_P2) |
3144 						 TILE_SPLIT(split_equal_to_row_size));
3145 				break;
3146 			case 5:
3147 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3148 						 PIPE_CONFIG(ADDR_SURF_P2) |
3149 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3150 				break;
3151 			case 6:
3152 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3153 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3154 						 PIPE_CONFIG(ADDR_SURF_P2) |
3155 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3156 				break;
3157 			case 7:
3158 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3159 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3160 						 PIPE_CONFIG(ADDR_SURF_P2) |
3161 						 TILE_SPLIT(split_equal_to_row_size));
3162 				break;
3163 			case 8:
3164 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3165 						 PIPE_CONFIG(ADDR_SURF_P2));
3166 				break;
3167 			case 9:
3168 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3169 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3170 						 PIPE_CONFIG(ADDR_SURF_P2));
3171 				break;
3172 			case 10:
3173 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3174 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3175 						 PIPE_CONFIG(ADDR_SURF_P2) |
3176 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3177 				break;
3178 			case 11:
3179 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3180 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3181 						 PIPE_CONFIG(ADDR_SURF_P2) |
3182 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3183 				break;
3184 			case 12:
3185 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3186 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3187 						 PIPE_CONFIG(ADDR_SURF_P2) |
3188 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3189 				break;
3190 			case 13:
3191 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3192 						 PIPE_CONFIG(ADDR_SURF_P2) |
3193 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3194 				break;
3195 			case 14:
3196 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3197 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3198 						 PIPE_CONFIG(ADDR_SURF_P2) |
3199 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3200 				break;
3201 			case 16:
3202 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3203 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3204 						 PIPE_CONFIG(ADDR_SURF_P2) |
3205 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3206 				break;
3207 			case 17:
3208 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3209 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3210 						 PIPE_CONFIG(ADDR_SURF_P2) |
3211 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3212 				break;
3213 			case 27:
3214 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3215 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3216 						 PIPE_CONFIG(ADDR_SURF_P2));
3217 				break;
3218 			case 28:
3219 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3220 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3221 						 PIPE_CONFIG(ADDR_SURF_P2) |
3222 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3223 				break;
3224 			case 29:
3225 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3226 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3227 						 PIPE_CONFIG(ADDR_SURF_P2) |
3228 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3229 				break;
3230 			case 30:
3231 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3232 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3233 						 PIPE_CONFIG(ADDR_SURF_P2) |
3234 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3235 				break;
3236 			default:
3237 				gb_tile_moden = 0;
3238 				break;
3239 			}
3240 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3241 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3242 		}
3243 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3244 			switch (reg_offset) {
3245 			case 0:
3246 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3247 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3248 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3249 						 NUM_BANKS(ADDR_SURF_16_BANK));
3250 				break;
3251 			case 1:
3252 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3253 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3254 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3255 						 NUM_BANKS(ADDR_SURF_16_BANK));
3256 				break;
3257 			case 2:
3258 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3259 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3260 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3261 						 NUM_BANKS(ADDR_SURF_16_BANK));
3262 				break;
3263 			case 3:
3264 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3265 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3266 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3267 						 NUM_BANKS(ADDR_SURF_16_BANK));
3268 				break;
3269 			case 4:
3270 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3271 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3272 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3273 						 NUM_BANKS(ADDR_SURF_16_BANK));
3274 				break;
3275 			case 5:
3276 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3277 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3278 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3279 						 NUM_BANKS(ADDR_SURF_16_BANK));
3280 				break;
3281 			case 6:
3282 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3283 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3284 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3285 						 NUM_BANKS(ADDR_SURF_8_BANK));
3286 				break;
3287 			case 8:
3288 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3289 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3290 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3291 						 NUM_BANKS(ADDR_SURF_16_BANK));
3292 				break;
3293 			case 9:
3294 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3295 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3296 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3297 						 NUM_BANKS(ADDR_SURF_16_BANK));
3298 				break;
3299 			case 10:
3300 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3301 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3302 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3303 						 NUM_BANKS(ADDR_SURF_16_BANK));
3304 				break;
3305 			case 11:
3306 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3307 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3308 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3309 						 NUM_BANKS(ADDR_SURF_16_BANK));
3310 				break;
3311 			case 12:
3312 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3313 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3314 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3315 						 NUM_BANKS(ADDR_SURF_16_BANK));
3316 				break;
3317 			case 13:
3318 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3319 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3320 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3321 						 NUM_BANKS(ADDR_SURF_16_BANK));
3322 				break;
3323 			case 14:
3324 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3325 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3326 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3327 						 NUM_BANKS(ADDR_SURF_8_BANK));
3328 				break;
3329 			default:
3330 				gb_tile_moden = 0;
3331 				break;
3332 			}
3333 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3334 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3335 		}
3336 	} else
3337 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3338 }
3339 
3340 /**
3341  * cik_select_se_sh - select which SE, SH to address
3342  *
3343  * @rdev: radeon_device pointer
3344  * @se_num: shader engine to address
3345  * @sh_num: sh block to address
3346  *
3347  * Select which SE, SH combinations to address. Certain
3348  * registers are instanced per SE or SH.  0xffffffff means
3349  * broadcast to all SEs or SHs (CIK).
3350  */
3351 static void cik_select_se_sh(struct radeon_device *rdev,
3352 			     u32 se_num, u32 sh_num)
3353 {
3354 	u32 data = INSTANCE_BROADCAST_WRITES;
3355 
3356 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3357 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3358 	else if (se_num == 0xffffffff)
3359 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3360 	else if (sh_num == 0xffffffff)
3361 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3362 	else
3363 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3364 	WREG32(GRBM_GFX_INDEX, data);
3365 }
3366 
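/*
 * Typical usage sketch (mirroring cik_setup_rb() and cik_gpu_init()
 * below): hold grbm_idx_mutex around any non-broadcast selection and
 * restore broadcast mode before dropping the lock:
 *
 *	mutex_lock(&rdev->grbm_idx_mutex);
 *	cik_select_se_sh(rdev, se, sh);
 *	... access per-SE/SH instanced registers ...
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 *	mutex_unlock(&rdev->grbm_idx_mutex);
 */
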
3367 /**
3368  * cik_create_bitmask - create a bitmask
3369  *
3370  * @bit_width: length of the mask
3371  *
3372  * create a variable length bit mask (CIK).
3373  * Returns the bitmask.
3374  */
3375 static u32 cik_create_bitmask(u32 bit_width)
3376 {
3377 	u32 i, mask = 0;
3378 
3379 	for (i = 0; i < bit_width; i++) {
3380 		mask <<= 1;
3381 		mask |= 1;
3382 	}
3383 	return mask;
3384 }
3385 
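/*
 * For reference only: for bit_width <= 32 the loop in
 * cik_create_bitmask() is equivalent to the closed form below
 * (a hypothetical helper, not used by the driver).
 */
static inline u32 cik_bitmask_closed_form(u32 bit_width)
{
	/* (1U << 32) is undefined in C, so special-case full width */
	return (bit_width >= 32) ? 0xffffffffU : ((1U << bit_width) - 1U);
}
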
3386 /**
3387  * cik_get_rb_disabled - computes the mask of disabled RBs
3388  *
 * @rdev: radeon_device pointer
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
3393  *
3394  * Calculates the bitmask of disabled RBs (CIK).
3395  * Returns the disabled RB bitmask.
3396  */
3397 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3398 			      u32 max_rb_num_per_se,
3399 			      u32 sh_per_se)
3400 {
3401 	u32 data, mask;
3402 
3403 	data = RREG32(CC_RB_BACKEND_DISABLE);
3404 	if (data & 1)
3405 		data &= BACKEND_DISABLE_MASK;
3406 	else
3407 		data = 0;
3408 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3409 
3410 	data >>= BACKEND_DISABLE_SHIFT;
3411 
3412 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3413 
3414 	return data & mask;
3415 }
3416 
3417 /**
3418  * cik_setup_rb - setup the RBs on the asic
3419  *
3420  * @rdev: radeon_device pointer
3421  * @se_num: number of SEs (shader engines) for the asic
3422  * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3424  *
3425  * Configures per-SE/SH RB registers (CIK).
3426  */
3427 static void cik_setup_rb(struct radeon_device *rdev,
3428 			 u32 se_num, u32 sh_per_se,
3429 			 u32 max_rb_num_per_se)
3430 {
3431 	int i, j;
3432 	u32 data, mask;
3433 	u32 disabled_rbs = 0;
3434 	u32 enabled_rbs = 0;
3435 
3436 	mutex_lock(&rdev->grbm_idx_mutex);
3437 	for (i = 0; i < se_num; i++) {
3438 		for (j = 0; j < sh_per_se; j++) {
3439 			cik_select_se_sh(rdev, i, j);
3440 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3441 			if (rdev->family == CHIP_HAWAII)
3442 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3443 			else
3444 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3445 		}
3446 	}
3447 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3448 	mutex_unlock(&rdev->grbm_idx_mutex);
3449 
3450 	mask = 1;
3451 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3452 		if (!(disabled_rbs & mask))
3453 			enabled_rbs |= mask;
3454 		mask <<= 1;
3455 	}
3456 
3457 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3458 
3459 	mutex_lock(&rdev->grbm_idx_mutex);
3460 	for (i = 0; i < se_num; i++) {
3461 		cik_select_se_sh(rdev, i, 0xffffffff);
3462 		data = 0;
3463 		for (j = 0; j < sh_per_se; j++) {
3464 			switch (enabled_rbs & 3) {
3465 			case 0:
3466 				if (j == 0)
3467 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3468 				else
3469 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3470 				break;
3471 			case 1:
3472 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3473 				break;
3474 			case 2:
3475 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3476 				break;
3477 			case 3:
3478 			default:
3479 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3480 				break;
3481 			}
3482 			enabled_rbs >>= 2;
3483 		}
3484 		WREG32(PA_SC_RASTER_CONFIG, data);
3485 	}
3486 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3487 	mutex_unlock(&rdev->grbm_idx_mutex);
3488 }
3489 
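/*
 * Note on cik_setup_rb() above: each SE/SH pair contributes a
 * *_RB_BITMAP_WIDTH_PER_SH-bit slice to disabled_rbs, so the
 * per-instance masks from cik_get_rb_disabled() land in disjoint bit
 * ranges; the enabled_rbs complement is then consumed two bits at a
 * time when programming PA_SC_RASTER_CONFIG.
 */
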
3490 /**
3491  * cik_gpu_init - setup the 3D engine
3492  *
3493  * @rdev: radeon_device pointer
3494  *
3495  * Configures the 3D engine and tiling configuration
3496  * registers so that the 3D engine is usable.
3497  */
3498 static void cik_gpu_init(struct radeon_device *rdev)
3499 {
3500 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3501 	u32 mc_shared_chmap, mc_arb_ramcfg;
3502 	u32 hdp_host_path_cntl;
3503 	u32 tmp;
3504 	int i, j;
3505 
3506 	switch (rdev->family) {
3507 	case CHIP_BONAIRE:
3508 		rdev->config.cik.max_shader_engines = 2;
3509 		rdev->config.cik.max_tile_pipes = 4;
3510 		rdev->config.cik.max_cu_per_sh = 7;
3511 		rdev->config.cik.max_sh_per_se = 1;
3512 		rdev->config.cik.max_backends_per_se = 2;
3513 		rdev->config.cik.max_texture_channel_caches = 4;
3514 		rdev->config.cik.max_gprs = 256;
3515 		rdev->config.cik.max_gs_threads = 32;
3516 		rdev->config.cik.max_hw_contexts = 8;
3517 
3518 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3519 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3520 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3521 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3522 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3523 		break;
3524 	case CHIP_HAWAII:
3525 		rdev->config.cik.max_shader_engines = 4;
3526 		rdev->config.cik.max_tile_pipes = 16;
3527 		rdev->config.cik.max_cu_per_sh = 11;
3528 		rdev->config.cik.max_sh_per_se = 1;
3529 		rdev->config.cik.max_backends_per_se = 4;
3530 		rdev->config.cik.max_texture_channel_caches = 16;
3531 		rdev->config.cik.max_gprs = 256;
3532 		rdev->config.cik.max_gs_threads = 32;
3533 		rdev->config.cik.max_hw_contexts = 8;
3534 
3535 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3536 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3537 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3538 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3539 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3540 		break;
3541 	case CHIP_KAVERI:
3542 		rdev->config.cik.max_shader_engines = 1;
3543 		rdev->config.cik.max_tile_pipes = 4;
3544 		if ((rdev->pdev->device == 0x1304) ||
3545 		    (rdev->pdev->device == 0x1305) ||
3546 		    (rdev->pdev->device == 0x130C) ||
3547 		    (rdev->pdev->device == 0x130F) ||
3548 		    (rdev->pdev->device == 0x1310) ||
3549 		    (rdev->pdev->device == 0x1311) ||
3550 		    (rdev->pdev->device == 0x131C)) {
3551 			rdev->config.cik.max_cu_per_sh = 8;
3552 			rdev->config.cik.max_backends_per_se = 2;
3553 		} else if ((rdev->pdev->device == 0x1309) ||
3554 			   (rdev->pdev->device == 0x130A) ||
3555 			   (rdev->pdev->device == 0x130D) ||
3556 			   (rdev->pdev->device == 0x1313) ||
3557 			   (rdev->pdev->device == 0x131D)) {
3558 			rdev->config.cik.max_cu_per_sh = 6;
3559 			rdev->config.cik.max_backends_per_se = 2;
3560 		} else if ((rdev->pdev->device == 0x1306) ||
3561 			   (rdev->pdev->device == 0x1307) ||
3562 			   (rdev->pdev->device == 0x130B) ||
3563 			   (rdev->pdev->device == 0x130E) ||
3564 			   (rdev->pdev->device == 0x1315) ||
3565 			   (rdev->pdev->device == 0x1318) ||
3566 			   (rdev->pdev->device == 0x131B)) {
3567 			rdev->config.cik.max_cu_per_sh = 4;
3568 			rdev->config.cik.max_backends_per_se = 1;
3569 		} else {
3570 			rdev->config.cik.max_cu_per_sh = 3;
3571 			rdev->config.cik.max_backends_per_se = 1;
3572 		}
3573 		rdev->config.cik.max_sh_per_se = 1;
3574 		rdev->config.cik.max_texture_channel_caches = 4;
3575 		rdev->config.cik.max_gprs = 256;
3576 		rdev->config.cik.max_gs_threads = 16;
3577 		rdev->config.cik.max_hw_contexts = 8;
3578 
3579 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3580 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3581 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3582 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3583 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3584 		break;
3585 	case CHIP_KABINI:
3586 	case CHIP_MULLINS:
3587 	default:
3588 		rdev->config.cik.max_shader_engines = 1;
3589 		rdev->config.cik.max_tile_pipes = 2;
3590 		rdev->config.cik.max_cu_per_sh = 2;
3591 		rdev->config.cik.max_sh_per_se = 1;
3592 		rdev->config.cik.max_backends_per_se = 1;
3593 		rdev->config.cik.max_texture_channel_caches = 2;
3594 		rdev->config.cik.max_gprs = 256;
3595 		rdev->config.cik.max_gs_threads = 16;
3596 		rdev->config.cik.max_hw_contexts = 8;
3597 
3598 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3599 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3600 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3601 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3602 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3603 		break;
3604 	}
3605 
3606 	/* Initialize HDP */
3607 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3608 		WREG32((0x2c14 + j), 0x00000000);
3609 		WREG32((0x2c18 + j), 0x00000000);
3610 		WREG32((0x2c1c + j), 0x00000000);
3611 		WREG32((0x2c20 + j), 0x00000000);
3612 		WREG32((0x2c24 + j), 0x00000000);
3613 	}
3614 
3615 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3616 	WREG32(SRBM_INT_CNTL, 0x1);
3617 	WREG32(SRBM_INT_ACK, 0x1);
3618 
3619 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3620 
3621 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3622 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3623 
3624 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3625 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3626 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3627 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3628 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3629 		rdev->config.cik.mem_row_size_in_kb = 4;
3630 	/* XXX use MC settings? */
3631 	rdev->config.cik.shader_engine_tile_size = 32;
3632 	rdev->config.cik.num_gpus = 1;
3633 	rdev->config.cik.multi_gpu_tile_size = 64;
3634 
3635 	/* fix up row size */
3636 	gb_addr_config &= ~ROW_SIZE_MASK;
3637 	switch (rdev->config.cik.mem_row_size_in_kb) {
3638 	case 1:
3639 	default:
3640 		gb_addr_config |= ROW_SIZE(0);
3641 		break;
3642 	case 2:
3643 		gb_addr_config |= ROW_SIZE(1);
3644 		break;
3645 	case 4:
3646 		gb_addr_config |= ROW_SIZE(2);
3647 		break;
3648 	}
3649 
3650 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3651 	 * not have bank info, so create a custom tiling dword.
3652 	 * bits 3:0   num_pipes
3653 	 * bits 7:4   num_banks
3654 	 * bits 11:8  group_size
3655 	 * bits 15:12 row_size
3656 	 */
3657 	rdev->config.cik.tile_config = 0;
3658 	switch (rdev->config.cik.num_tile_pipes) {
3659 	case 1:
3660 		rdev->config.cik.tile_config |= (0 << 0);
3661 		break;
3662 	case 2:
3663 		rdev->config.cik.tile_config |= (1 << 0);
3664 		break;
3665 	case 4:
3666 		rdev->config.cik.tile_config |= (2 << 0);
3667 		break;
3668 	case 8:
3669 	default:
3670 		/* XXX what about 12? */
3671 		rdev->config.cik.tile_config |= (3 << 0);
3672 		break;
3673 	}
3674 	rdev->config.cik.tile_config |=
3675 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3676 	rdev->config.cik.tile_config |=
3677 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3678 	rdev->config.cik.tile_config |=
3679 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3680 
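	/*
	 * A consumer of tile_config can recover the fields with plain
	 * shifts and masks mirroring the layout documented above, e.g.
	 * num_pipes encoding = tile_config & 0xf, num_banks field =
	 * (tile_config >> 4) & 0xf, group_size field =
	 * (tile_config >> 8) & 0xf, row_size field =
	 * (tile_config >> 12) & 0xf.
	 */
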
3681 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3682 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3683 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3684 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3685 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3686 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3687 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3688 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3689 
3690 	cik_tiling_mode_table_init(rdev);
3691 
3692 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3693 		     rdev->config.cik.max_sh_per_se,
3694 		     rdev->config.cik.max_backends_per_se);
3695 
3696 	rdev->config.cik.active_cus = 0;
3697 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3698 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3699 			rdev->config.cik.active_cus +=
3700 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3701 		}
3702 	}
3703 
3704 	/* set HW defaults for 3D engine */
3705 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3706 
3707 	mutex_lock(&rdev->grbm_idx_mutex);
	/*
	 * make sure that the following register writes are broadcast
	 * to all the shaders
	 */
3712 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3713 	WREG32(SX_DEBUG_1, 0x20);
3714 
3715 	WREG32(TA_CNTL_AUX, 0x00010000);
3716 
3717 	tmp = RREG32(SPI_CONFIG_CNTL);
3718 	tmp |= 0x03000000;
3719 	WREG32(SPI_CONFIG_CNTL, tmp);
3720 
3721 	WREG32(SQ_CONFIG, 1);
3722 
3723 	WREG32(DB_DEBUG, 0);
3724 
3725 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3726 	tmp |= 0x00000400;
3727 	WREG32(DB_DEBUG2, tmp);
3728 
3729 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3730 	tmp |= 0x00020200;
3731 	WREG32(DB_DEBUG3, tmp);
3732 
3733 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3734 	tmp |= 0x00018208;
3735 	WREG32(CB_HW_CONTROL, tmp);
3736 
3737 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3738 
3739 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3740 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3741 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3742 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3743 
3744 	WREG32(VGT_NUM_INSTANCES, 1);
3745 
3746 	WREG32(CP_PERFMON_CNTL, 0);
3747 
3748 	WREG32(SQ_CONFIG, 0);
3749 
3750 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3751 					  FORCE_EOV_MAX_REZ_CNT(255)));
3752 
3753 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3754 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3755 
3756 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3757 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3758 
3759 	tmp = RREG32(HDP_MISC_CNTL);
3760 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3761 	WREG32(HDP_MISC_CNTL, tmp);
3762 
3763 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3764 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3765 
3766 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3767 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3768 	mutex_unlock(&rdev->grbm_idx_mutex);
3769 
3770 	udelay(50);
3771 }
3772 
3773 /*
 * GPU scratch registers helper functions.
3775  */
3776 /**
3777  * cik_scratch_init - setup driver info for CP scratch regs
3778  *
3779  * @rdev: radeon_device pointer
3780  *
3781  * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
 * is not used by default on newer asics (r6xx+).  On newer asics,
3784  * memory buffers are used for fences rather than scratch regs.
3785  */
3786 static void cik_scratch_init(struct radeon_device *rdev)
3787 {
3788 	int i;
3789 
3790 	rdev->scratch.num_reg = 7;
3791 	rdev->scratch.reg_base = SCRATCH_REG0;
3792 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3793 		rdev->scratch.free[i] = true;
3794 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3795 	}
3796 }
3797 
3798 /**
3799  * cik_ring_test - basic gfx ring test
3800  *
3801  * @rdev: radeon_device pointer
3802  * @ring: radeon_ring structure holding ring information
3803  *
3804  * Allocate a scratch register and write to it using the gfx ring (CIK).
3805  * Provides a basic gfx ring test to verify that the ring is working.
 * Used by cik_cp_gfx_resume().
3807  * Returns 0 on success, error on failure.
3808  */
3809 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3810 {
3811 	uint32_t scratch;
3812 	uint32_t tmp = 0;
3813 	unsigned i;
3814 	int r;
3815 
3816 	r = radeon_scratch_get(rdev, &scratch);
3817 	if (r) {
3818 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3819 		return r;
3820 	}
3821 	WREG32(scratch, 0xCAFEDEAD);
3822 	r = radeon_ring_lock(rdev, ring, 3);
3823 	if (r) {
3824 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3825 		radeon_scratch_free(rdev, scratch);
3826 		return r;
3827 	}
3828 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3829 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3830 	radeon_ring_write(ring, 0xDEADBEEF);
3831 	radeon_ring_unlock_commit(rdev, ring, false);
3832 
3833 	for (i = 0; i < rdev->usec_timeout; i++) {
3834 		tmp = RREG32(scratch);
3835 		if (tmp == 0xDEADBEEF)
3836 			break;
3837 		DRM_UDELAY(1);
3838 	}
3839 	if (i < rdev->usec_timeout) {
3840 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3841 	} else {
3842 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3843 			  ring->idx, scratch, tmp);
3844 		r = -EINVAL;
3845 	}
3846 	radeon_scratch_free(rdev, scratch);
3847 	return r;
3848 }
3849 
3850 /**
3851  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3852  *
3853  * @rdev: radeon_device pointer
3854  * @ridx: radeon ring index
3855  *
3856  * Emits an hdp flush on the cp.
3857  */
3858 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3859 				       int ridx)
3860 {
3861 	struct radeon_ring *ring = &rdev->ring[ridx];
3862 	u32 ref_and_mask;
3863 
3864 	switch (ring->idx) {
3865 	case CAYMAN_RING_TYPE_CP1_INDEX:
3866 	case CAYMAN_RING_TYPE_CP2_INDEX:
3867 	default:
3868 		switch (ring->me) {
3869 		case 0:
3870 			ref_and_mask = CP2 << ring->pipe;
3871 			break;
3872 		case 1:
3873 			ref_and_mask = CP6 << ring->pipe;
3874 			break;
3875 		default:
3876 			return;
3877 		}
3878 		break;
3879 	case RADEON_RING_TYPE_GFX_INDEX:
3880 		ref_and_mask = CP0;
3881 		break;
3882 	}
3883 
3884 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3885 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3886 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3887 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3888 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3889 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3890 	radeon_ring_write(ring, ref_and_mask);
3891 	radeon_ring_write(ring, ref_and_mask);
3892 	radeon_ring_write(ring, 0x20); /* poll interval */
3893 }
3894 
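/*
 * The WAIT_REG_MEM packet emitted above implements the write/wait/write
 * handshake noted in the inline comments: the CP writes ref_and_mask to
 * GPU_HDP_FLUSH_REQ, then polls GPU_HDP_FLUSH_DONE (poll interval 0x20)
 * until the masked value matches, guaranteeing the HDP flush completed.
 */
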
3895 /**
3896  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3897  *
3898  * @rdev: radeon_device pointer
3899  * @fence: radeon fence object
3900  *
 * Emits a fence sequence number on the gfx ring and flushes
3902  * GPU caches.
3903  */
3904 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3905 			     struct radeon_fence *fence)
3906 {
3907 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3908 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3909 
	/* Workaround for cache flush problems.  First send a dummy EOP
	 * event down the pipe with a sequence number one below the real
	 * fence value.
	 */
3913 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3914 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3915 				 EOP_TC_ACTION_EN |
3916 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3917 				 EVENT_INDEX(5)));
3918 	radeon_ring_write(ring, addr & 0xfffffffc);
3919 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3920 				DATA_SEL(1) | INT_SEL(0));
3921 	radeon_ring_write(ring, fence->seq - 1);
3922 	radeon_ring_write(ring, 0);
3923 
3924 	/* Then send the real EOP event down the pipe. */
3925 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3926 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3927 				 EOP_TC_ACTION_EN |
3928 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3929 				 EVENT_INDEX(5)));
3930 	radeon_ring_write(ring, addr & 0xfffffffc);
3931 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3932 	radeon_ring_write(ring, fence->seq);
3933 	radeon_ring_write(ring, 0);
3934 }
3935 
3936 /**
3937  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3938  *
3939  * @rdev: radeon_device pointer
3940  * @fence: radeon fence object
3941  *
 * Emits a fence sequence number on the compute ring and flushes
3943  * GPU caches.
3944  */
3945 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3946 				 struct radeon_fence *fence)
3947 {
3948 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3949 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3950 
3951 	/* RELEASE_MEM - flush caches, send int */
3952 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3953 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3954 				 EOP_TC_ACTION_EN |
3955 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3956 				 EVENT_INDEX(5)));
3957 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3958 	radeon_ring_write(ring, addr & 0xfffffffc);
3959 	radeon_ring_write(ring, upper_32_bits(addr));
3960 	radeon_ring_write(ring, fence->seq);
3961 	radeon_ring_write(ring, 0);
3962 }
3963 
3964 /**
3965  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3966  *
3967  * @rdev: radeon_device pointer
3968  * @ring: radeon ring buffer object
3969  * @semaphore: radeon semaphore object
 * @emit_wait: Is this a semaphore wait?
3971  *
3972  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3973  * from running ahead of semaphore waits.
3974  */
3975 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3976 			     struct radeon_ring *ring,
3977 			     struct radeon_semaphore *semaphore,
3978 			     bool emit_wait)
3979 {
3980 	uint64_t addr = semaphore->gpu_addr;
3981 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3982 
3983 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3984 	radeon_ring_write(ring, lower_32_bits(addr));
3985 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3986 
3987 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3988 		/* Prevent the PFP from running ahead of the semaphore wait */
3989 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3990 		radeon_ring_write(ring, 0x0);
3991 	}
3992 
3993 	return true;
3994 }
3995 
3996 /**
3997  * cik_copy_cpdma - copy pages using the CP DMA engine
3998  *
3999  * @rdev: radeon_device pointer
4000  * @src_offset: src GPU address
4001  * @dst_offset: dst GPU address
4002  * @num_gpu_pages: number of GPU pages to xfer
4003  * @resv: reservation object to sync to
4004  *
 * Copy GPU pages using the CP DMA engine (CIK+).
4006  * Used by the radeon ttm implementation to move pages if
4007  * registered as the asic copy callback.
4008  */
4009 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
4010 				    uint64_t src_offset, uint64_t dst_offset,
4011 				    unsigned num_gpu_pages,
4012 				    struct reservation_object *resv)
4013 {
4014 	struct radeon_fence *fence;
4015 	struct radeon_sync sync;
4016 	int ring_index = rdev->asic->copy.blit_ring_index;
4017 	struct radeon_ring *ring = &rdev->ring[ring_index];
4018 	u32 size_in_bytes, cur_size_in_bytes, control;
4019 	int i, num_loops;
4020 	int r = 0;
4021 
4022 	radeon_sync_create(&sync);
4023 
4024 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4025 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
4026 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
4027 	if (r) {
4028 		DRM_ERROR("radeon: moving bo (%d).\n", r);
4029 		radeon_sync_free(rdev, &sync, NULL);
4030 		return ERR_PTR(r);
4031 	}
4032 
4033 	radeon_sync_resv(rdev, &sync, resv, false);
4034 	radeon_sync_rings(rdev, &sync, ring->idx);
4035 
4036 	for (i = 0; i < num_loops; i++) {
4037 		cur_size_in_bytes = size_in_bytes;
4038 		if (cur_size_in_bytes > 0x1fffff)
4039 			cur_size_in_bytes = 0x1fffff;
4040 		size_in_bytes -= cur_size_in_bytes;
4041 		control = 0;
4042 		if (size_in_bytes == 0)
4043 			control |= PACKET3_DMA_DATA_CP_SYNC;
4044 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4045 		radeon_ring_write(ring, control);
4046 		radeon_ring_write(ring, lower_32_bits(src_offset));
4047 		radeon_ring_write(ring, upper_32_bits(src_offset));
4048 		radeon_ring_write(ring, lower_32_bits(dst_offset));
4049 		radeon_ring_write(ring, upper_32_bits(dst_offset));
4050 		radeon_ring_write(ring, cur_size_in_bytes);
4051 		src_offset += cur_size_in_bytes;
4052 		dst_offset += cur_size_in_bytes;
4053 	}
4054 
4055 	r = radeon_fence_emit(rdev, &fence, ring->idx);
4056 	if (r) {
4057 		radeon_ring_unlock_undo(rdev, ring);
4058 		radeon_sync_free(rdev, &sync, NULL);
4059 		return ERR_PTR(r);
4060 	}
4061 
4062 	radeon_ring_unlock_commit(rdev, ring, false);
4063 	radeon_sync_free(rdev, &sync, fence);
4064 
4065 	return fence;
4066 }
4067 
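/*
 * Each PACKET3_DMA_DATA transfer in cik_copy_cpdma() moves at most
 * 0x1fffff bytes (just under 2 MiB) and costs 7 ring dwords; the extra
 * 18 dwords in the radeon_ring_lock() call cover the sync and fence
 * packets.  As a worked example, an 8 MiB copy (8388608 bytes) needs
 * DIV_ROUND_UP(8388608, 0x1fffff) = 5 loops.
 */
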
4068 /*
4069  * IB stuff
4070  */
4071 /**
4072  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4073  *
4074  * @rdev: radeon_device pointer
4075  * @ib: radeon indirect buffer object
4076  *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
4081  * on the gfx ring for execution by the GPU.
4082  */
4083 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
4084 {
4085 	struct radeon_ring *ring = &rdev->ring[ib->ring];
4086 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
4087 	u32 header, control = INDIRECT_BUFFER_VALID;
4088 
4089 	if (ib->is_const_ib) {
4090 		/* set switch buffer packet before const IB */
4091 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4092 		radeon_ring_write(ring, 0);
4093 
4094 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4095 	} else {
4096 		u32 next_rptr;
4097 		if (ring->rptr_save_reg) {
4098 			next_rptr = ring->wptr + 3 + 4;
4099 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4100 			radeon_ring_write(ring, ((ring->rptr_save_reg -
4101 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
4102 			radeon_ring_write(ring, next_rptr);
4103 		} else if (rdev->wb.enabled) {
4104 			next_rptr = ring->wptr + 5 + 4;
4105 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4106 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
4107 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4108 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
4109 			radeon_ring_write(ring, next_rptr);
4110 		}
4111 
4112 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4113 	}
4114 
4115 	control |= ib->length_dw | (vm_id << 24);
4116 
4117 	radeon_ring_write(ring, header);
4118 	radeon_ring_write(ring,
4119 #ifdef __BIG_ENDIAN
4120 			  (2 << 0) |
4121 #endif
4122 			  (ib->gpu_addr & 0xFFFFFFFC));
4123 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4124 	radeon_ring_write(ring, control);
4125 }
4126 
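/*
 * The IB packet emitted above is four dwords: the header, the IB GPU
 * address split across two dwords (bits 31:2 and 47:32), and a control
 * word packing INDIRECT_BUFFER_VALID, the IB length in dwords, and the
 * VMID in bits 31:24.
 */
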
4127 /**
4128  * cik_ib_test - basic gfx ring IB test
4129  *
4130  * @rdev: radeon_device pointer
4131  * @ring: radeon_ring structure holding ring information
4132  *
4133  * Allocate an IB and execute it on the gfx ring (CIK).
4134  * Provides a basic gfx ring test to verify that IBs are working.
4135  * Returns 0 on success, error on failure.
4136  */
4137 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4138 {
4139 	struct radeon_ib ib;
4140 	uint32_t scratch;
4141 	uint32_t tmp = 0;
4142 	unsigned i;
4143 	int r;
4144 
4145 	r = radeon_scratch_get(rdev, &scratch);
4146 	if (r) {
4147 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
4148 		return r;
4149 	}
4150 	WREG32(scratch, 0xCAFEDEAD);
4151 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4152 	if (r) {
4153 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4154 		radeon_scratch_free(rdev, scratch);
4155 		return r;
4156 	}
4157 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
4158 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
4159 	ib.ptr[2] = 0xDEADBEEF;
4160 	ib.length_dw = 3;
4161 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
4162 	if (r) {
4163 		radeon_scratch_free(rdev, scratch);
4164 		radeon_ib_free(rdev, &ib);
4165 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4166 		return r;
4167 	}
4168 	r = radeon_fence_wait(ib.fence, false);
4169 	if (r) {
4170 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4171 		radeon_scratch_free(rdev, scratch);
4172 		radeon_ib_free(rdev, &ib);
4173 		return r;
4174 	}
4175 	for (i = 0; i < rdev->usec_timeout; i++) {
4176 		tmp = RREG32(scratch);
4177 		if (tmp == 0xDEADBEEF)
4178 			break;
4179 		DRM_UDELAY(1);
4180 	}
4181 	if (i < rdev->usec_timeout) {
4182 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4183 	} else {
4184 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
4185 			  scratch, tmp);
4186 		r = -EINVAL;
4187 	}
4188 	radeon_scratch_free(rdev, scratch);
4189 	radeon_ib_free(rdev, &ib);
4190 	return r;
4191 }
4192 
4193 /*
4194  * CP.
 * On CIK, gfx and compute now have independent command processors.
4196  *
4197  * GFX
4198  * Gfx consists of a single ring and can process both gfx jobs and
4199  * compute jobs.  The gfx CP consists of three microengines (ME):
4200  * PFP - Pre-Fetch Parser
4201  * ME - Micro Engine
4202  * CE - Constant Engine
4203  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
4205  * used by the DE so that they can be loaded into cache in parallel
4206  * while the DE is processing state update packets.
4207  *
4208  * Compute
4209  * The compute CP consists of two microengines (ME):
4210  * MEC1 - Compute MicroEngine 1
4211  * MEC2 - Compute MicroEngine 2
4212  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4213  * The queues are exposed to userspace and are programmed directly
4214  * by the compute runtime.
4215  */
4216 /**
4217  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4218  *
4219  * @rdev: radeon_device pointer
4220  * @enable: enable or disable the MEs
4221  *
4222  * Halts or unhalts the gfx MEs.
4223  */
4224 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4225 {
4226 	if (enable)
4227 		WREG32(CP_ME_CNTL, 0);
4228 	else {
4229 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4230 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4231 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4232 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4233 	}
4234 	udelay(50);
4235 }
4236 
4237 /**
4238  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4239  *
4240  * @rdev: radeon_device pointer
4241  *
4242  * Loads the gfx PFP, ME, and CE ucode.
4243  * Returns 0 for success, -EINVAL if the ucode is not available.
4244  */
4245 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4246 {
4247 	int i;
4248 
4249 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4250 		return -EINVAL;
4251 
4252 	cik_cp_gfx_enable(rdev, false);
4253 
4254 	if (rdev->new_fw) {
4255 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
4256 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4257 		const struct gfx_firmware_header_v1_0 *ce_hdr =
4258 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4259 		const struct gfx_firmware_header_v1_0 *me_hdr =
4260 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4261 		const __le32 *fw_data;
4262 		u32 fw_size;
4263 
4264 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4265 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4266 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
4267 
4268 		/* PFP */
4269 		fw_data = (const __le32 *)
4270 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4271 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4272 		WREG32(CP_PFP_UCODE_ADDR, 0);
4273 		for (i = 0; i < fw_size; i++)
4274 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4275 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4276 
4277 		/* CE */
4278 		fw_data = (const __le32 *)
4279 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4280 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4281 		WREG32(CP_CE_UCODE_ADDR, 0);
4282 		for (i = 0; i < fw_size; i++)
4283 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4284 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4285 
4286 		/* ME */
		fw_data = (const __le32 *)
			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4289 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4290 		WREG32(CP_ME_RAM_WADDR, 0);
4291 		for (i = 0; i < fw_size; i++)
4292 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4293 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4294 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4295 	} else {
4296 		const __be32 *fw_data;
4297 
4298 		/* PFP */
4299 		fw_data = (const __be32 *)rdev->pfp_fw->data;
4300 		WREG32(CP_PFP_UCODE_ADDR, 0);
4301 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4302 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4303 		WREG32(CP_PFP_UCODE_ADDR, 0);
4304 
4305 		/* CE */
4306 		fw_data = (const __be32 *)rdev->ce_fw->data;
4307 		WREG32(CP_CE_UCODE_ADDR, 0);
4308 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4309 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4310 		WREG32(CP_CE_UCODE_ADDR, 0);
4311 
4312 		/* ME */
4313 		fw_data = (const __be32 *)rdev->me_fw->data;
4314 		WREG32(CP_ME_RAM_WADDR, 0);
4315 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4316 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4317 		WREG32(CP_ME_RAM_WADDR, 0);
4318 	}
4319 
4320 	return 0;
4321 }
4322 
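/*
 * Note the common load pattern above: the relevant UCODE_ADDR/RAM_WADDR
 * register is zeroed, the firmware words are streamed through the
 * matching DATA register, and the address register is written once more
 * at the end (the ucode version for the new firmware layout, 0 for the
 * legacy layout).
 */
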
4323 /**
4324  * cik_cp_gfx_start - start the gfx ring
4325  *
4326  * @rdev: radeon_device pointer
4327  *
4328  * Enables the ring and loads the clear state context and other
4329  * packets required to init the ring.
4330  * Returns 0 for success, error for failure.
4331  */
4332 static int cik_cp_gfx_start(struct radeon_device *rdev)
4333 {
4334 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4335 	int r, i;
4336 
4337 	/* init the CP */
4338 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4339 	WREG32(CP_ENDIAN_SWAP, 0);
4340 	WREG32(CP_DEVICE_ID, 1);
4341 
4342 	cik_cp_gfx_enable(rdev, true);
4343 
4344 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4345 	if (r) {
4346 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4347 		return r;
4348 	}
4349 
4350 	/* init the CE partitions.  CE only used for gfx on CIK */
4351 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4352 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4353 	radeon_ring_write(ring, 0x8000);
4354 	radeon_ring_write(ring, 0x8000);
4355 
4356 	/* setup clear context state */
4357 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4358 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4359 
4360 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4361 	radeon_ring_write(ring, 0x80000000);
4362 	radeon_ring_write(ring, 0x80000000);
4363 
4364 	for (i = 0; i < cik_default_size; i++)
4365 		radeon_ring_write(ring, cik_default_state[i]);
4366 
4367 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4368 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4369 
4370 	/* set clear context state */
4371 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4372 	radeon_ring_write(ring, 0);
4373 
4374 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4375 	radeon_ring_write(ring, 0x00000316);
4376 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4377 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4378 
4379 	radeon_ring_unlock_commit(rdev, ring, false);
4380 
4381 	return 0;
4382 }
4383 
4384 /**
4385  * cik_cp_gfx_fini - stop the gfx ring
4386  *
4387  * @rdev: radeon_device pointer
4388  *
4389  * Stop the gfx ring and tear down the driver ring
4390  * info.
4391  */
4392 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4393 {
4394 	cik_cp_gfx_enable(rdev, false);
4395 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4396 }
4397 
4398 /**
4399  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4400  *
4401  * @rdev: radeon_device pointer
4402  *
4403  * Program the location and size of the gfx ring buffer
4404  * and test it to make sure it's working.
4405  * Returns 0 for success, error for failure.
4406  */
4407 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4408 {
4409 	struct radeon_ring *ring;
4410 	u32 tmp;
4411 	u32 rb_bufsz;
4412 	u64 rb_addr;
4413 	int r;
4414 
4415 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4416 	if (rdev->family != CHIP_HAWAII)
4417 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4418 
4419 	/* Set the write pointer delay */
4420 	WREG32(CP_RB_WPTR_DELAY, 0);
4421 
4422 	/* set the RB to use vmid 0 */
4423 	WREG32(CP_RB_VMID, 0);
4424 
4425 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4426 
4427 	/* ring 0 - compute and gfx */
4428 	/* Set ring buffer size */
4429 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4430 	rb_bufsz = order_base_2(ring->ring_size / 8);
4431 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4432 #ifdef __BIG_ENDIAN
4433 	tmp |= BUF_SWAP_32BIT;
4434 #endif
4435 	WREG32(CP_RB0_CNTL, tmp);
4436 
4437 	/* Initialize the ring buffer's read and write pointers */
4438 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4439 	ring->wptr = 0;
4440 	WREG32(CP_RB0_WPTR, ring->wptr);
4441 
	/* set the wb address whether it's enabled or not */
4443 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4444 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4445 
4446 	/* scratch register shadowing is no longer supported */
4447 	WREG32(SCRATCH_UMSK, 0);
4448 
4449 	if (!rdev->wb.enabled)
4450 		tmp |= RB_NO_UPDATE;
4451 
4452 	mdelay(1);
4453 	WREG32(CP_RB0_CNTL, tmp);
4454 
4455 	rb_addr = ring->gpu_addr >> 8;
4456 	WREG32(CP_RB0_BASE, rb_addr);
4457 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4458 
4459 	/* start the ring */
4460 	cik_cp_gfx_start(rdev);
4461 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4462 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4463 	if (r) {
4464 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4465 		return r;
4466 	}
4467 
4468 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4469 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4470 
4471 	return 0;
4472 }
4473 
4474 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4475 		     struct radeon_ring *ring)
4476 {
4477 	u32 rptr;
4478 
4479 	if (rdev->wb.enabled)
4480 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4481 	else
4482 		rptr = RREG32(CP_RB0_RPTR);
4483 
4484 	return rptr;
4485 }
4486 
4487 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4488 		     struct radeon_ring *ring)
4489 {
4490 	u32 wptr;
4491 
4492 	wptr = RREG32(CP_RB0_WPTR);
4493 
4494 	return wptr;
4495 }
4496 
4497 void cik_gfx_set_wptr(struct radeon_device *rdev,
4498 		      struct radeon_ring *ring)
4499 {
4500 	WREG32(CP_RB0_WPTR, ring->wptr);
4501 	(void)RREG32(CP_RB0_WPTR);
4502 }
4503 
4504 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4505 			 struct radeon_ring *ring)
4506 {
4507 	u32 rptr;
4508 
4509 	if (rdev->wb.enabled) {
4510 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4511 	} else {
4512 		mutex_lock(&rdev->srbm_mutex);
4513 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4514 		rptr = RREG32(CP_HQD_PQ_RPTR);
4515 		cik_srbm_select(rdev, 0, 0, 0, 0);
4516 		mutex_unlock(&rdev->srbm_mutex);
4517 	}
4518 
4519 	return rptr;
4520 }
4521 
4522 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4523 			 struct radeon_ring *ring)
4524 {
4525 	u32 wptr;
4526 
4527 	if (rdev->wb.enabled) {
4528 		/* XXX check if swapping is necessary on BE */
4529 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4530 	} else {
4531 		mutex_lock(&rdev->srbm_mutex);
4532 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4533 		wptr = RREG32(CP_HQD_PQ_WPTR);
4534 		cik_srbm_select(rdev, 0, 0, 0, 0);
4535 		mutex_unlock(&rdev->srbm_mutex);
4536 	}
4537 
4538 	return wptr;
4539 }
4540 
4541 void cik_compute_set_wptr(struct radeon_device *rdev,
4542 			  struct radeon_ring *ring)
4543 {
4544 	/* XXX check if swapping is necessary on BE */
4545 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4546 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4547 }
4548 
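/*
 * Compute rings submit work through doorbells rather than CP_RB0_WPTR:
 * cik_compute_set_wptr() mirrors the new write pointer into the
 * writeback buffer and then rings the doorbell (WDOORBELL32), which the
 * MEC picks up via the CP_HQD_PQ_DOORBELL_CONTROL setup performed in
 * cik_cp_compute_resume() below.
 */
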
4549 /**
4550  * cik_cp_compute_enable - enable/disable the compute CP MEs
4551  *
4552  * @rdev: radeon_device pointer
4553  * @enable: enable or disable the MEs
4554  *
4555  * Halts or unhalts the compute MEs.
4556  */
4557 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4558 {
4559 	if (enable)
4560 		WREG32(CP_MEC_CNTL, 0);
4561 	else {
4562 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4563 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4564 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4565 	}
4566 	udelay(50);
4567 }
4568 
4569 /**
4570  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4571  *
4572  * @rdev: radeon_device pointer
4573  *
4574  * Loads the compute MEC1&2 ucode.
4575  * Returns 0 for success, -EINVAL if the ucode is not available.
4576  */
4577 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4578 {
4579 	int i;
4580 
4581 	if (!rdev->mec_fw)
4582 		return -EINVAL;
4583 
4584 	cik_cp_compute_enable(rdev, false);
4585 
4586 	if (rdev->new_fw) {
4587 		const struct gfx_firmware_header_v1_0 *mec_hdr =
4588 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4589 		const __le32 *fw_data;
4590 		u32 fw_size;
4591 
4592 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4593 
4594 		/* MEC1 */
4595 		fw_data = (const __le32 *)
4596 			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4597 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4598 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4599 		for (i = 0; i < fw_size; i++)
4600 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4601 		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4602 
4603 		/* MEC2 */
4604 		if (rdev->family == CHIP_KAVERI) {
4605 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4606 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4607 
4608 			fw_data = (const __le32 *)
4609 				(rdev->mec2_fw->data +
4610 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4611 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4612 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4613 			for (i = 0; i < fw_size; i++)
4614 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4615 			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4616 		}
4617 	} else {
4618 		const __be32 *fw_data;
4619 
4620 		/* MEC1 */
4621 		fw_data = (const __be32 *)rdev->mec_fw->data;
4622 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4623 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4624 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4625 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4626 
4627 		if (rdev->family == CHIP_KAVERI) {
4628 			/* MEC2 */
4629 			fw_data = (const __be32 *)rdev->mec_fw->data;
4630 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4631 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4632 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4633 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4634 		}
4635 	}
4636 
4637 	return 0;
4638 }
4639 
4640 /**
4641  * cik_cp_compute_start - start the compute queues
4642  *
4643  * @rdev: radeon_device pointer
4644  *
4645  * Enable the compute queues.
4646  * Returns 0 for success, error for failure.
4647  */
4648 static int cik_cp_compute_start(struct radeon_device *rdev)
4649 {
4650 	cik_cp_compute_enable(rdev, true);
4651 
4652 	return 0;
4653 }
4654 
4655 /**
4656  * cik_cp_compute_fini - stop the compute queues
4657  *
4658  * @rdev: radeon_device pointer
4659  *
4660  * Stop the compute queues and tear down the driver queue
4661  * info.
4662  */
4663 static void cik_cp_compute_fini(struct radeon_device *rdev)
4664 {
4665 	int i, idx, r;
4666 
4667 	cik_cp_compute_enable(rdev, false);
4668 
4669 	for (i = 0; i < 2; i++) {
4670 		if (i == 0)
4671 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4672 		else
4673 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4674 
4675 		if (rdev->ring[idx].mqd_obj) {
4676 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4677 			if (unlikely(r != 0))
4678 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4679 
4680 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4681 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4682 
4683 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4684 			rdev->ring[idx].mqd_obj = NULL;
4685 		}
4686 	}
4687 }
4688 
4689 static void cik_mec_fini(struct radeon_device *rdev)
4690 {
4691 	int r;
4692 
4693 	if (rdev->mec.hpd_eop_obj) {
4694 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4695 		if (unlikely(r != 0))
4696 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4697 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4698 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4699 
4700 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4701 		rdev->mec.hpd_eop_obj = NULL;
4702 	}
4703 }
4704 
4705 #define MEC_HPD_SIZE 2048
4706 
4707 static int cik_mec_init(struct radeon_device *rdev)
4708 {
4709 	int r;
4710 	u32 *hpd;
4711 
4712 	/*
4713 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4714 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4715 	 * Nonetheless, we assign only 1 pipe because all other pipes will
4716 	 * be handled by KFD
4717 	 */
4718 	rdev->mec.num_mec = 1;
4719 	rdev->mec.num_pipe = 1;
4720 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4721 
4722 	if (rdev->mec.hpd_eop_obj == NULL) {
4723 		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4725 				     PAGE_SIZE, true,
4726 				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4727 				     &rdev->mec.hpd_eop_obj);
4728 		if (r) {
4729 			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4730 			return r;
4731 		}
4732 	}
4733 
4734 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4735 	if (unlikely(r != 0)) {
4736 		cik_mec_fini(rdev);
4737 		return r;
4738 	}
4739 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4740 			  &rdev->mec.hpd_eop_gpu_addr);
4741 	if (r) {
4742 		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4743 		cik_mec_fini(rdev);
4744 		return r;
4745 	}
4746 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4747 	if (r) {
4748 		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4749 		cik_mec_fini(rdev);
4750 		return r;
4751 	}
4752 
4753 	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4755 
4756 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4757 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4758 
4759 	return 0;
4760 }
4761 
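/*
 * With num_mec = num_pipe = 1 as configured above, the EOP buffer
 * allocated by cik_mec_init() is 1 * 1 * MEC_HPD_SIZE * 2 = 4096 bytes,
 * i.e. a single page.
 */
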
struct hqd_registers {
4764 	u32 cp_mqd_base_addr;
4765 	u32 cp_mqd_base_addr_hi;
4766 	u32 cp_hqd_active;
4767 	u32 cp_hqd_vmid;
4768 	u32 cp_hqd_persistent_state;
4769 	u32 cp_hqd_pipe_priority;
4770 	u32 cp_hqd_queue_priority;
4771 	u32 cp_hqd_quantum;
4772 	u32 cp_hqd_pq_base;
4773 	u32 cp_hqd_pq_base_hi;
4774 	u32 cp_hqd_pq_rptr;
4775 	u32 cp_hqd_pq_rptr_report_addr;
4776 	u32 cp_hqd_pq_rptr_report_addr_hi;
4777 	u32 cp_hqd_pq_wptr_poll_addr;
4778 	u32 cp_hqd_pq_wptr_poll_addr_hi;
4779 	u32 cp_hqd_pq_doorbell_control;
4780 	u32 cp_hqd_pq_wptr;
4781 	u32 cp_hqd_pq_control;
4782 	u32 cp_hqd_ib_base_addr;
4783 	u32 cp_hqd_ib_base_addr_hi;
4784 	u32 cp_hqd_ib_rptr;
4785 	u32 cp_hqd_ib_control;
4786 	u32 cp_hqd_iq_timer;
4787 	u32 cp_hqd_iq_rptr;
4788 	u32 cp_hqd_dequeue_request;
4789 	u32 cp_hqd_dma_offload;
4790 	u32 cp_hqd_sema_cmd;
4791 	u32 cp_hqd_msg_type;
4792 	u32 cp_hqd_atomic0_preop_lo;
4793 	u32 cp_hqd_atomic0_preop_hi;
4794 	u32 cp_hqd_atomic1_preop_lo;
4795 	u32 cp_hqd_atomic1_preop_hi;
4796 	u32 cp_hqd_hq_scheduler0;
4797 	u32 cp_hqd_hq_scheduler1;
4798 	u32 cp_mqd_control;
4799 };
4800 
struct bonaire_mqd {
4803 	u32 header;
4804 	u32 dispatch_initiator;
4805 	u32 dimensions[3];
4806 	u32 start_idx[3];
4807 	u32 num_threads[3];
4808 	u32 pipeline_stat_enable;
4809 	u32 perf_counter_enable;
4810 	u32 pgm[2];
4811 	u32 tba[2];
4812 	u32 tma[2];
4813 	u32 pgm_rsrc[2];
4814 	u32 vmid;
4815 	u32 resource_limits;
4816 	u32 static_thread_mgmt01[2];
4817 	u32 tmp_ring_size;
4818 	u32 static_thread_mgmt23[2];
4819 	u32 restart[3];
4820 	u32 thread_trace_enable;
4821 	u32 reserved1;
4822 	u32 user_data[16];
4823 	u32 vgtcs_invoke_count[2];
4824 	struct hqd_registers queue_state;
4825 	u32 dequeue_cntr;
4826 	u32 interrupt_queue[64];
4827 };
4828 
4829 /**
4830  * cik_cp_compute_resume - setup the compute queue registers
4831  *
4832  * @rdev: radeon_device pointer
4833  *
4834  * Program the compute queues and test them to make sure they
4835  * are working.
4836  * Returns 0 for success, error for failure.
4837  */
4838 static int cik_cp_compute_resume(struct radeon_device *rdev)
4839 {
4840 	int r, i, j, idx;
4841 	u32 tmp;
4842 	bool use_doorbell = true;
4843 	u64 hqd_gpu_addr;
4844 	u64 mqd_gpu_addr;
4845 	u64 eop_gpu_addr;
4846 	u64 wb_gpu_addr;
4847 	u32 *buf;
4848 	struct bonaire_mqd *mqd;
4849 
4850 	r = cik_cp_compute_start(rdev);
4851 	if (r)
4852 		return r;
4853 
4854 	/* fix up chicken bits */
4855 	tmp = RREG32(CP_CPF_DEBUG);
4856 	tmp |= (1 << 23);
4857 	WREG32(CP_CPF_DEBUG, tmp);
4858 
4859 	/* init the pipes */
4860 	mutex_lock(&rdev->srbm_mutex);
4861 
4862 	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4863 
4864 	cik_srbm_select(rdev, 0, 0, 0, 0);
4865 
4866 	/* write the EOP addr */
4867 	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4868 	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4869 
4870 	/* set the VMID assigned */
4871 	WREG32(CP_HPD_EOP_VMID, 0);
4872 
4873 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4874 	tmp = RREG32(CP_HPD_EOP_CONTROL);
4875 	tmp &= ~EOP_SIZE_MASK;
4876 	tmp |= order_base_2(MEC_HPD_SIZE / 8);
4877 	WREG32(CP_HPD_EOP_CONTROL, tmp);
4878 
4879 	mutex_unlock(&rdev->srbm_mutex);
4880 
4881 	/* init the queues.  Just two for now. */
4882 	for (i = 0; i < 2; i++) {
4883 		if (i == 0)
4884 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4885 		else
4886 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4887 
4888 		if (rdev->ring[idx].mqd_obj == NULL) {
4889 			r = radeon_bo_create(rdev,
4890 					     sizeof(struct bonaire_mqd),
4891 					     PAGE_SIZE, true,
4892 					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4893 					     NULL, &rdev->ring[idx].mqd_obj);
4894 			if (r) {
4895 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4896 				return r;
4897 			}
4898 		}
4899 
4900 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4901 		if (unlikely(r != 0)) {
4902 			cik_cp_compute_fini(rdev);
4903 			return r;
4904 		}
4905 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4906 				  &mqd_gpu_addr);
4907 		if (r) {
4908 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4909 			cik_cp_compute_fini(rdev);
4910 			return r;
4911 		}
4912 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4913 		if (r) {
4914 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4915 			cik_cp_compute_fini(rdev);
4916 			return r;
4917 		}
4918 
4919 		/* init the mqd struct */
4920 		memset(buf, 0, sizeof(struct bonaire_mqd));
4921 
4922 		mqd = (struct bonaire_mqd *)buf;
4923 		mqd->header = 0xC0310800;
4924 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4925 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4926 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4927 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4928 
4929 		mutex_lock(&rdev->srbm_mutex);
4930 		cik_srbm_select(rdev, rdev->ring[idx].me,
4931 				rdev->ring[idx].pipe,
4932 				rdev->ring[idx].queue, 0);
4933 
4934 		/* disable wptr polling */
4935 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4936 		tmp &= ~WPTR_POLL_EN;
4937 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4938 
4939 		/* enable doorbell? */
4940 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4941 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4942 		if (use_doorbell)
4943 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4944 		else
4945 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4946 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4947 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4948 
4949 		/* disable the queue if it's active */
4950 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4951 		mqd->queue_state.cp_hqd_pq_rptr = 0;
4952 		mqd->queue_state.cp_hqd_pq_wptr = 0;
4953 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4954 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4955 			for (j = 0; j < rdev->usec_timeout; j++) {
4956 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4957 					break;
4958 				udelay(1);
4959 			}
4960 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4961 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4962 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4963 		}
4964 
4965 		/* set the pointer to the MQD */
4966 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4967 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4968 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4969 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4970 		/* set MQD vmid to 0 */
4971 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4972 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4973 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4974 
4975 		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4976 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4977 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4978 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4979 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4980 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4981 
4982 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4983 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4984 		mqd->queue_state.cp_hqd_pq_control &=
4985 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4986 
4987 		mqd->queue_state.cp_hqd_pq_control |=
4988 			order_base_2(rdev->ring[idx].ring_size / 8);
4989 		mqd->queue_state.cp_hqd_pq_control |=
4990 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4991 #ifdef __BIG_ENDIAN
4992 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4993 #endif
4994 		mqd->queue_state.cp_hqd_pq_control &=
4995 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4996 		mqd->queue_state.cp_hqd_pq_control |=
4997 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4998 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4999 
5000 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
5001 		if (i == 0)
5002 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
5003 		else
5004 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
5005 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
5006 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
5007 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
5008 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
5009 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
5010 
5011 		/* set the wb address whether it's enabled or not */
5012 		if (i == 0)
5013 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
5014 		else
5015 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
5016 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
5017 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
5018 			upper_32_bits(wb_gpu_addr) & 0xffff;
5019 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
5020 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
5021 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
5022 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
5023 
5024 		/* enable the doorbell if requested */
5025 		if (use_doorbell) {
5026 			mqd->queue_state.cp_hqd_pq_doorbell_control =
5027 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5028 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
5029 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
5030 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
5031 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5032 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
5033 				~(DOORBELL_SOURCE | DOORBELL_HIT);
5034 
5035 		} else {
5036 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
5037 		}
5038 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5039 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
5040 
5041 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5042 		rdev->ring[idx].wptr = 0;
5043 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
5044 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5045 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
5046 
5047 		/* set the vmid for the queue */
5048 		mqd->queue_state.cp_hqd_vmid = 0;
5049 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
5050 
5051 		/* activate the queue */
5052 		mqd->queue_state.cp_hqd_active = 1;
5053 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
5054 
5055 		cik_srbm_select(rdev, 0, 0, 0, 0);
5056 		mutex_unlock(&rdev->srbm_mutex);
5057 
5058 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
5059 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
5060 
5061 		rdev->ring[idx].ready = true;
5062 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
5063 		if (r)
5064 			rdev->ring[idx].ready = false;
5065 	}
5066 
5067 	return 0;
5068 }
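
/*
 * Illustrative sketch (not driver code): the EOP_SIZE field programmed in
 * cik_cp_compute_resume() encodes a power-of-two buffer size, where the
 * hardware interprets the register value as 2^(EOP_SIZE+1) dwords.  The
 * hypothetical helper below is a stand-in for order_base_2(bytes / 8),
 * showing why that expression yields the right field value for
 * power-of-two buffer sizes.
 */
static unsigned int eop_size_field(unsigned int bytes)
{
	unsigned int v = 0;

	/* find v such that 2^(v+1) dwords == bytes, i.e. 8 << v == bytes */
	while ((8u << (v + 1)) <= bytes)
		v++;
	/* e.g. eop_size_field(4096) == 9: 2^(9+1) = 1024 dwords = 4096 bytes */
	return v;
}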
5069 
5070 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
5071 {
5072 	cik_cp_gfx_enable(rdev, enable);
5073 	cik_cp_compute_enable(rdev, enable);
5074 }
5075 
5076 static int cik_cp_load_microcode(struct radeon_device *rdev)
5077 {
5078 	int r;
5079 
5080 	r = cik_cp_gfx_load_microcode(rdev);
5081 	if (r)
5082 		return r;
5083 	r = cik_cp_compute_load_microcode(rdev);
5084 	if (r)
5085 		return r;
5086 
5087 	return 0;
5088 }
5089 
5090 static void cik_cp_fini(struct radeon_device *rdev)
5091 {
5092 	cik_cp_gfx_fini(rdev);
5093 	cik_cp_compute_fini(rdev);
5094 }
5095 
5096 static int cik_cp_resume(struct radeon_device *rdev)
5097 {
5098 	int r;
5099 
5100 	cik_enable_gui_idle_interrupt(rdev, false);
5101 
5102 	r = cik_cp_load_microcode(rdev);
5103 	if (r)
5104 		return r;
5105 
5106 	r = cik_cp_gfx_resume(rdev);
5107 	if (r)
5108 		return r;
5109 	r = cik_cp_compute_resume(rdev);
5110 	if (r)
5111 		return r;
5112 
5113 	cik_enable_gui_idle_interrupt(rdev, true);
5114 
5115 	return 0;
5116 }
5117 
5118 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
5119 {
5120 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
5121 		RREG32(GRBM_STATUS));
5122 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
5123 		RREG32(GRBM_STATUS2));
5124 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
5125 		RREG32(GRBM_STATUS_SE0));
5126 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
5127 		RREG32(GRBM_STATUS_SE1));
5128 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
5129 		RREG32(GRBM_STATUS_SE2));
5130 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
5131 		RREG32(GRBM_STATUS_SE3));
5132 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
5133 		RREG32(SRBM_STATUS));
5134 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
5135 		RREG32(SRBM_STATUS2));
5136 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
5137 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
5138 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
5139 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
5140 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
5141 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
5142 		 RREG32(CP_STALLED_STAT1));
5143 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
5144 		 RREG32(CP_STALLED_STAT2));
5145 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
5146 		 RREG32(CP_STALLED_STAT3));
5147 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
5148 		 RREG32(CP_CPF_BUSY_STAT));
5149 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
5150 		 RREG32(CP_CPF_STALLED_STAT1));
5151 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
5152 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
5153 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
5154 		 RREG32(CP_CPC_STALLED_STAT1));
5155 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
5156 }
5157 
5158 /**
5159  * cik_gpu_check_soft_reset - check which blocks are busy
5160  *
5161  * @rdev: radeon_device pointer
5162  *
5163  * Check which blocks are busy and return the relevant reset
5164  * mask to be used by cik_gpu_soft_reset().
5165  * Returns a mask of the blocks to be reset.
5166  */
5167 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
5168 {
5169 	u32 reset_mask = 0;
5170 	u32 tmp;
5171 
5172 	/* GRBM_STATUS */
5173 	tmp = RREG32(GRBM_STATUS);
5174 	if (tmp & (PA_BUSY | SC_BUSY |
5175 		   BCI_BUSY | SX_BUSY |
5176 		   TA_BUSY | VGT_BUSY |
5177 		   DB_BUSY | CB_BUSY |
5178 		   GDS_BUSY | SPI_BUSY |
5179 		   IA_BUSY | IA_BUSY_NO_DMA))
5180 		reset_mask |= RADEON_RESET_GFX;
5181 
5182 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5183 		reset_mask |= RADEON_RESET_CP;
5184 
5185 	/* GRBM_STATUS2 */
5186 	tmp = RREG32(GRBM_STATUS2);
5187 	if (tmp & RLC_BUSY)
5188 		reset_mask |= RADEON_RESET_RLC;
5189 
5190 	/* SDMA0_STATUS_REG */
5191 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5192 	if (!(tmp & SDMA_IDLE))
5193 		reset_mask |= RADEON_RESET_DMA;
5194 
5195 	/* SDMA1_STATUS_REG */
5196 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5197 	if (!(tmp & SDMA_IDLE))
5198 		reset_mask |= RADEON_RESET_DMA1;
5199 
5200 	/* SRBM_STATUS2 */
5201 	tmp = RREG32(SRBM_STATUS2);
5202 	if (tmp & SDMA_BUSY)
5203 		reset_mask |= RADEON_RESET_DMA;
5204 
5205 	if (tmp & SDMA1_BUSY)
5206 		reset_mask |= RADEON_RESET_DMA1;
5207 
5208 	/* SRBM_STATUS */
5209 	tmp = RREG32(SRBM_STATUS);
5210 
5211 	if (tmp & IH_BUSY)
5212 		reset_mask |= RADEON_RESET_IH;
5213 
5214 	if (tmp & SEM_BUSY)
5215 		reset_mask |= RADEON_RESET_SEM;
5216 
5217 	if (tmp & GRBM_RQ_PENDING)
5218 		reset_mask |= RADEON_RESET_GRBM;
5219 
5220 	if (tmp & VMC_BUSY)
5221 		reset_mask |= RADEON_RESET_VMC;
5222 
5223 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5224 		   MCC_BUSY | MCD_BUSY))
5225 		reset_mask |= RADEON_RESET_MC;
5226 
5227 	if (evergreen_is_display_hung(rdev))
5228 		reset_mask |= RADEON_RESET_DISPLAY;
5229 
5230 	/* Skip MC reset as it's most likely not hung, just busy */
5231 	if (reset_mask & RADEON_RESET_MC) {
5232 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5233 		reset_mask &= ~RADEON_RESET_MC;
5234 	}
5235 
5236 	return reset_mask;
5237 }
5238 
5239 /**
5240  * cik_gpu_soft_reset - soft reset GPU
5241  *
5242  * @rdev: radeon_device pointer
5243  * @reset_mask: mask of which blocks to reset
5244  *
5245  * Soft reset the blocks specified in @reset_mask.
5246  */
5247 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
5248 {
5249 	struct evergreen_mc_save save;
5250 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5251 	u32 tmp;
5252 
5253 	if (reset_mask == 0)
5254 		return;
5255 
5256 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
5257 
5258 	cik_print_gpu_status_regs(rdev);
5259 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5260 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5261 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5262 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5263 
5264 	/* disable CG/PG */
5265 	cik_fini_pg(rdev);
5266 	cik_fini_cg(rdev);
5267 
5268 	/* stop the rlc */
5269 	cik_rlc_stop(rdev);
5270 
5271 	/* Disable GFX parsing/prefetching */
5272 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5273 
5274 	/* Disable MEC parsing/prefetching */
5275 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5276 
5277 	if (reset_mask & RADEON_RESET_DMA) {
5278 		/* sdma0 */
5279 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5280 		tmp |= SDMA_HALT;
5281 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5282 	}
5283 	if (reset_mask & RADEON_RESET_DMA1) {
5284 		/* sdma1 */
5285 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5286 		tmp |= SDMA_HALT;
5287 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5288 	}
5289 
5290 	evergreen_mc_stop(rdev, &save);
5291 	if (evergreen_mc_wait_for_idle(rdev)) {
5292 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5293 	}
5294 
5295 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5296 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5297 
5298 	if (reset_mask & RADEON_RESET_CP) {
5299 		grbm_soft_reset |= SOFT_RESET_CP;
5300 
5301 		srbm_soft_reset |= SOFT_RESET_GRBM;
5302 	}
5303 
5304 	if (reset_mask & RADEON_RESET_DMA)
5305 		srbm_soft_reset |= SOFT_RESET_SDMA;
5306 
5307 	if (reset_mask & RADEON_RESET_DMA1)
5308 		srbm_soft_reset |= SOFT_RESET_SDMA1;
5309 
5310 	if (reset_mask & RADEON_RESET_DISPLAY)
5311 		srbm_soft_reset |= SOFT_RESET_DC;
5312 
5313 	if (reset_mask & RADEON_RESET_RLC)
5314 		grbm_soft_reset |= SOFT_RESET_RLC;
5315 
5316 	if (reset_mask & RADEON_RESET_SEM)
5317 		srbm_soft_reset |= SOFT_RESET_SEM;
5318 
5319 	if (reset_mask & RADEON_RESET_IH)
5320 		srbm_soft_reset |= SOFT_RESET_IH;
5321 
5322 	if (reset_mask & RADEON_RESET_GRBM)
5323 		srbm_soft_reset |= SOFT_RESET_GRBM;
5324 
5325 	if (reset_mask & RADEON_RESET_VMC)
5326 		srbm_soft_reset |= SOFT_RESET_VMC;
5327 
5328 	if (!(rdev->flags & RADEON_IS_IGP)) {
5329 		if (reset_mask & RADEON_RESET_MC)
5330 			srbm_soft_reset |= SOFT_RESET_MC;
5331 	}
5332 
5333 	if (grbm_soft_reset) {
5334 		tmp = RREG32(GRBM_SOFT_RESET);
5335 		tmp |= grbm_soft_reset;
5336 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5337 		WREG32(GRBM_SOFT_RESET, tmp);
5338 		tmp = RREG32(GRBM_SOFT_RESET);
5339 
5340 		udelay(50);
5341 
5342 		tmp &= ~grbm_soft_reset;
5343 		WREG32(GRBM_SOFT_RESET, tmp);
5344 		tmp = RREG32(GRBM_SOFT_RESET);
5345 	}
5346 
5347 	if (srbm_soft_reset) {
5348 		tmp = RREG32(SRBM_SOFT_RESET);
5349 		tmp |= srbm_soft_reset;
5350 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5351 		WREG32(SRBM_SOFT_RESET, tmp);
5352 		tmp = RREG32(SRBM_SOFT_RESET);
5353 
5354 		udelay(50);
5355 
5356 		tmp &= ~srbm_soft_reset;
5357 		WREG32(SRBM_SOFT_RESET, tmp);
5358 		tmp = RREG32(SRBM_SOFT_RESET);
5359 	}
5360 
5361 	/* Wait a little for things to settle down */
5362 	udelay(50);
5363 
5364 	evergreen_mc_resume(rdev, &save);
5365 	udelay(50);
5366 
5367 	cik_print_gpu_status_regs(rdev);
5368 }
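
/*
 * Illustrative sketch: both reset sequences above follow the same pulse
 * pattern: set the reset bits, read back to post the write, wait, clear
 * the bits, read back again.  A hypothetical helper (not used by the
 * driver) expressing that pattern:
 */
static void soft_reset_pulse(struct radeon_device *rdev, u32 reg, u32 bits)
{
	u32 tmp = RREG32(reg);

	tmp |= bits;
	WREG32(reg, tmp);
	tmp = RREG32(reg);	/* read back to post the write */

	udelay(50);

	tmp &= ~bits;
	WREG32(reg, tmp);
	tmp = RREG32(reg);	/* read back again */
}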
5369 
5370 struct kv_reset_save_regs {
5371 	u32 gmcon_reng_execute;
5372 	u32 gmcon_misc;
5373 	u32 gmcon_misc3;
5374 };
5375 
5376 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5377 				   struct kv_reset_save_regs *save)
5378 {
5379 	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5380 	save->gmcon_misc = RREG32(GMCON_MISC);
5381 	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5382 
5383 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5384 	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5385 						STCTRL_STUTTER_EN));
5386 }
5387 
5388 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5389 				      struct kv_reset_save_regs *save)
5390 {
5391 	int i;
5392 
5393 	WREG32(GMCON_PGFSM_WRITE, 0);
5394 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5395 
5396 	for (i = 0; i < 5; i++)
5397 		WREG32(GMCON_PGFSM_WRITE, 0);
5398 
5399 	WREG32(GMCON_PGFSM_WRITE, 0);
5400 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5401 
5402 	for (i = 0; i < 5; i++)
5403 		WREG32(GMCON_PGFSM_WRITE, 0);
5404 
5405 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5406 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5407 
5408 	for (i = 0; i < 5; i++)
5409 		WREG32(GMCON_PGFSM_WRITE, 0);
5410 
5411 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5412 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5413 
5414 	for (i = 0; i < 5; i++)
5415 		WREG32(GMCON_PGFSM_WRITE, 0);
5416 
5417 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5418 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5419 
5420 	for (i = 0; i < 5; i++)
5421 		WREG32(GMCON_PGFSM_WRITE, 0);
5422 
5423 	WREG32(GMCON_PGFSM_WRITE, 0);
5424 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5425 
5426 	for (i = 0; i < 5; i++)
5427 		WREG32(GMCON_PGFSM_WRITE, 0);
5428 
5429 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5430 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5431 
5432 	for (i = 0; i < 5; i++)
5433 		WREG32(GMCON_PGFSM_WRITE, 0);
5434 
5435 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5436 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5437 
5438 	for (i = 0; i < 5; i++)
5439 		WREG32(GMCON_PGFSM_WRITE, 0);
5440 
5441 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5442 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5443 
5444 	for (i = 0; i < 5; i++)
5445 		WREG32(GMCON_PGFSM_WRITE, 0);
5446 
5447 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5448 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5449 
5450 	for (i = 0; i < 5; i++)
5451 		WREG32(GMCON_PGFSM_WRITE, 0);
5452 
5453 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5454 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5455 
5456 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5457 	WREG32(GMCON_MISC, save->gmcon_misc);
5458 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5459 }
5460 
5461 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5462 {
5463 	struct evergreen_mc_save save;
5464 	struct kv_reset_save_regs kv_save = { 0 };
5465 	u32 tmp, i;
5466 
5467 	dev_info(rdev->dev, "GPU pci config reset\n");
5468 
5469 	/* disable dpm? */
5470 
5471 	/* disable cg/pg */
5472 	cik_fini_pg(rdev);
5473 	cik_fini_cg(rdev);
5474 
5475 	/* Disable GFX parsing/prefetching */
5476 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5477 
5478 	/* Disable MEC parsing/prefetching */
5479 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5480 
5481 	/* sdma0 */
5482 	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5483 	tmp |= SDMA_HALT;
5484 	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5485 	/* sdma1 */
5486 	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5487 	tmp |= SDMA_HALT;
5488 	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5489 	/* XXX other engines? */
5490 
5491 	/* halt the rlc, disable cp internal ints */
5492 	cik_rlc_stop(rdev);
5493 
5494 	udelay(50);
5495 
5496 	/* disable mem access */
5497 	evergreen_mc_stop(rdev, &save);
5498 	if (evergreen_mc_wait_for_idle(rdev)) {
5499 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5500 	}
5501 
5502 	if (rdev->flags & RADEON_IS_IGP)
5503 		kv_save_regs_for_reset(rdev, &kv_save);
5504 
5505 	/* disable BM */
5506 	pci_clear_master(rdev->pdev);
5507 	/* reset */
5508 	radeon_pci_config_reset(rdev);
5509 
5510 	udelay(100);
5511 
5512 	/* wait for asic to come out of reset */
5513 	for (i = 0; i < rdev->usec_timeout; i++) {
5514 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5515 			break;
5516 		udelay(1);
5517 	}
5518 
5519 	/* does asic init need to be run first??? */
5520 	if (rdev->flags & RADEON_IS_IGP)
5521 		kv_restore_regs_for_reset(rdev, &kv_save);
5522 }
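
/*
 * Illustrative note: while the asic is still held in reset, PCI reads
 * return all ones, so the CONFIG_MEMSIZE poll above treats 0xffffffff
 * as "not out of reset yet".  A hypothetical predicate for the same
 * check:
 */
static bool asic_out_of_reset(struct radeon_device *rdev)
{
	/* reads return all ones until the device responds again */
	return RREG32(CONFIG_MEMSIZE) != 0xffffffff;
}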
5523 
5524 /**
5525  * cik_asic_reset - soft reset GPU
5526  *
5527  * @rdev: radeon_device pointer
5528  *
5529  * Look up which blocks are hung and attempt
5530  * to reset them.
5531  * Returns 0 for success.
5532  */
5533 int cik_asic_reset(struct radeon_device *rdev)
5534 {
5535 	u32 reset_mask;
5536 
5537 	reset_mask = cik_gpu_check_soft_reset(rdev);
5538 
5539 	if (reset_mask)
5540 		r600_set_bios_scratch_engine_hung(rdev, true);
5541 
5542 	/* try soft reset */
5543 	cik_gpu_soft_reset(rdev, reset_mask);
5544 
5545 	reset_mask = cik_gpu_check_soft_reset(rdev);
5546 
5547 	/* try pci config reset */
5548 	if (reset_mask && radeon_hard_reset)
5549 		cik_gpu_pci_config_reset(rdev);
5550 
5551 	reset_mask = cik_gpu_check_soft_reset(rdev);
5552 
5553 	if (!reset_mask)
5554 		r600_set_bios_scratch_engine_hung(rdev, false);
5555 
5556 	return 0;
5557 }
5558 
5559 /**
5560  * cik_gfx_is_lockup - check if the 3D engine is locked up
5561  *
5562  * @rdev: radeon_device pointer
5563  * @ring: radeon_ring structure holding ring information
5564  *
5565  * Check if the 3D engine is locked up (CIK).
5566  * Returns true if the engine is locked, false if not.
5567  */
5568 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5569 {
5570 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5571 
5572 	if (!(reset_mask & (RADEON_RESET_GFX |
5573 			    RADEON_RESET_COMPUTE |
5574 			    RADEON_RESET_CP))) {
5575 		radeon_ring_lockup_update(rdev, ring);
5576 		return false;
5577 	}
5578 	return radeon_ring_test_lockup(rdev, ring);
5579 }
5580 
5581 /* MC */
5582 /**
5583  * cik_mc_program - program the GPU memory controller
5584  *
5585  * @rdev: radeon_device pointer
5586  *
5587  * Set the location of vram, gart, and AGP in the GPU's
5588  * physical address space (CIK).
5589  */
5590 static void cik_mc_program(struct radeon_device *rdev)
5591 {
5592 	struct evergreen_mc_save save;
5593 	u32 tmp;
5594 	int i, j;
5595 
5596 	/* Initialize HDP */
5597 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5598 		WREG32((0x2c14 + j), 0x00000000);
5599 		WREG32((0x2c18 + j), 0x00000000);
5600 		WREG32((0x2c1c + j), 0x00000000);
5601 		WREG32((0x2c20 + j), 0x00000000);
5602 		WREG32((0x2c24 + j), 0x00000000);
5603 	}
5604 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5605 
5606 	evergreen_mc_stop(rdev, &save);
5607 	if (radeon_mc_wait_for_idle(rdev)) {
5608 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5609 	}
5610 	/* Lock out access through VGA aperture */
5611 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5612 	/* Update configuration */
5613 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5614 	       rdev->mc.vram_start >> 12);
5615 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5616 	       rdev->mc.vram_end >> 12);
5617 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5618 	       rdev->vram_scratch.gpu_addr >> 12);
5619 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5620 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5621 	WREG32(MC_VM_FB_LOCATION, tmp);
5622 	/* XXX double check these! */
5623 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5624 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5625 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5626 	WREG32(MC_VM_AGP_BASE, 0);
5627 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5628 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5629 	if (radeon_mc_wait_for_idle(rdev)) {
5630 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5631 	}
5632 	evergreen_mc_resume(rdev, &save);
5633 	/* we need to own VRAM, so turn off the VGA renderer here
5634 	 * to stop it from overwriting our objects */
5635 	rv515_vga_render_disable(rdev);
5636 }
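
/*
 * Illustrative sketch: MC_VM_FB_LOCATION, programmed above, packs the
 * VRAM range in 16MB (1 << 24) units: end address in the high 16 bits,
 * start address in the low 16 bits.  Hypothetical helper:
 */
static u32 fb_location(u64 vram_start, u64 vram_end)
{
	u32 tmp = ((vram_end >> 24) & 0xFFFF) << 16;

	tmp |= (vram_start >> 24) & 0xFFFF;
	/* e.g. fb_location(0, 0xFFFFFFFFULL) == 0x00FF0000 */
	return tmp;
}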
5637 
5638 /**
5639  * cik_mc_init - initialize the memory controller driver params
5640  *
5641  * @rdev: radeon_device pointer
5642  *
5643  * Look up the amount of vram, vram width, and decide how to place
5644  * vram and gart within the GPU's physical address space (CIK).
5645  * Returns 0 for success.
5646  */
5647 static int cik_mc_init(struct radeon_device *rdev)
5648 {
5649 	u32 tmp;
5650 	int chansize, numchan;
5651 
5652 	/* Get VRAM information */
5653 	rdev->mc.vram_is_ddr = true;
5654 	tmp = RREG32(MC_ARB_RAMCFG);
5655 	if (tmp & CHANSIZE_MASK) {
5656 		chansize = 64;
5657 	} else {
5658 		chansize = 32;
5659 	}
5660 	tmp = RREG32(MC_SHARED_CHMAP);
5661 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5662 	case 0:
5663 	default:
5664 		numchan = 1;
5665 		break;
5666 	case 1:
5667 		numchan = 2;
5668 		break;
5669 	case 2:
5670 		numchan = 4;
5671 		break;
5672 	case 3:
5673 		numchan = 8;
5674 		break;
5675 	case 4:
5676 		numchan = 3;
5677 		break;
5678 	case 5:
5679 		numchan = 6;
5680 		break;
5681 	case 6:
5682 		numchan = 10;
5683 		break;
5684 	case 7:
5685 		numchan = 12;
5686 		break;
5687 	case 8:
5688 		numchan = 16;
5689 		break;
5690 	}
5691 	rdev->mc.vram_width = numchan * chansize;
5692 	/* Could aper size report 0? */
5693 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5694 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5695 	/* CONFIG_MEMSIZE reports the size in MB, as on SI */
5696 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5697 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5698 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5699 	si_vram_gtt_location(rdev, &rdev->mc);
5700 	radeon_update_bandwidth_info(rdev);
5701 
5702 	return 0;
5703 }
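
/*
 * Worked example (illustrative): the bus width computed above is simply
 * channels * channel size.  A NOOFCHAN encoding of 3 decodes to 8
 * channels, and with CHANSIZE set (64-bit channels) that gives an
 * 8 * 64 = 512-bit memory interface, as found on Hawaii.
 */
static u32 vram_width_bits(u32 numchan, u32 chansize)
{
	return numchan * chansize;	/* vram_width_bits(8, 64) == 512 */
}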
5704 
5705 /*
5706  * GART
5707  * VMID 0 is the physical GPU addresses as used by the kernel.
5708  * VMIDs 1-15 are used for userspace clients and are handled
5709  * by the radeon vm/hsa code.
5710  */
5711 /**
5712  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5713  *
5714  * @rdev: radeon_device pointer
5715  *
5716  * Flush the TLB for the VMID 0 page table (CIK).
5717  */
5718 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5719 {
5720 	/* flush hdp cache */
5721 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5722 
5723 	/* bits 0-15 are the VM contexts0-15 */
5724 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5725 }
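
/*
 * Illustrative sketch: bits 0-15 of VM_INVALIDATE_REQUEST select the VM
 * contexts to flush, so the 0x1 written above flushes only VMID 0 while
 * 0xffff would flush all sixteen.  Hypothetical helper:
 */
static u32 vm_invalidate_mask(unsigned int vmid)
{
	return 1u << vmid;	/* vm_invalidate_mask(0) == 0x1 */
}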
5726 
5727 static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
5728 {
5729 	int i;
5730 	uint32_t sh_mem_bases, sh_mem_config;
5731 
5732 	sh_mem_bases = 0x6000 | 0x6000 << 16;
5733 	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
5734 	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
5735 
5736 	mutex_lock(&rdev->srbm_mutex);
5737 	for (i = 8; i < 16; i++) {
5738 		cik_srbm_select(rdev, 0, 0, 0, i);
5739 		/* CP and shaders */
5740 		WREG32(SH_MEM_CONFIG, sh_mem_config);
5741 		WREG32(SH_MEM_APE1_BASE, 1);
5742 		WREG32(SH_MEM_APE1_LIMIT, 0);
5743 		WREG32(SH_MEM_BASES, sh_mem_bases);
5744 	}
5745 	cik_srbm_select(rdev, 0, 0, 0, 0);
5746 	mutex_unlock(&rdev->srbm_mutex);
5747 }
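
/*
 * Illustrative note (an assumption based on the value programmed above):
 * SH_MEM_BASES carries the shared and private aperture bases as two
 * 16-bit fields, each holding bits 47:32 of the virtual address, so
 * 0x6000 in both fields would place both apertures at 0x6000_0000_0000
 * for the amdkfd VMIDs.  Hypothetical helper:
 */
static u64 aperture_va(u32 base_field)
{
	/* aperture_va(0x6000) == 0x600000000000ULL */
	return (u64)base_field << 32;
}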
5748 
5749 /**
5750  * cik_pcie_gart_enable - gart enable
5751  *
5752  * @rdev: radeon_device pointer
5753  *
5754  * This sets up the TLBs, programs the page tables for VMID0,
5755  * sets up the hw for VMIDs 1-15 which are allocated on
5756  * demand, and sets up the global locations for the LDS, GDS,
5757  * and GPUVM for FSA64 clients (CIK).
5758  * Returns 0 for success, errors for failure.
5759  */
5760 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5761 {
5762 	int r, i;
5763 
5764 	if (rdev->gart.robj == NULL) {
5765 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5766 		return -EINVAL;
5767 	}
5768 	r = radeon_gart_table_vram_pin(rdev);
5769 	if (r)
5770 		return r;
5771 	/* Setup TLB control */
5772 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5773 	       (0xA << 7) |
5774 	       ENABLE_L1_TLB |
5775 	       ENABLE_L1_FRAGMENT_PROCESSING |
5776 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5777 	       ENABLE_ADVANCED_DRIVER_MODEL |
5778 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5779 	/* Setup L2 cache */
5780 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5781 	       ENABLE_L2_FRAGMENT_PROCESSING |
5782 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5783 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5784 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5785 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5786 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5787 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5788 	       BANK_SELECT(4) |
5789 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5790 	/* setup context0 */
5791 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5792 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5793 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5794 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5795 			(u32)(rdev->dummy_page.addr >> 12));
5796 	WREG32(VM_CONTEXT0_CNTL2, 0);
5797 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5798 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5799 
5800 	WREG32(0x15D4, 0);
5801 	WREG32(0x15D8, 0);
5802 	WREG32(0x15DC, 0);
5803 
5804 	/* restore context1-15 */
5805 	/* set vm size, must be a multiple of 4 */
5806 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5807 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5808 	for (i = 1; i < 16; i++) {
5809 		if (i < 8)
5810 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5811 			       rdev->vm_manager.saved_table_addr[i]);
5812 		else
5813 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5814 			       rdev->vm_manager.saved_table_addr[i]);
5815 	}
5816 
5817 	/* enable context1-15 */
5818 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5819 	       (u32)(rdev->dummy_page.addr >> 12));
5820 	WREG32(VM_CONTEXT1_CNTL2, 4);
5821 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5822 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5823 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5824 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5825 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5826 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5827 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5828 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5829 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5830 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5831 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5832 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5833 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5834 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5835 
5836 	if (rdev->family == CHIP_KAVERI) {
5837 		u32 tmp = RREG32(CHUB_CONTROL);
5838 		tmp &= ~BYPASS_VM;
5839 		WREG32(CHUB_CONTROL, tmp);
5840 	}
5841 
5842 	/* XXX SH_MEM regs */
5843 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5844 	mutex_lock(&rdev->srbm_mutex);
5845 	for (i = 0; i < 16; i++) {
5846 		cik_srbm_select(rdev, 0, 0, 0, i);
5847 		/* CP and shaders */
5848 		WREG32(SH_MEM_CONFIG, 0);
5849 		WREG32(SH_MEM_APE1_BASE, 1);
5850 		WREG32(SH_MEM_APE1_LIMIT, 0);
5851 		WREG32(SH_MEM_BASES, 0);
5852 		/* SDMA GFX */
5853 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5854 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5855 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5856 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5857 		/* XXX SDMA RLC - todo */
5858 	}
5859 	cik_srbm_select(rdev, 0, 0, 0, 0);
5860 	mutex_unlock(&rdev->srbm_mutex);
5861 
5862 	cik_pcie_init_compute_vmid(rdev);
5863 
5864 	cik_pcie_gart_tlb_flush(rdev);
5865 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5866 		 (unsigned)(rdev->mc.gtt_size >> 20),
5867 		 (unsigned long long)rdev->gart.table_addr);
5868 	rdev->gart.ready = true;
5869 	return 0;
5870 }
5871 
5872 /**
5873  * cik_pcie_gart_disable - gart disable
5874  *
5875  * @rdev: radeon_device pointer
5876  *
5877  * This disables all VM page tables (CIK).
5878  */
5879 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5880 {
5881 	unsigned i;
5882 
5883 	for (i = 1; i < 16; ++i) {
5884 		uint32_t reg;
5885 		if (i < 8)
5886 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5887 		else
5888 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5889 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5890 	}
5891 
5892 	/* Disable all tables */
5893 	WREG32(VM_CONTEXT0_CNTL, 0);
5894 	WREG32(VM_CONTEXT1_CNTL, 0);
5895 	/* Setup TLB control */
5896 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5897 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5898 	/* Setup L2 cache */
5899 	WREG32(VM_L2_CNTL,
5900 	       ENABLE_L2_FRAGMENT_PROCESSING |
5901 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5902 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5903 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5904 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5905 	WREG32(VM_L2_CNTL2, 0);
5906 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5907 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5908 	radeon_gart_table_vram_unpin(rdev);
5909 }
5910 
5911 /**
5912  * cik_pcie_gart_fini - vm fini callback
5913  *
5914  * @rdev: radeon_device pointer
5915  *
5916  * Tears down the driver GART/VM setup (CIK).
5917  */
5918 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5919 {
5920 	cik_pcie_gart_disable(rdev);
5921 	radeon_gart_table_vram_free(rdev);
5922 	radeon_gart_fini(rdev);
5923 }
5924 
5925 /* vm parser */
5926 /**
5927  * cik_ib_parse - vm ib_parse callback
5928  *
5929  * @rdev: radeon_device pointer
5930  * @ib: indirect buffer pointer
5931  *
5932  * CIK uses hw IB checking so this is a nop (CIK).
5933  */
5934 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5935 {
5936 	return 0;
5937 }
5938 
5939 /*
5940  * vm
5941  * VMID 0 is the physical GPU addresses as used by the kernel.
5942  * VMIDs 1-15 are used for userspace clients and are handled
5943  * by the radeon vm/hsa code.
5944  */
5945 /**
5946  * cik_vm_init - cik vm init callback
5947  *
5948  * @rdev: radeon_device pointer
5949  *
5950  * Inits cik specific vm parameters (number of VMs, base of vram for
5951  * VMIDs 1-15) (CIK).
5952  * Returns 0 for success.
5953  */
5954 int cik_vm_init(struct radeon_device *rdev)
5955 {
5956 	/*
5957 	 * number of VMs
5958 	 * VMID 0 is reserved for System
5959 	 * radeon graphics/compute will use VMIDs 1-7
5960 	 * amdkfd will use VMIDs 8-15
5961 	 */
5962 	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5963 	/* base offset of vram pages */
5964 	if (rdev->flags & RADEON_IS_IGP) {
5965 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5966 		tmp <<= 22;
5967 		rdev->vm_manager.vram_base_offset = tmp;
5968 	} else
5969 		rdev->vm_manager.vram_base_offset = 0;
5970 
5971 	return 0;
5972 }
5973 
5974 /**
5975  * cik_vm_fini - cik vm fini callback
5976  *
5977  * @rdev: radeon_device pointer
5978  *
5979  * Tear down any asic specific VM setup (CIK).
5980  */
5981 void cik_vm_fini(struct radeon_device *rdev)
5982 {
5983 }
5984 
5985 /**
5986  * cik_vm_decode_fault - print human readable fault info
5987  *
5988  * @rdev: radeon_device pointer
5989  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5990  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5991  *
5992  * Print human readable fault information (CIK).
5993  */
5994 static void cik_vm_decode_fault(struct radeon_device *rdev,
5995 				u32 status, u32 addr, u32 mc_client)
5996 {
5997 	u32 mc_id;
5998 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5999 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
6000 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
6001 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
6002 
6003 	if (rdev->family == CHIP_HAWAII)
6004 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6005 	else
6006 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6007 
6008 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
6009 	       protections, vmid, addr,
6010 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
6011 	       block, mc_client, mc_id);
6012 }
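
/*
 * Worked example (values made up for illustration): an mc_client word of
 * 0x43423000 decodes most significant byte first into "CB0" followed by
 * a NUL terminator, which is the block string printed above.
 */
static void mc_client_to_block(u32 mc_client, char block[5])
{
	block[0] = mc_client >> 24;		/* 0x43 'C' */
	block[1] = (mc_client >> 16) & 0xff;	/* 0x42 'B' */
	block[2] = (mc_client >> 8) & 0xff;	/* 0x30 '0' */
	block[3] = mc_client & 0xff;		/* 0x00 */
	block[4] = 0;
}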
6013 
6014 /**
6015  * cik_vm_flush - cik vm flush using the CP
6016  *
6017  * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @vm_id: VMID of the page table to flush
 * @pd_addr: physical address of the page directory
6018  *
6019  * Update the page table base and flush the VM TLB
6020  * using the CP (CIK).
6021  */
6022 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
6023 		  unsigned vm_id, uint64_t pd_addr)
6024 {
6025 	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
6026 
6027 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6028 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6029 				 WRITE_DATA_DST_SEL(0)));
6030 	if (vm_id < 8) {
6031 		radeon_ring_write(ring,
6032 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
6033 	} else {
6034 		radeon_ring_write(ring,
6035 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
6036 	}
6037 	radeon_ring_write(ring, 0);
6038 	radeon_ring_write(ring, pd_addr >> 12);
6039 
6040 	/* update SH_MEM_* regs */
6041 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6042 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6043 				 WRITE_DATA_DST_SEL(0)));
6044 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6045 	radeon_ring_write(ring, 0);
6046 	radeon_ring_write(ring, VMID(vm_id));
6047 
6048 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
6049 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6050 				 WRITE_DATA_DST_SEL(0)));
6051 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
6052 	radeon_ring_write(ring, 0);
6053 
6054 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
6055 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
6056 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
6057 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
6058 
6059 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6060 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6061 				 WRITE_DATA_DST_SEL(0)));
6062 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6063 	radeon_ring_write(ring, 0);
6064 	radeon_ring_write(ring, VMID(0));
6065 
6066 	/* HDP flush */
6067 	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
6068 
6069 	/* bits 0-15 are the VM contexts0-15 */
6070 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6071 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6072 				 WRITE_DATA_DST_SEL(0)));
6073 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6074 	radeon_ring_write(ring, 0);
6075 	radeon_ring_write(ring, 1 << vm_id);
6076 
6077 	/* wait for the invalidate to complete */
6078 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6079 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6080 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6081 				 WAIT_REG_MEM_ENGINE(0))); /* me */
6082 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6083 	radeon_ring_write(ring, 0);
6084 	radeon_ring_write(ring, 0); /* ref */
6085 	radeon_ring_write(ring, 0); /* mask */
6086 	radeon_ring_write(ring, 0x20); /* poll interval */
6087 
6088 	/* compute doesn't have PFP */
6089 	if (usepfp) {
6090 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6091 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6092 		radeon_ring_write(ring, 0x0);
6093 	}
6094 }
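
/*
 * Illustrative note: the count field of a PACKET3 header is one less
 * than the number of body dwords, so each PACKET3(PACKET3_WRITE_DATA, 3)
 * above occupies five ring dwords (header, control, register offset,
 * upper address, one data dword), and the PACKET3(PACKET3_WRITE_DATA, 6)
 * occupies eight.  Hypothetical helper:
 */
static unsigned int packet3_total_dwords(unsigned int count_field)
{
	return count_field + 2;	/* packet3_total_dwords(3) == 5 */
}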
6095 
6096 /*
6097  * RLC
6098  * The RLC is a multi-purpose microengine that handles a
6099  * variety of functions, the most important of which is
6100  * the interrupt controller.
6101  */
6102 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6103 					  bool enable)
6104 {
6105 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
6106 
6107 	if (enable)
6108 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6109 	else
6110 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6111 	WREG32(CP_INT_CNTL_RING0, tmp);
6112 }
6113 
6114 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6115 {
6116 	u32 tmp;
6117 
6118 	tmp = RREG32(RLC_LB_CNTL);
6119 	if (enable)
6120 		tmp |= LOAD_BALANCE_ENABLE;
6121 	else
6122 		tmp &= ~LOAD_BALANCE_ENABLE;
6123 	WREG32(RLC_LB_CNTL, tmp);
6124 }
6125 
6126 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
6127 {
6128 	u32 i, j, k;
6129 	u32 mask;
6130 
6131 	mutex_lock(&rdev->grbm_idx_mutex);
6132 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6133 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6134 			cik_select_se_sh(rdev, i, j);
6135 			for (k = 0; k < rdev->usec_timeout; k++) {
6136 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
6137 					break;
6138 				udelay(1);
6139 			}
6140 		}
6141 	}
6142 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6143 	mutex_unlock(&rdev->grbm_idx_mutex);
6144 
6145 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
6146 	for (k = 0; k < rdev->usec_timeout; k++) {
6147 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
6148 			break;
6149 		udelay(1);
6150 	}
6151 }
6152 
6153 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6154 {
6155 	u32 tmp;
6156 
6157 	tmp = RREG32(RLC_CNTL);
6158 	if (tmp != rlc)
6159 		WREG32(RLC_CNTL, rlc);
6160 }
6161 
6162 static u32 cik_halt_rlc(struct radeon_device *rdev)
6163 {
6164 	u32 data, orig;
6165 
6166 	orig = data = RREG32(RLC_CNTL);
6167 
6168 	if (data & RLC_ENABLE) {
6169 		u32 i;
6170 
6171 		data &= ~RLC_ENABLE;
6172 		WREG32(RLC_CNTL, data);
6173 
6174 		for (i = 0; i < rdev->usec_timeout; i++) {
6175 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
6176 				break;
6177 			udelay(1);
6178 		}
6179 
6180 		cik_wait_for_rlc_serdes(rdev);
6181 	}
6182 
6183 	return orig;
6184 }
6185 
6186 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
6187 {
6188 	u32 tmp, i, mask;
6189 
6190 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
6191 	WREG32(RLC_GPR_REG2, tmp);
6192 
6193 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
6194 	for (i = 0; i < rdev->usec_timeout; i++) {
6195 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
6196 			break;
6197 		udelay(1);
6198 	}
6199 
6200 	for (i = 0; i < rdev->usec_timeout; i++) {
6201 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
6202 			break;
6203 		udelay(1);
6204 	}
6205 }
6206 
6207 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6208 {
6209 	u32 tmp;
6210 
6211 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6212 	WREG32(RLC_GPR_REG2, tmp);
6213 }
6214 
6215 /**
6216  * cik_rlc_stop - stop the RLC ME
6217  *
6218  * @rdev: radeon_device pointer
6219  *
6220  * Halt the RLC ME (MicroEngine) (CIK).
6221  */
6222 static void cik_rlc_stop(struct radeon_device *rdev)
6223 {
6224 	WREG32(RLC_CNTL, 0);
6225 
6226 	cik_enable_gui_idle_interrupt(rdev, false);
6227 
6228 	cik_wait_for_rlc_serdes(rdev);
6229 }
6230 
6231 /**
6232  * cik_rlc_start - start the RLC ME
6233  *
6234  * @rdev: radeon_device pointer
6235  *
6236  * Unhalt the RLC ME (MicroEngine) (CIK).
6237  */
6238 static void cik_rlc_start(struct radeon_device *rdev)
6239 {
6240 	WREG32(RLC_CNTL, RLC_ENABLE);
6241 
6242 	cik_enable_gui_idle_interrupt(rdev, true);
6243 
6244 	udelay(50);
6245 }
6246 
6247 /**
6248  * cik_rlc_resume - setup the RLC hw
6249  *
6250  * @rdev: radeon_device pointer
6251  *
6252  * Initialize the RLC registers, load the ucode,
6253  * and start the RLC (CIK).
6254  * Returns 0 for success, -EINVAL if the ucode is not available.
6255  */
6256 static int cik_rlc_resume(struct radeon_device *rdev)
6257 {
6258 	u32 i, size, tmp;
6259 
6260 	if (!rdev->rlc_fw)
6261 		return -EINVAL;
6262 
6263 	cik_rlc_stop(rdev);
6264 
6265 	/* disable CG */
6266 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6267 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6268 
6269 	si_rlc_reset(rdev);
6270 
6271 	cik_init_pg(rdev);
6272 
6273 	cik_init_cg(rdev);
6274 
6275 	WREG32(RLC_LB_CNTR_INIT, 0);
6276 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6277 
6278 	mutex_lock(&rdev->grbm_idx_mutex);
6279 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6280 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6281 	WREG32(RLC_LB_PARAMS, 0x00600408);
6282 	WREG32(RLC_LB_CNTL, 0x80000004);
6283 	mutex_unlock(&rdev->grbm_idx_mutex);
6284 
6285 	WREG32(RLC_MC_CNTL, 0);
6286 	WREG32(RLC_UCODE_CNTL, 0);
6287 
6288 	if (rdev->new_fw) {
6289 		const struct rlc_firmware_header_v1_0 *hdr =
6290 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6291 		const __le32 *fw_data = (const __le32 *)
6292 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6293 
6294 		radeon_ucode_print_rlc_hdr(&hdr->header);
6295 
6296 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6297 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6298 		for (i = 0; i < size; i++)
6299 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6300 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6301 	} else {
6302 		const __be32 *fw_data;
6303 
6304 		switch (rdev->family) {
6305 		case CHIP_BONAIRE:
6306 		case CHIP_HAWAII:
6307 		default:
6308 			size = BONAIRE_RLC_UCODE_SIZE;
6309 			break;
6310 		case CHIP_KAVERI:
6311 			size = KV_RLC_UCODE_SIZE;
6312 			break;
6313 		case CHIP_KABINI:
6314 			size = KB_RLC_UCODE_SIZE;
6315 			break;
6316 		case CHIP_MULLINS:
6317 			size = ML_RLC_UCODE_SIZE;
6318 			break;
6319 		}
6320 
6321 		fw_data = (const __be32 *)rdev->rlc_fw->data;
6322 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6323 		for (i = 0; i < size; i++)
6324 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6325 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6326 	}
6327 
6328 	/* XXX - find out what chips support lbpw */
6329 	cik_enable_lbpw(rdev, false);
6330 
6331 	if (rdev->family == CHIP_BONAIRE)
6332 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6333 
6334 	cik_rlc_start(rdev);
6335 
6336 	return 0;
6337 }
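
/*
 * Illustrative sketch of the ucode load pattern used above: the ADDR
 * register is set once and presumably auto-increments as each dword is
 * written to DATA.  Hypothetical helper (not used by the driver):
 */
static void stream_rlc_ucode(struct radeon_device *rdev, u32 addr_reg,
			     u32 data_reg, const __le32 *fw_data, u32 dwords)
{
	u32 i;

	WREG32(addr_reg, 0);	/* start streaming at offset 0 */
	for (i = 0; i < dwords; i++)
		WREG32(data_reg, le32_to_cpup(fw_data++));
}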
6338 
6339 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6340 {
6341 	u32 data, orig, tmp, tmp2;
6342 
6343 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6344 
6345 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6346 		cik_enable_gui_idle_interrupt(rdev, true);
6347 
6348 		tmp = cik_halt_rlc(rdev);
6349 
6350 		mutex_lock(&rdev->grbm_idx_mutex);
6351 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6352 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6353 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6354 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6355 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6356 		mutex_unlock(&rdev->grbm_idx_mutex);
6357 
6358 		cik_update_rlc(rdev, tmp);
6359 
6360 		data |= CGCG_EN | CGLS_EN;
6361 	} else {
6362 		cik_enable_gui_idle_interrupt(rdev, false);
6363 
		/* dummy reads, presumably to let the clock-gating change take effect */
6364 		RREG32(CB_CGTT_SCLK_CTRL);
6365 		RREG32(CB_CGTT_SCLK_CTRL);
6366 		RREG32(CB_CGTT_SCLK_CTRL);
6367 		RREG32(CB_CGTT_SCLK_CTRL);
6368 
6369 		data &= ~(CGCG_EN | CGLS_EN);
6370 	}
6371 
6372 	if (orig != data)
6373 		WREG32(RLC_CGCG_CGLS_CTRL, data);
6375 }
6376 
6377 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6378 {
6379 	u32 data, orig, tmp = 0;
6380 
6381 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6382 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6383 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6384 				orig = data = RREG32(CP_MEM_SLP_CNTL);
6385 				data |= CP_MEM_LS_EN;
6386 				if (orig != data)
6387 					WREG32(CP_MEM_SLP_CNTL, data);
6388 			}
6389 		}
6390 
6391 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6392 		data |= 0x00000001;
6393 		data &= 0xfffffffd;
6394 		if (orig != data)
6395 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6396 
6397 		tmp = cik_halt_rlc(rdev);
6398 
6399 		mutex_lock(&rdev->grbm_idx_mutex);
6400 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6401 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6402 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6403 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6404 		WREG32(RLC_SERDES_WR_CTRL, data);
6405 		mutex_unlock(&rdev->grbm_idx_mutex);
6406 
6407 		cik_update_rlc(rdev, tmp);
6408 
6409 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6410 			orig = data = RREG32(CGTS_SM_CTRL_REG);
6411 			data &= ~SM_MODE_MASK;
6412 			data |= SM_MODE(0x2);
6413 			data |= SM_MODE_ENABLE;
6414 			data &= ~CGTS_OVERRIDE;
6415 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6416 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6417 				data &= ~CGTS_LS_OVERRIDE;
6418 			data &= ~ON_MONITOR_ADD_MASK;
6419 			data |= ON_MONITOR_ADD_EN;
6420 			data |= ON_MONITOR_ADD(0x96);
6421 			if (orig != data)
6422 				WREG32(CGTS_SM_CTRL_REG, data);
6423 		}
6424 	} else {
6425 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6426 		data |= 0x00000003;
6427 		if (orig != data)
6428 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6429 
6430 		data = RREG32(RLC_MEM_SLP_CNTL);
6431 		if (data & RLC_MEM_LS_EN) {
6432 			data &= ~RLC_MEM_LS_EN;
6433 			WREG32(RLC_MEM_SLP_CNTL, data);
6434 		}
6435 
6436 		data = RREG32(CP_MEM_SLP_CNTL);
6437 		if (data & CP_MEM_LS_EN) {
6438 			data &= ~CP_MEM_LS_EN;
6439 			WREG32(CP_MEM_SLP_CNTL, data);
6440 		}
6441 
6442 		orig = data = RREG32(CGTS_SM_CTRL_REG);
6443 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6444 		if (orig != data)
6445 			WREG32(CGTS_SM_CTRL_REG, data);
6446 
6447 		tmp = cik_halt_rlc(rdev);
6448 
6449 		mutex_lock(&rdev->grbm_idx_mutex);
6450 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6451 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6452 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6453 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6454 		WREG32(RLC_SERDES_WR_CTRL, data);
6455 		mutex_unlock(&rdev->grbm_idx_mutex);
6456 
6457 		cik_update_rlc(rdev, tmp);
6458 	}
6459 }
6460 
6461 static const u32 mc_cg_registers[] =
6462 {
6463 	MC_HUB_MISC_HUB_CG,
6464 	MC_HUB_MISC_SIP_CG,
6465 	MC_HUB_MISC_VM_CG,
6466 	MC_XPB_CLK_GAT,
6467 	ATC_MISC_CG,
6468 	MC_CITF_MISC_WR_CG,
6469 	MC_CITF_MISC_RD_CG,
6470 	MC_CITF_MISC_VM_CG,
6471 	VM_L2_CG,
6472 };
6473 
6474 static void cik_enable_mc_ls(struct radeon_device *rdev,
6475 			     bool enable)
6476 {
6477 	int i;
6478 	u32 orig, data;
6479 
6480 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6481 		orig = data = RREG32(mc_cg_registers[i]);
6482 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6483 			data |= MC_LS_ENABLE;
6484 		else
6485 			data &= ~MC_LS_ENABLE;
6486 		if (data != orig)
6487 			WREG32(mc_cg_registers[i], data);
6488 	}
6489 }
6490 
6491 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6492 			       bool enable)
6493 {
6494 	int i;
6495 	u32 orig, data;
6496 
6497 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6498 		orig = data = RREG32(mc_cg_registers[i]);
6499 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6500 			data |= MC_CG_ENABLE;
6501 		else
6502 			data &= ~MC_CG_ENABLE;
6503 		if (data != orig)
6504 			WREG32(mc_cg_registers[i], data);
6505 	}
6506 }
6507 
6508 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6509 				 bool enable)
6510 {
6511 	u32 orig, data;
6512 
6513 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6514 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6515 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6516 	} else {
6517 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6518 		data |= 0xff000000;
6519 		if (data != orig)
6520 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6521 
6522 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6523 		data |= 0xff000000;
6524 		if (data != orig)
6525 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6526 	}
6527 }
6528 
6529 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6530 				 bool enable)
6531 {
6532 	u32 orig, data;
6533 
6534 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6535 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6536 		data |= 0x100;
6537 		if (orig != data)
6538 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6539 
6540 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6541 		data |= 0x100;
6542 		if (orig != data)
6543 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6544 	} else {
6545 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6546 		data &= ~0x100;
6547 		if (orig != data)
6548 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6549 
6550 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6551 		data &= ~0x100;
6552 		if (orig != data)
6553 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6554 	}
6555 }
6556 
6557 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6558 				bool enable)
6559 {
6560 	u32 orig, data;
6561 
6562 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6563 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6564 		data = 0xfff;
6565 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6566 
6567 		orig = data = RREG32(UVD_CGC_CTRL);
6568 		data |= DCM;
6569 		if (orig != data)
6570 			WREG32(UVD_CGC_CTRL, data);
6571 	} else {
6572 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6573 		data &= ~0xfff;
6574 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6575 
6576 		orig = data = RREG32(UVD_CGC_CTRL);
6577 		data &= ~DCM;
6578 		if (orig != data)
6579 			WREG32(UVD_CGC_CTRL, data);
6580 	}
6581 }
6582 
6583 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6584 			       bool enable)
6585 {
6586 	u32 orig, data;
6587 
6588 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6589 
6590 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6591 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6592 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6593 	else
6594 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6595 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6596 
6597 	if (orig != data)
6598 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6599 }
6600 
6601 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6602 				bool enable)
6603 {
6604 	u32 orig, data;
6605 
6606 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6607 
6608 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6609 		data &= ~CLOCK_GATING_DIS;
6610 	else
6611 		data |= CLOCK_GATING_DIS;
6612 
6613 	if (orig != data)
6614 		WREG32(HDP_HOST_PATH_CNTL, data);
6615 }
6616 
6617 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6618 			      bool enable)
6619 {
6620 	u32 orig, data;
6621 
6622 	orig = data = RREG32(HDP_MEM_POWER_LS);
6623 
6624 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6625 		data |= HDP_LS_ENABLE;
6626 	else
6627 		data &= ~HDP_LS_ENABLE;
6628 
6629 	if (orig != data)
6630 		WREG32(HDP_MEM_POWER_LS, data);
6631 }
6632 
6633 void cik_update_cg(struct radeon_device *rdev,
6634 		   u32 block, bool enable)
6635 {
6637 	if (block & RADEON_CG_BLOCK_GFX) {
6638 		cik_enable_gui_idle_interrupt(rdev, false);
6639 		/* order matters! */
6640 		if (enable) {
6641 			cik_enable_mgcg(rdev, true);
6642 			cik_enable_cgcg(rdev, true);
6643 		} else {
6644 			cik_enable_cgcg(rdev, false);
6645 			cik_enable_mgcg(rdev, false);
6646 		}
6647 		cik_enable_gui_idle_interrupt(rdev, true);
6648 	}
6649 
6650 	if (block & RADEON_CG_BLOCK_MC) {
6651 		if (!(rdev->flags & RADEON_IS_IGP)) {
6652 			cik_enable_mc_mgcg(rdev, enable);
6653 			cik_enable_mc_ls(rdev, enable);
6654 		}
6655 	}
6656 
6657 	if (block & RADEON_CG_BLOCK_SDMA) {
6658 		cik_enable_sdma_mgcg(rdev, enable);
6659 		cik_enable_sdma_mgls(rdev, enable);
6660 	}
6661 
6662 	if (block & RADEON_CG_BLOCK_BIF) {
6663 		cik_enable_bif_mgls(rdev, enable);
6664 	}
6665 
6666 	if (block & RADEON_CG_BLOCK_UVD) {
6667 		if (rdev->has_uvd)
6668 			cik_enable_uvd_mgcg(rdev, enable);
6669 	}
6670 
6671 	if (block & RADEON_CG_BLOCK_HDP) {
6672 		cik_enable_hdp_mgcg(rdev, enable);
6673 		cik_enable_hdp_ls(rdev, enable);
6674 	}
6675 
6676 	if (block & RADEON_CG_BLOCK_VCE) {
6677 		vce_v2_0_enable_mgcg(rdev, enable);
6678 	}
6679 }
6680 
6681 static void cik_init_cg(struct radeon_device *rdev)
6682 {
6684 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6685 
6686 	if (rdev->has_uvd)
6687 		si_init_uvd_internal_cg(rdev);
6688 
6689 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6690 			     RADEON_CG_BLOCK_SDMA |
6691 			     RADEON_CG_BLOCK_BIF |
6692 			     RADEON_CG_BLOCK_UVD |
6693 			     RADEON_CG_BLOCK_HDP), true);
6694 }
6695 
6696 static void cik_fini_cg(struct radeon_device *rdev)
6697 {
6698 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6699 			     RADEON_CG_BLOCK_SDMA |
6700 			     RADEON_CG_BLOCK_BIF |
6701 			     RADEON_CG_BLOCK_UVD |
6702 			     RADEON_CG_BLOCK_HDP), false);
6703 
6704 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6705 }
6706 
6707 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6708 					  bool enable)
6709 {
6710 	u32 data, orig;
6711 
6712 	orig = data = RREG32(RLC_PG_CNTL);
6713 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6714 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6715 	else
6716 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6717 	if (orig != data)
6718 		WREG32(RLC_PG_CNTL, data);
6719 }
6720 
6721 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6722 					  bool enable)
6723 {
6724 	u32 data, orig;
6725 
6726 	orig = data = RREG32(RLC_PG_CNTL);
6727 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6728 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6729 	else
6730 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6731 	if (orig != data)
6732 		WREG32(RLC_PG_CNTL, data);
6733 }
6734 
6735 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6736 {
6737 	u32 data, orig;
6738 
6739 	orig = data = RREG32(RLC_PG_CNTL);
6740 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6741 		data &= ~DISABLE_CP_PG;
6742 	else
6743 		data |= DISABLE_CP_PG;
6744 	if (orig != data)
6745 		WREG32(RLC_PG_CNTL, data);
6746 }
6747 
6748 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6749 {
6750 	u32 data, orig;
6751 
6752 	orig = data = RREG32(RLC_PG_CNTL);
6753 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6754 		data &= ~DISABLE_GDS_PG;
6755 	else
6756 		data |= DISABLE_GDS_PG;
6757 	if (orig != data)
6758 		WREG32(RLC_PG_CNTL, data);
6759 }
6760 
6761 #define CP_ME_TABLE_SIZE    96
6762 #define CP_ME_TABLE_OFFSET  2048
6763 #define CP_MEC_TABLE_OFFSET 4096
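/* Legacy (pre-header) firmware images keep a 96-dword RLC jump table at
 * a fixed dword offset: 2048 for CE/PFP/ME, 4096 for MEC.  Newer images
 * describe the table's offset and size in their gfx headers instead;
 * both cases are handled in cik_init_cp_pg_table() below.
 */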
6764 
6765 void cik_init_cp_pg_table(struct radeon_device *rdev)
6766 {
6767 	volatile u32 *dst_ptr;
6768 	int me, i, max_me = 4;
6769 	u32 bo_offset = 0;
6770 	u32 table_offset, table_size;
6771 
6772 	if (rdev->family == CHIP_KAVERI)
6773 		max_me = 5;
6774 
6775 	if (rdev->rlc.cp_table_ptr == NULL)
6776 		return;
6777 
6778 	/* write the cp table buffer */
6779 	dst_ptr = rdev->rlc.cp_table_ptr;
6780 	for (me = 0; me < max_me; me++) {
6781 		if (rdev->new_fw) {
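			/* new-style firmware: each ME's gfx header gives the
			 * jump table's dword offset and size within the image
			 */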
6782 			const __le32 *fw_data;
6783 			const struct gfx_firmware_header_v1_0 *hdr;
6784 
6785 			if (me == 0) {
6786 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6787 				fw_data = (const __le32 *)
6788 					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6789 				table_offset = le32_to_cpu(hdr->jt_offset);
6790 				table_size = le32_to_cpu(hdr->jt_size);
6791 			} else if (me == 1) {
6792 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6793 				fw_data = (const __le32 *)
6794 					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6795 				table_offset = le32_to_cpu(hdr->jt_offset);
6796 				table_size = le32_to_cpu(hdr->jt_size);
6797 			} else if (me == 2) {
6798 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6799 				fw_data = (const __le32 *)
6800 					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6801 				table_offset = le32_to_cpu(hdr->jt_offset);
6802 				table_size = le32_to_cpu(hdr->jt_size);
6803 			} else if (me == 3) {
6804 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6805 				fw_data = (const __le32 *)
6806 					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6807 				table_offset = le32_to_cpu(hdr->jt_offset);
6808 				table_size = le32_to_cpu(hdr->jt_size);
6809 			} else {
6810 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6811 				fw_data = (const __le32 *)
6812 					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6813 				table_offset = le32_to_cpu(hdr->jt_offset);
6814 				table_size = le32_to_cpu(hdr->jt_size);
6815 			}
6816 
6817 			for (i = 0; i < table_size; i++) {
6818 				dst_ptr[bo_offset + i] =
6819 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6820 			}
6821 			bo_offset += table_size;
6822 		} else {
6823 			const __be32 *fw_data;
6824 			table_size = CP_ME_TABLE_SIZE;
6825 
6826 			if (me == 0) {
6827 				fw_data = (const __be32 *)rdev->ce_fw->data;
6828 				table_offset = CP_ME_TABLE_OFFSET;
6829 			} else if (me == 1) {
6830 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6831 				table_offset = CP_ME_TABLE_OFFSET;
6832 			} else if (me == 2) {
6833 				fw_data = (const __be32 *)rdev->me_fw->data;
6834 				table_offset = CP_ME_TABLE_OFFSET;
6835 			} else {
6836 				fw_data = (const __be32 *)rdev->mec_fw->data;
6837 				table_offset = CP_MEC_TABLE_OFFSET;
6838 			}
6839 
6840 			for (i = 0; i < table_size; i++) {
6841 				dst_ptr[bo_offset + i] =
6842 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6843 			}
6844 			bo_offset += table_size;
6845 		}
6846 	}
6847 }
6848 
6849 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6850 				bool enable)
6851 {
6852 	u32 data, orig;
6853 
6854 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6855 		orig = data = RREG32(RLC_PG_CNTL);
6856 		data |= GFX_PG_ENABLE;
6857 		if (orig != data)
6858 			WREG32(RLC_PG_CNTL, data);
6859 
6860 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6861 		data |= AUTO_PG_EN;
6862 		if (orig != data)
6863 			WREG32(RLC_AUTO_PG_CTRL, data);
6864 	} else {
6865 		orig = data = RREG32(RLC_PG_CNTL);
6866 		data &= ~GFX_PG_ENABLE;
6867 		if (orig != data)
6868 			WREG32(RLC_PG_CNTL, data);
6869 
6870 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6871 		data &= ~AUTO_PG_EN;
6872 		if (orig != data)
6873 			WREG32(RLC_AUTO_PG_CTRL, data);
6874 
6875 		data = RREG32(DB_RENDER_CONTROL);
6876 	}
6877 }
6878 
6879 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6880 {
6881 	u32 mask = 0, tmp, tmp1;
6882 	int i;
6883 
6884 	mutex_lock(&rdev->grbm_idx_mutex);
6885 	cik_select_se_sh(rdev, se, sh);
6886 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6887 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6888 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6889 	mutex_unlock(&rdev->grbm_idx_mutex);
6890 
6891 	tmp &= 0xffff0000;
6892 
6893 	tmp |= tmp1;
6894 	tmp >>= 16;
6895 
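	/* build a mask with one bit per possible CU in this SH */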
6896 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6897 		mask <<= 1;
6898 		mask |= 1;
6899 	}
6900 
6901 	return (~tmp) & mask;
6902 }
6903 
6904 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6905 {
6906 	u32 i, j, k, active_cu_number = 0;
6907 	u32 mask, counter, cu_bitmap;
6908 	u32 tmp = 0;
6909 
6910 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6911 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6912 			mask = 1;
6913 			cu_bitmap = 0;
6914 			counter = 0;
6915 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6916 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6917 					if (counter < 2)
6918 						cu_bitmap |= mask;
6919 					counter++;
6920 				}
6921 				mask <<= 1;
6922 			}
6923 
6924 			active_cu_number += counter;
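			/* pack the always-on CU bitmap: 8 bits per SH, 16 bits per SE */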
6925 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6926 		}
6927 	}
6928 
6929 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6930 
6931 	tmp = RREG32(RLC_MAX_PG_CU);
6932 	tmp &= ~MAX_PU_CU_MASK;
6933 	tmp |= MAX_PU_CU(active_cu_number);
6934 	WREG32(RLC_MAX_PG_CU, tmp);
6935 }
6936 
6937 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6938 				       bool enable)
6939 {
6940 	u32 data, orig;
6941 
6942 	orig = data = RREG32(RLC_PG_CNTL);
6943 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6944 		data |= STATIC_PER_CU_PG_ENABLE;
6945 	else
6946 		data &= ~STATIC_PER_CU_PG_ENABLE;
6947 	if (orig != data)
6948 		WREG32(RLC_PG_CNTL, data);
6949 }
6950 
6951 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6952 					bool enable)
6953 {
6954 	u32 data, orig;
6955 
6956 	orig = data = RREG32(RLC_PG_CNTL);
6957 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6958 		data |= DYN_PER_CU_PG_ENABLE;
6959 	else
6960 		data &= ~DYN_PER_CU_PG_ENABLE;
6961 	if (orig != data)
6962 		WREG32(RLC_PG_CNTL, data);
6963 }
6964 
6965 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6966 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6967 
6968 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6969 {
6970 	u32 data, orig;
6971 	u32 i;
6972 
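	/* point the RLC at the clear state buffer: the descriptor is three
	 * scratch dwords (address hi, address lo, size) written at
	 * RLC_CLEAR_STATE_DESCRIPTOR_OFFSET
	 */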
6973 	if (rdev->rlc.cs_data) {
6974 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6975 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6976 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6977 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6978 	} else {
6979 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6980 		for (i = 0; i < 3; i++)
6981 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6982 	}
6983 	if (rdev->rlc.reg_list) {
6984 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6985 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6986 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6987 	}
6988 
6989 	orig = data = RREG32(RLC_PG_CNTL);
6990 	data |= GFX_PG_SRC;
6991 	if (orig != data)
6992 		WREG32(RLC_PG_CNTL, data);
6993 
6994 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6995 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6996 
6997 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6998 	data &= ~IDLE_POLL_COUNT_MASK;
6999 	data |= IDLE_POLL_COUNT(0x60);
7000 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
7001 
7002 	data = 0x10101010;
7003 	WREG32(RLC_PG_DELAY, data);
7004 
7005 	data = RREG32(RLC_PG_DELAY_2);
7006 	data &= ~0xff;
7007 	data |= 0x3;
7008 	WREG32(RLC_PG_DELAY_2, data);
7009 
7010 	data = RREG32(RLC_AUTO_PG_CTRL);
7011 	data &= ~GRBM_REG_SGIT_MASK;
7012 	data |= GRBM_REG_SGIT(0x700);
7013 	WREG32(RLC_AUTO_PG_CTRL, data);
7015 }
7016 
7017 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
7018 {
7019 	cik_enable_gfx_cgpg(rdev, enable);
7020 	cik_enable_gfx_static_mgpg(rdev, enable);
7021 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
7022 }
7023 
7024 u32 cik_get_csb_size(struct radeon_device *rdev)
7025 {
7026 	u32 count = 0;
7027 	const struct cs_section_def *sect = NULL;
7028 	const struct cs_extent_def *ext = NULL;
7029 
7030 	if (rdev->rlc.cs_data == NULL)
7031 		return 0;
7032 
7033 	/* begin clear state */
7034 	count += 2;
7035 	/* context control state */
7036 	count += 3;
7037 
7038 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7039 		for (ext = sect->section; ext->extent != NULL; ++ext) {
7040 			if (sect->id == SECT_CONTEXT)
7041 				count += 2 + ext->reg_count;
7042 			else
7043 				return 0;
7044 		}
7045 	}
7046 	/* pa_sc_raster_config/pa_sc_raster_config1 */
7047 	count += 4;
7048 	/* end clear state */
7049 	count += 2;
7050 	/* clear state */
7051 	count += 2;
7052 
7053 	return count;
7054 }
7055 
7056 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
7057 {
7058 	u32 count = 0, i;
7059 	const struct cs_section_def *sect = NULL;
7060 	const struct cs_extent_def *ext = NULL;
7061 
7062 	if (rdev->rlc.cs_data == NULL)
7063 		return;
7064 	if (buffer == NULL)
7065 		return;
7066 
7067 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7068 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
7069 
7070 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
7071 	buffer[count++] = cpu_to_le32(0x80000000);
7072 	buffer[count++] = cpu_to_le32(0x80000000);
7073 
7074 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7075 		for (ext = sect->section; ext->extent != NULL; ++ext) {
7076 			if (sect->id == SECT_CONTEXT) {
7077 				buffer[count++] =
7078 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
7079 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
7080 				for (i = 0; i < ext->reg_count; i++)
7081 					buffer[count++] = cpu_to_le32(ext->extent[i]);
7082 			} else {
7083 				return;
7084 			}
7085 		}
7086 	}
7087 
7088 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
7089 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
7090 	switch (rdev->family) {
7091 	case CHIP_BONAIRE:
7092 		buffer[count++] = cpu_to_le32(0x16000012);
7093 		buffer[count++] = cpu_to_le32(0x00000000);
7094 		break;
7095 	case CHIP_KAVERI:
7096 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7097 		buffer[count++] = cpu_to_le32(0x00000000);
7098 		break;
7099 	case CHIP_KABINI:
7100 	case CHIP_MULLINS:
7101 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7102 		buffer[count++] = cpu_to_le32(0x00000000);
7103 		break;
7104 	case CHIP_HAWAII:
7105 		buffer[count++] = cpu_to_le32(0x3a00161a);
7106 		buffer[count++] = cpu_to_le32(0x0000002e);
7107 		break;
7108 	default:
7109 		buffer[count++] = cpu_to_le32(0x00000000);
7110 		buffer[count++] = cpu_to_le32(0x00000000);
7111 		break;
7112 	}
7113 
7114 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7115 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
7116 
7117 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
7118 	buffer[count++] = cpu_to_le32(0);
7119 }
7120 
7121 static void cik_init_pg(struct radeon_device *rdev)
7122 {
7123 	if (rdev->pg_flags) {
7124 		cik_enable_sck_slowdown_on_pu(rdev, true);
7125 		cik_enable_sck_slowdown_on_pd(rdev, true);
7126 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7127 			cik_init_gfx_cgpg(rdev);
7128 			cik_enable_cp_pg(rdev, true);
7129 			cik_enable_gds_pg(rdev, true);
7130 		}
7131 		cik_init_ao_cu_mask(rdev);
7132 		cik_update_gfx_pg(rdev, true);
7133 	}
7134 }
7135 
7136 static void cik_fini_pg(struct radeon_device *rdev)
7137 {
7138 	if (rdev->pg_flags) {
7139 		cik_update_gfx_pg(rdev, false);
7140 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7141 			cik_enable_cp_pg(rdev, false);
7142 			cik_enable_gds_pg(rdev, false);
7143 		}
7144 	}
7145 }
7146 
7147 /*
7148  * Interrupts
7149  * Starting with r6xx, interrupts are handled via a ring buffer.
7150  * Ring buffers are areas of GPU accessible memory that the GPU
7151  * writes interrupt vectors into and the host reads vectors out of.
7152  * There is a rptr (read pointer) that determines where the
7153  * host is currently reading, and a wptr (write pointer)
7154  * which determines where the GPU has written.  When the
7155  * pointers are equal, the ring is idle.  When the GPU
7156  * writes vectors to the ring buffer, it increments the
7157  * wptr.  When there is an interrupt, the host then starts
7158  * fetching vectors and processing them until the pointers are
7159  * equal again, at which point it updates the rptr.
7160  */
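/*
 * Illustrative sketch of the host-side consume loop described above
 * (simplified from cik_irq_process() below; ih_ring and ptr_mask stand
 * in for the rdev->ih fields and are not literal driver symbols):
 *
 *	while (rptr != wptr) {
 *		handle_vector(&ih_ring[rptr / 4]);	// one 16-byte IV entry
 *		rptr = (rptr + 16) & ptr_mask;		// pointers are in bytes
 *	}
 *	WREG32(IH_RB_RPTR, rptr);			// publish the new read pointer
 */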
7161 
7162 /**
7163  * cik_enable_interrupts - Enable the interrupt ring buffer
7164  *
7165  * @rdev: radeon_device pointer
7166  *
7167  * Enable the interrupt ring buffer (CIK).
7168  */
7169 static void cik_enable_interrupts(struct radeon_device *rdev)
7170 {
7171 	u32 ih_cntl = RREG32(IH_CNTL);
7172 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7173 
7174 	ih_cntl |= ENABLE_INTR;
7175 	ih_rb_cntl |= IH_RB_ENABLE;
7176 	WREG32(IH_CNTL, ih_cntl);
7177 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7178 	rdev->ih.enabled = true;
7179 }
7180 
7181 /**
7182  * cik_disable_interrupts - Disable the interrupt ring buffer
7183  *
7184  * @rdev: radeon_device pointer
7185  *
7186  * Disable the interrupt ring buffer (CIK).
7187  */
7188 static void cik_disable_interrupts(struct radeon_device *rdev)
7189 {
7190 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7191 	u32 ih_cntl = RREG32(IH_CNTL);
7192 
7193 	ih_rb_cntl &= ~IH_RB_ENABLE;
7194 	ih_cntl &= ~ENABLE_INTR;
7195 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7196 	WREG32(IH_CNTL, ih_cntl);
7197 	/* set rptr, wptr to 0 */
7198 	WREG32(IH_RB_RPTR, 0);
7199 	WREG32(IH_RB_WPTR, 0);
7200 	rdev->ih.enabled = false;
7201 	rdev->ih.rptr = 0;
7202 }
7203 
7204 /**
7205  * cik_disable_interrupt_state - Disable all interrupt sources
7206  *
7207  * @rdev: radeon_device pointer
7208  *
7209  * Clear all interrupt enable bits used by the driver (CIK).
7210  */
7211 static void cik_disable_interrupt_state(struct radeon_device *rdev)
7212 {
7213 	u32 tmp;
7214 
7215 	/* gfx ring */
7216 	tmp = RREG32(CP_INT_CNTL_RING0) &
7217 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7218 	WREG32(CP_INT_CNTL_RING0, tmp);
7219 	/* sdma */
7220 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7221 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
7222 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7223 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
7224 	/* compute queues */
7225 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
7226 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
7227 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
7228 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
7229 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
7230 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
7231 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
7232 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
7233 	/* grbm */
7234 	WREG32(GRBM_INT_CNTL, 0);
7235 	/* SRBM */
7236 	WREG32(SRBM_INT_CNTL, 0);
7237 	/* vline/vblank, etc. */
7238 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7239 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7240 	if (rdev->num_crtc >= 4) {
7241 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7242 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7243 	}
7244 	if (rdev->num_crtc >= 6) {
7245 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7246 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7247 	}
7248 	/* pflip */
7249 	if (rdev->num_crtc >= 2) {
7250 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7251 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7252 	}
7253 	if (rdev->num_crtc >= 4) {
7254 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7255 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7256 	}
7257 	if (rdev->num_crtc >= 6) {
7258 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7259 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7260 	}
7261 
7262 	/* dac hotplug */
7263 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7264 
7265 	/* digital hotplug */
7266 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7267 	WREG32(DC_HPD1_INT_CONTROL, tmp);
7268 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7269 	WREG32(DC_HPD2_INT_CONTROL, tmp);
7270 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7271 	WREG32(DC_HPD3_INT_CONTROL, tmp);
7272 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7273 	WREG32(DC_HPD4_INT_CONTROL, tmp);
7274 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7275 	WREG32(DC_HPD5_INT_CONTROL, tmp);
7276 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7277 	WREG32(DC_HPD6_INT_CONTROL, tmp);
7279 }
7280 
7281 /**
7282  * cik_irq_init - init and enable the interrupt ring
7283  *
7284  * @rdev: radeon_device pointer
7285  *
7286  * Allocate a ring buffer for the interrupt controller,
7287  * enable the RLC, disable interrupts, then set up and
7288  * enable the IH ring buffer (CIK).
7289  * Called at device load and resume.
7290  * Returns 0 for success, errors for failure.
7291  */
7292 static int cik_irq_init(struct radeon_device *rdev)
7293 {
7294 	int ret = 0;
7295 	int rb_bufsz;
7296 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7297 
7298 	/* allocate ring */
7299 	ret = r600_ih_ring_alloc(rdev);
7300 	if (ret)
7301 		return ret;
7302 
7303 	/* disable irqs */
7304 	cik_disable_interrupts(rdev);
7305 
7306 	/* init rlc */
7307 	ret = cik_rlc_resume(rdev);
7308 	if (ret) {
7309 		r600_ih_ring_fini(rdev);
7310 		return ret;
7311 	}
7312 
7313 	/* setup interrupt control */
7314 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
7315 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7316 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
7317 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7318 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7319 	 */
7320 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7321 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7322 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7323 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
7324 
7325 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
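	/* the RB size field is log2 of the ring size in dwords (bytes / 4),
	 * stored starting at bit 1 of IH_RB_CNTL
	 */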
7326 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7327 
7328 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7329 		      IH_WPTR_OVERFLOW_CLEAR |
7330 		      (rb_bufsz << 1));
7331 
7332 	if (rdev->wb.enabled)
7333 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7334 
7335 	/* set the writeback address whether it's enabled or not */
7336 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7337 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7338 
7339 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7340 
7341 	/* set rptr, wptr to 0 */
7342 	WREG32(IH_RB_RPTR, 0);
7343 	WREG32(IH_RB_WPTR, 0);
7344 
7345 	/* Default settings for IH_CNTL (disabled at first) */
7346 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7347 	/* RPTR_REARM only works if msi's are enabled */
7348 	if (rdev->msi_enabled)
7349 		ih_cntl |= RPTR_REARM;
7350 	WREG32(IH_CNTL, ih_cntl);
7351 
7352 	/* force the active interrupt state to all disabled */
7353 	cik_disable_interrupt_state(rdev);
7354 
7355 	pci_set_master(rdev->pdev);
7356 
7357 	/* enable irqs */
7358 	cik_enable_interrupts(rdev);
7359 
7360 	return ret;
7361 }
7362 
7363 /**
7364  * cik_irq_set - enable/disable interrupt sources
7365  *
7366  * @rdev: radeon_device pointer
7367  *
7368  * Enable interrupt sources on the GPU (vblanks, hpd,
7369  * etc.) (CIK).
7370  * Returns 0 for success, errors for failure.
7371  */
7372 int cik_irq_set(struct radeon_device *rdev)
7373 {
7374 	u32 cp_int_cntl;
7375 	u32 cp_m1p0;
7376 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7377 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7378 	u32 grbm_int_cntl = 0;
7379 	u32 dma_cntl, dma_cntl1;
7380 
7381 	if (!rdev->irq.installed) {
7382 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7383 		return -EINVAL;
7384 	}
7385 	/* don't enable anything if the ih is disabled */
7386 	if (!rdev->ih.enabled) {
7387 		cik_disable_interrupts(rdev);
7388 		/* force the active interrupt state to all disabled */
7389 		cik_disable_interrupt_state(rdev);
7390 		return 0;
7391 	}
7392 
7393 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7394 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7395 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7396 
7397 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
7398 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
7399 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
7400 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
7401 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
7402 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
7403 
7404 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7405 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7406 
7407 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7408 
7409 	/* enable CP interrupts on all rings */
7410 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7411 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7412 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7413 	}
7414 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7415 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7416 		DRM_DEBUG("si_irq_set: sw int cp1\n");
7417 		if (ring->me == 1) {
7418 			switch (ring->pipe) {
7419 			case 0:
7420 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7421 				break;
7422 			default:
7423 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7424 				break;
7425 			}
7426 		} else {
7427 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7428 		}
7429 	}
7430 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7431 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7432 		DRM_DEBUG("si_irq_set: sw int cp2\n");
7433 		if (ring->me == 1) {
7434 			switch (ring->pipe) {
7435 			case 0:
7436 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7437 				break;
7438 			default:
7439 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7440 				break;
7441 			}
7442 		} else {
7443 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7444 		}
7445 	}
7446 
7447 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7448 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7449 		dma_cntl |= TRAP_ENABLE;
7450 	}
7451 
7452 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7453 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7454 		dma_cntl1 |= TRAP_ENABLE;
7455 	}
7456 
7457 	if (rdev->irq.crtc_vblank_int[0] ||
7458 	    atomic_read(&rdev->irq.pflip[0])) {
7459 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7460 		crtc1 |= VBLANK_INTERRUPT_MASK;
7461 	}
7462 	if (rdev->irq.crtc_vblank_int[1] ||
7463 	    atomic_read(&rdev->irq.pflip[1])) {
7464 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7465 		crtc2 |= VBLANK_INTERRUPT_MASK;
7466 	}
7467 	if (rdev->irq.crtc_vblank_int[2] ||
7468 	    atomic_read(&rdev->irq.pflip[2])) {
7469 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7470 		crtc3 |= VBLANK_INTERRUPT_MASK;
7471 	}
7472 	if (rdev->irq.crtc_vblank_int[3] ||
7473 	    atomic_read(&rdev->irq.pflip[3])) {
7474 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7475 		crtc4 |= VBLANK_INTERRUPT_MASK;
7476 	}
7477 	if (rdev->irq.crtc_vblank_int[4] ||
7478 	    atomic_read(&rdev->irq.pflip[4])) {
7479 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7480 		crtc5 |= VBLANK_INTERRUPT_MASK;
7481 	}
7482 	if (rdev->irq.crtc_vblank_int[5] ||
7483 	    atomic_read(&rdev->irq.pflip[5])) {
7484 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7485 		crtc6 |= VBLANK_INTERRUPT_MASK;
7486 	}
7487 	if (rdev->irq.hpd[0]) {
7488 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7489 		hpd1 |= DC_HPDx_INT_EN;
7490 	}
7491 	if (rdev->irq.hpd[1]) {
7492 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7493 		hpd2 |= DC_HPDx_INT_EN;
7494 	}
7495 	if (rdev->irq.hpd[2]) {
7496 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7497 		hpd3 |= DC_HPDx_INT_EN;
7498 	}
7499 	if (rdev->irq.hpd[3]) {
7500 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7501 		hpd4 |= DC_HPDx_INT_EN;
7502 	}
7503 	if (rdev->irq.hpd[4]) {
7504 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7505 		hpd5 |= DC_HPDx_INT_EN;
7506 	}
7507 	if (rdev->irq.hpd[5]) {
7508 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7509 		hpd6 |= DC_HPDx_INT_EN;
7510 	}
7511 
7512 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7513 
7514 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7515 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7516 
7517 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7518 
7519 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7520 
7521 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7522 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7523 	if (rdev->num_crtc >= 4) {
7524 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7525 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7526 	}
7527 	if (rdev->num_crtc >= 6) {
7528 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7529 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7530 	}
7531 
7532 	if (rdev->num_crtc >= 2) {
7533 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7534 		       GRPH_PFLIP_INT_MASK);
7535 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7536 		       GRPH_PFLIP_INT_MASK);
7537 	}
7538 	if (rdev->num_crtc >= 4) {
7539 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7540 		       GRPH_PFLIP_INT_MASK);
7541 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7542 		       GRPH_PFLIP_INT_MASK);
7543 	}
7544 	if (rdev->num_crtc >= 6) {
7545 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7546 		       GRPH_PFLIP_INT_MASK);
7547 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7548 		       GRPH_PFLIP_INT_MASK);
7549 	}
7550 
7551 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7552 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7553 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7554 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7555 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7556 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7557 
7558 	/* posting read */
7559 	RREG32(SRBM_STATUS);
7560 
7561 	return 0;
7562 }
7563 
7564 /**
7565  * cik_irq_ack - ack interrupt sources
7566  *
7567  * @rdev: radeon_device pointer
7568  *
7569  * Ack interrupt sources on the GPU (vblanks, hpd,
7570  * etc.) (CIK).  Certain interrupt sources are sw
7571  * generated and do not require an explicit ack.
7572  */
7573 static inline void cik_irq_ack(struct radeon_device *rdev)
7574 {
7575 	u32 tmp;
7576 
7577 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7578 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7579 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7580 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7581 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7582 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7583 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7584 
7585 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7586 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7587 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7588 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7589 	if (rdev->num_crtc >= 4) {
7590 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7591 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7592 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7593 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7594 	}
7595 	if (rdev->num_crtc >= 6) {
7596 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7597 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7598 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7599 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7600 	}
7601 
7602 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7603 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7604 		       GRPH_PFLIP_INT_CLEAR);
7605 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7606 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7607 		       GRPH_PFLIP_INT_CLEAR);
7608 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7609 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7610 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7611 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7612 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7613 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7614 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7615 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7616 
7617 	if (rdev->num_crtc >= 4) {
7618 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7619 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7620 			       GRPH_PFLIP_INT_CLEAR);
7621 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7622 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7623 			       GRPH_PFLIP_INT_CLEAR);
7624 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7625 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7626 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7627 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7628 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7629 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7630 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7631 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7632 	}
7633 
7634 	if (rdev->num_crtc >= 6) {
7635 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7636 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7637 			       GRPH_PFLIP_INT_CLEAR);
7638 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7639 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7640 			       GRPH_PFLIP_INT_CLEAR);
7641 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7642 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7643 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7644 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7645 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7646 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7647 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7648 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7649 	}
7650 
7651 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7652 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7653 		tmp |= DC_HPDx_INT_ACK;
7654 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7655 	}
7656 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7657 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7658 		tmp |= DC_HPDx_INT_ACK;
7659 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7660 	}
7661 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7662 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7663 		tmp |= DC_HPDx_INT_ACK;
7664 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7665 	}
7666 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7667 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7668 		tmp |= DC_HPDx_INT_ACK;
7669 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7670 	}
7671 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7672 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7673 		tmp |= DC_HPDx_INT_ACK;
7674 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7675 	}
7676 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7677 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7678 		tmp |= DC_HPDx_INT_ACK;
7679 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7680 	}
7681 }
7682 
7683 /**
7684  * cik_irq_disable - disable interrupts
7685  *
7686  * @rdev: radeon_device pointer
7687  *
7688  * Disable interrupts on the hw (CIK).
7689  */
7690 static void cik_irq_disable(struct radeon_device *rdev)
7691 {
7692 	cik_disable_interrupts(rdev);
7693 	/* Wait and acknowledge irq */
7694 	mdelay(1);
7695 	cik_irq_ack(rdev);
7696 	cik_disable_interrupt_state(rdev);
7697 }
7698 
7699 /**
7700  * cik_irq_suspend - disable interrupts for suspend
7701  *
7702  * @rdev: radeon_device pointer
7703  *
7704  * Disable interrupts and stop the RLC (CIK).
7705  * Used for suspend.
7706  */
7707 static void cik_irq_suspend(struct radeon_device *rdev)
7708 {
7709 	cik_irq_disable(rdev);
7710 	cik_rlc_stop(rdev);
7711 }
7712 
7713 /**
7714  * cik_irq_fini - tear down interrupt support
7715  *
7716  * @rdev: radeon_device pointer
7717  *
7718  * Disable interrupts on the hw and free the IH ring
7719  * buffer (CIK).
7720  * Used for driver unload.
7721  */
7722 static void cik_irq_fini(struct radeon_device *rdev)
7723 {
7724 	cik_irq_suspend(rdev);
7725 	r600_ih_ring_fini(rdev);
7726 }
7727 
7728 /**
7729  * cik_get_ih_wptr - get the IH ring buffer wptr
7730  *
7731  * @rdev: radeon_device pointer
7732  *
7733  * Get the IH ring buffer wptr from either the register
7734  * or the writeback memory buffer (CIK).  Also check for
7735  * ring buffer overflow and deal with it.
7736  * Used by cik_irq_process().
7737  * Returns the value of the wptr.
7738  */
7739 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7740 {
7741 	u32 wptr, tmp;
7742 
7743 	if (rdev->wb.enabled)
7744 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7745 	else
7746 		wptr = RREG32(IH_RB_WPTR);
7747 
7748 	if (wptr & RB_OVERFLOW) {
7749 		wptr &= ~RB_OVERFLOW;
7750 		/* When a ring buffer overflow happens, start parsing from
7751 		 * the last vector that was not overwritten (wptr + 16).
7752 		 * Hopefully this should allow us to catch up.
7753 		 */
7754 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7755 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7756 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7757 		tmp = RREG32(IH_RB_CNTL);
7758 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7759 		WREG32(IH_RB_CNTL, tmp);
7760 	}
7761 	return (wptr & rdev->ih.ptr_mask);
7762 }
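/*
 * Worked example (illustrative): with a 64 KiB IH ring, ptr_mask is 0xffff.
 * If wptr reads back as 0x0010 with RB_OVERFLOW set just after a wrap, the
 * oldest vector that is still intact starts at (0x0010 + 16) & 0xffff =
 * 0x0020, so rptr resumes there and the overwritten entries are skipped.
 */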
7763 
7764 /*        CIK IV Ring
7765  * Each IV ring entry is 128 bits:
7766  * [7:0]    - interrupt source id
7767  * [31:8]   - reserved
7768  * [59:32]  - interrupt source data
7769  * [63:60]  - reserved
7770  * [71:64]  - RINGID
7771  *            CP:
7772  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7773  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7774  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7775  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7776  *            PIPE_ID - ME0 0=3D
7777  *                    - ME1&2 compute dispatcher (4 pipes each)
7778  *            SDMA:
7779  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7780  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7781  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7782  * [79:72]  - VMID
7783  * [95:80]  - PASID
7784  * [127:96] - reserved
7785  */
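/*
 * For reference, the decode at the top of cik_irq_process() maps these
 * fields onto the first three dwords of each entry:
 *	src_id   = ring[idx + 0] & 0xff;	// bits [7:0]
 *	src_data = ring[idx + 1] & 0x0fffffff;	// bits [59:32]
 *	ring_id  = ring[idx + 2] & 0xff;	// bits [71:64]
 */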
7786 /**
7787  * cik_irq_process - interrupt handler
7788  *
7789  * @rdev: radeon_device pointer
7790  *
7791  * Interrupt handler (CIK).  Walk the IH ring,
7792  * ack interrupts and schedule work to handle
7793  * interrupt events.
7794  * Returns irq process return code.
7795  */
7796 int cik_irq_process(struct radeon_device *rdev)
7797 {
7798 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7799 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7800 	u32 wptr;
7801 	u32 rptr;
7802 	u32 src_id, src_data, ring_id;
7803 	u8 me_id, pipe_id, queue_id;
7804 	u32 ring_index;
7805 	bool queue_hotplug = false;
7806 	bool queue_reset = false;
7807 	u32 addr, status, mc_client;
7808 	bool queue_thermal = false;
7809 
7810 	if (!rdev->ih.enabled || rdev->shutdown)
7811 		return IRQ_NONE;
7812 
7813 	wptr = cik_get_ih_wptr(rdev);
7814 
7815 restart_ih:
7816 	/* is somebody else already processing irqs? */
7817 	if (atomic_xchg(&rdev->ih.lock, 1))
7818 		return IRQ_NONE;
7819 
7820 	rptr = rdev->ih.rptr;
7821 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7822 
7823 	/* Order reading of wptr vs. reading of IH ring data */
7824 	rmb();
7825 
7826 	/* display interrupts */
7827 	cik_irq_ack(rdev);
7828 
7829 	while (rptr != wptr) {
7830 		/* wptr/rptr are in bytes! */
7831 		ring_index = rptr / 4;
7832 
7833 		radeon_kfd_interrupt(rdev,
7834 				(const void *) &rdev->ih.ring[ring_index]);
7835 
7836 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7837 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7838 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7839 
7840 		switch (src_id) {
7841 		case 1: /* D1 vblank/vline */
7842 			switch (src_data) {
7843 			case 0: /* D1 vblank */
7844 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7845 					if (rdev->irq.crtc_vblank_int[0]) {
7846 						drm_handle_vblank(rdev->ddev, 0);
7847 						rdev->pm.vblank_sync = true;
7848 						wake_up(&rdev->irq.vblank_queue);
7849 					}
7850 					if (atomic_read(&rdev->irq.pflip[0]))
7851 						radeon_crtc_handle_vblank(rdev, 0);
7852 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7853 					DRM_DEBUG("IH: D1 vblank\n");
7854 				}
7855 				break;
7856 			case 1: /* D1 vline */
7857 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7858 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7859 					DRM_DEBUG("IH: D1 vline\n");
7860 				}
7861 				break;
7862 			default:
7863 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7864 				break;
7865 			}
7866 			break;
7867 		case 2: /* D2 vblank/vline */
7868 			switch (src_data) {
7869 			case 0: /* D2 vblank */
7870 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7871 					if (rdev->irq.crtc_vblank_int[1]) {
7872 						drm_handle_vblank(rdev->ddev, 1);
7873 						rdev->pm.vblank_sync = true;
7874 						wake_up(&rdev->irq.vblank_queue);
7875 					}
7876 					if (atomic_read(&rdev->irq.pflip[1]))
7877 						radeon_crtc_handle_vblank(rdev, 1);
7878 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7879 					DRM_DEBUG("IH: D2 vblank\n");
7880 				}
7881 				break;
7882 			case 1: /* D2 vline */
7883 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7884 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7885 					DRM_DEBUG("IH: D2 vline\n");
7886 				}
7887 				break;
7888 			default:
7889 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7890 				break;
7891 			}
7892 			break;
7893 		case 3: /* D3 vblank/vline */
7894 			switch (src_data) {
7895 			case 0: /* D3 vblank */
7896 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7897 					if (rdev->irq.crtc_vblank_int[2]) {
7898 						drm_handle_vblank(rdev->ddev, 2);
7899 						rdev->pm.vblank_sync = true;
7900 						wake_up(&rdev->irq.vblank_queue);
7901 					}
7902 					if (atomic_read(&rdev->irq.pflip[2]))
7903 						radeon_crtc_handle_vblank(rdev, 2);
7904 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7905 					DRM_DEBUG("IH: D3 vblank\n");
7906 				}
7907 				break;
7908 			case 1: /* D3 vline */
7909 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7910 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7911 					DRM_DEBUG("IH: D3 vline\n");
7912 				}
7913 				break;
7914 			default:
7915 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7916 				break;
7917 			}
7918 			break;
7919 		case 4: /* D4 vblank/vline */
7920 			switch (src_data) {
7921 			case 0: /* D4 vblank */
7922 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7923 					if (rdev->irq.crtc_vblank_int[3]) {
7924 						drm_handle_vblank(rdev->ddev, 3);
7925 						rdev->pm.vblank_sync = true;
7926 						wake_up(&rdev->irq.vblank_queue);
7927 					}
7928 					if (atomic_read(&rdev->irq.pflip[3]))
7929 						radeon_crtc_handle_vblank(rdev, 3);
7930 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7931 					DRM_DEBUG("IH: D4 vblank\n");
7932 				}
7933 				break;
7934 			case 1: /* D4 vline */
7935 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7936 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7937 					DRM_DEBUG("IH: D4 vline\n");
7938 				}
7939 				break;
7940 			default:
7941 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7942 				break;
7943 			}
7944 			break;
7945 		case 5: /* D5 vblank/vline */
7946 			switch (src_data) {
7947 			case 0: /* D5 vblank */
7948 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7949 					if (rdev->irq.crtc_vblank_int[4]) {
7950 						drm_handle_vblank(rdev->ddev, 4);
7951 						rdev->pm.vblank_sync = true;
7952 						wake_up(&rdev->irq.vblank_queue);
7953 					}
7954 					if (atomic_read(&rdev->irq.pflip[4]))
7955 						radeon_crtc_handle_vblank(rdev, 4);
7956 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7957 					DRM_DEBUG("IH: D5 vblank\n");
7958 				}
7959 				break;
7960 			case 1: /* D5 vline */
7961 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7962 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7963 					DRM_DEBUG("IH: D5 vline\n");
7964 				}
7965 				break;
7966 			default:
7967 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7968 				break;
7969 			}
7970 			break;
7971 		case 6: /* D6 vblank/vline */
7972 			switch (src_data) {
7973 			case 0: /* D6 vblank */
7974 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7975 					if (rdev->irq.crtc_vblank_int[5]) {
7976 						drm_handle_vblank(rdev->ddev, 5);
7977 						rdev->pm.vblank_sync = true;
7978 						wake_up(&rdev->irq.vblank_queue);
7979 					}
7980 					if (atomic_read(&rdev->irq.pflip[5]))
7981 						radeon_crtc_handle_vblank(rdev, 5);
7982 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7983 					DRM_DEBUG("IH: D6 vblank\n");
7984 				}
7985 				break;
7986 			case 1: /* D6 vline */
7987 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7988 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7989 					DRM_DEBUG("IH: D6 vline\n");
7990 				}
7991 				break;
7992 			default:
7993 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7994 				break;
7995 			}
7996 			break;
7997 		case 8: /* D1 page flip */
7998 		case 10: /* D2 page flip */
7999 		case 12: /* D3 page flip */
8000 		case 14: /* D4 page flip */
8001 		case 16: /* D5 page flip */
8002 		case 18: /* D6 page flip */
8003 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
8004 			if (radeon_use_pflipirq > 0)
8005 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
8006 			break;
8007 		case 42: /* HPD hotplug */
8008 			switch (src_data) {
8009 			case 0:
8010 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
8011 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
8012 					queue_hotplug = true;
8013 					DRM_DEBUG("IH: HPD1\n");
8014 				}
8015 				break;
8016 			case 1:
8017 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
8018 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8019 					queue_hotplug = true;
8020 					DRM_DEBUG("IH: HPD2\n");
8021 				}
8022 				break;
8023 			case 2:
8024 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
8025 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8026 					queue_hotplug = true;
8027 					DRM_DEBUG("IH: HPD3\n");
8028 				}
8029 				break;
8030 			case 3:
8031 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
8032 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8033 					queue_hotplug = true;
8034 					DRM_DEBUG("IH: HPD4\n");
8035 				}
8036 				break;
8037 			case 4:
8038 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
8039 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8040 					queue_hotplug = true;
8041 					DRM_DEBUG("IH: HPD5\n");
8042 				}
8043 				break;
8044 			case 5:
8045 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
8046 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8047 					queue_hotplug = true;
8048 					DRM_DEBUG("IH: HPD6\n");
8049 				}
8050 				break;
8051 			default:
8052 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8053 				break;
8054 			}
8055 			break;
8056 		case 96: /* SRBM read error */
8057 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
8058 			WREG32(SRBM_INT_ACK, 0x1);
8059 			break;
8060 		case 124: /* UVD */
8061 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8062 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8063 			break;
8064 		case 146:
8065 		case 147:
8066 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8067 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8068 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8069 			/* reset addr and status */
8070 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8071 			if (addr == 0x0 && status == 0x0)
8072 				break;
8073 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8074 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8075 				addr);
8076 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8077 				status);
8078 			cik_vm_decode_fault(rdev, status, addr, mc_client);
8079 			break;
8080 		case 167: /* VCE */
8081 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8082 			switch (src_data) {
8083 			case 0:
8084 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8085 				break;
8086 			case 1:
8087 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8088 				break;
8089 			default:
8090 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8091 				break;
8092 			}
8093 			break;
8094 		case 176: /* GFX RB CP_INT */
8095 		case 177: /* GFX IB CP_INT */
8096 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8097 			break;
8098 		case 181: /* CP EOP event */
8099 			DRM_DEBUG("IH: CP EOP\n");
8100 			/* XXX check the bitfield order! */
8101 			me_id = (ring_id & 0x60) >> 5;
8102 			pipe_id = (ring_id & 0x18) >> 3;
8103 			queue_id = (ring_id & 0x7) >> 0;
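			/* e.g. ring_id 0x28 decodes to me 1, pipe 1, queue 0 */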
8104 			switch (me_id) {
8105 			case 0:
8106 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8107 				break;
8108 			case 1:
8109 			case 2:
8110 				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
8111 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8112 				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
8113 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8114 				break;
8115 			}
8116 			break;
8117 		case 184: /* CP Privileged reg access */
8118 			DRM_ERROR("Illegal register access in command stream\n");
8119 			/* XXX check the bitfield order! */
8120 			me_id = (ring_id & 0x60) >> 5;
8121 			pipe_id = (ring_id & 0x18) >> 3;
8122 			queue_id = (ring_id & 0x7) >> 0;
8123 			switch (me_id) {
8124 			case 0:
8125 				/* This results in a full GPU reset, but all we need to do is soft
8126 				 * reset the CP for gfx
8127 				 */
8128 				queue_reset = true;
8129 				break;
8130 			case 1:
8131 				/* XXX compute */
8132 				queue_reset = true;
8133 				break;
8134 			case 2:
8135 				/* XXX compute */
8136 				queue_reset = true;
8137 				break;
8138 			}
8139 			break;
8140 		case 185: /* CP Privileged inst */
8141 			DRM_ERROR("Illegal instruction in command stream\n");
8142 			/* XXX check the bitfield order! */
8143 			me_id = (ring_id & 0x60) >> 5;
8144 			pipe_id = (ring_id & 0x18) >> 3;
8145 			queue_id = (ring_id & 0x7) >> 0;
8146 			switch (me_id) {
8147 			case 0:
8148 				/* This results in a full GPU reset, but all we need to do is soft
8149 				 * reset the CP for gfx
8150 				 */
8151 				queue_reset = true;
8152 				break;
8153 			case 1:
8154 				/* XXX compute */
8155 				queue_reset = true;
8156 				break;
8157 			case 2:
8158 				/* XXX compute */
8159 				queue_reset = true;
8160 				break;
8161 			}
8162 			break;
8163 		case 224: /* SDMA trap event */
8164 			/* XXX check the bitfield order! */
8165 			me_id = (ring_id & 0x3) >> 0;
8166 			queue_id = (ring_id & 0xc) >> 2;
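			/* e.g. ring_id 0x5 decodes to sdma1 (me 1), queue 1 (rlc0) */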
8167 			DRM_DEBUG("IH: SDMA trap\n");
8168 			switch (me_id) {
8169 			case 0:
8170 				switch (queue_id) {
8171 				case 0:
8172 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8173 					break;
8174 				case 1:
8175 					/* XXX compute */
8176 					break;
8177 				case 2:
8178 					/* XXX compute */
8179 					break;
8180 				}
8181 				break;
8182 			case 1:
8183 				switch (queue_id) {
8184 				case 0:
8185 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8186 					break;
8187 				case 1:
8188 					/* XXX compute */
8189 					break;
8190 				case 2:
8191 					/* XXX compute */
8192 					break;
8193 				}
8194 				break;
8195 			}
8196 			break;
8197 		case 230: /* thermal low to high */
8198 			DRM_DEBUG("IH: thermal low to high\n");
8199 			rdev->pm.dpm.thermal.high_to_low = false;
8200 			queue_thermal = true;
8201 			break;
8202 		case 231: /* thermal high to low */
8203 			DRM_DEBUG("IH: thermal high to low\n");
8204 			rdev->pm.dpm.thermal.high_to_low = true;
8205 			queue_thermal = true;
8206 			break;
8207 		case 233: /* GUI IDLE */
8208 			DRM_DEBUG("IH: GUI idle\n");
8209 			break;
8210 		case 241: /* SDMA Privileged inst */
8211 		case 247: /* SDMA Privileged inst */
8212 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8213 			/* XXX check the bitfield order! */
8214 			me_id = (ring_id & 0x3) >> 0;
8215 			queue_id = (ring_id & 0xc) >> 2;
8216 			switch (me_id) {
8217 			case 0:
8218 				switch (queue_id) {
8219 				case 0:
8220 					queue_reset = true;
8221 					break;
8222 				case 1:
8223 					/* XXX compute */
8224 					queue_reset = true;
8225 					break;
8226 				case 2:
8227 					/* XXX compute */
8228 					queue_reset = true;
8229 					break;
8230 				}
8231 				break;
8232 			case 1:
8233 				switch (queue_id) {
8234 				case 0:
8235 					queue_reset = true;
8236 					break;
8237 				case 1:
8238 					/* XXX compute */
8239 					queue_reset = true;
8240 					break;
8241 				case 2:
8242 					/* XXX compute */
8243 					queue_reset = true;
8244 					break;
8245 				}
8246 				break;
8247 			}
8248 			break;
8249 		default:
8250 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8251 			break;
8252 		}
8253 
8254 		/* wptr/rptr are in bytes! */
8255 		rptr += 16;
8256 		rptr &= rdev->ih.ptr_mask;
8257 		WREG32(IH_RB_RPTR, rptr);
8258 	}
8259 	if (queue_hotplug)
8260 		schedule_work(&rdev->hotplug_work);
8261 	if (queue_reset) {
8262 		rdev->needs_reset = true;
8263 		wake_up_all(&rdev->fence_queue);
8264 	}
8265 	if (queue_thermal)
8266 		schedule_work(&rdev->pm.dpm.thermal.work);
8267 	rdev->ih.rptr = rptr;
8268 	atomic_set(&rdev->ih.lock, 0);
8269 
8270 	/* make sure wptr hasn't changed while processing */
8271 	wptr = cik_get_ih_wptr(rdev);
8272 	if (wptr != rptr)
8273 		goto restart_ih;
8274 
8275 	return IRQ_HANDLED;
8276 }
8277 
8278 /*
8279  * startup/shutdown callbacks
8280  */
8281 /**
8282  * cik_startup - program the asic to a functional state
8283  *
8284  * @rdev: radeon_device pointer
8285  *
8286  * Programs the asic to a functional state (CIK).
8287  * Called by cik_init() and cik_resume().
8288  * Returns 0 for success, error for failure.
8289  */
8290 static int cik_startup(struct radeon_device *rdev)
8291 {
8292 	struct radeon_ring *ring;
8293 	u32 nop;
8294 	int r;
8295 
8296 	/* enable pcie gen2/3 link */
8297 	cik_pcie_gen3_enable(rdev);
8298 	/* enable aspm */
8299 	cik_program_aspm(rdev);
8300 
8301 	/* scratch needs to be initialized before MC */
8302 	r = r600_vram_scratch_init(rdev);
8303 	if (r)
8304 		return r;
8305 
8306 	cik_mc_program(rdev);
8307 
8308 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8309 		r = ci_mc_load_microcode(rdev);
8310 		if (r) {
8311 			DRM_ERROR("Failed to load MC firmware!\n");
8312 			return r;
8313 		}
8314 	}
8315 
8316 	r = cik_pcie_gart_enable(rdev);
8317 	if (r)
8318 		return r;
8319 	cik_gpu_init(rdev);
8320 
8321 	/* allocate rlc buffers */
8322 	if (rdev->flags & RADEON_IS_IGP) {
8323 		if (rdev->family == CHIP_KAVERI) {
8324 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8325 			rdev->rlc.reg_list_size =
8326 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8327 		} else {
8328 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8329 			rdev->rlc.reg_list_size =
8330 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8331 		}
8332 	}
8333 	rdev->rlc.cs_data = ci_cs_data;
8334 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8335 	r = sumo_rlc_init(rdev);
8336 	if (r) {
8337 		DRM_ERROR("Failed to init rlc BOs!\n");
8338 		return r;
8339 	}
8340 
8341 	/* allocate wb buffer */
8342 	r = radeon_wb_init(rdev);
8343 	if (r)
8344 		return r;
8345 
8346 	/* allocate mec buffers */
8347 	r = cik_mec_init(rdev);
8348 	if (r) {
8349 		DRM_ERROR("Failed to init MEC BOs!\n");
8350 		return r;
8351 	}
8352 
8353 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8354 	if (r) {
8355 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8356 		return r;
8357 	}
8358 
8359 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8360 	if (r) {
8361 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8362 		return r;
8363 	}
8364 
8365 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8366 	if (r) {
8367 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8368 		return r;
8369 	}
8370 
8371 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8372 	if (r) {
8373 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8374 		return r;
8375 	}
8376 
8377 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8378 	if (r) {
8379 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8380 		return r;
8381 	}
8382 
8383 	r = radeon_uvd_resume(rdev);
8384 	if (!r) {
8385 		r = uvd_v4_2_resume(rdev);
8386 		if (!r) {
8387 			r = radeon_fence_driver_start_ring(rdev,
8388 							   R600_RING_TYPE_UVD_INDEX);
8389 			if (r)
8390 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8391 		}
8392 	}
8393 	if (r)
8394 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8395 
8396 	r = radeon_vce_resume(rdev);
8397 	if (!r) {
8398 		r = vce_v2_0_resume(rdev);
8399 		if (!r)
8400 			r = radeon_fence_driver_start_ring(rdev,
8401 							   TN_RING_TYPE_VCE1_INDEX);
8402 		if (!r)
8403 			r = radeon_fence_driver_start_ring(rdev,
8404 							   TN_RING_TYPE_VCE2_INDEX);
8405 	}
8406 	if (r) {
8407 		dev_err(rdev->dev, "VCE init error (%d).\n", r);
8408 		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8409 		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8410 	}
8411 
8412 	/* Enable IRQ */
8413 	if (!rdev->irq.installed) {
8414 		r = radeon_irq_kms_init(rdev);
8415 		if (r)
8416 			return r;
8417 	}
8418 
8419 	r = cik_irq_init(rdev);
8420 	if (r) {
8421 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8422 		radeon_irq_kms_fini(rdev);
8423 		return r;
8424 	}
8425 	cik_irq_set(rdev);
8426 
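	/* Choose the ring-padding NOP.  A type-3 NOP with count 0x3FFF pads
	 * the remainder of the ring in a single packet, while the legacy
	 * type-2 RADEON_CP_PACKET2 pads one dword at a time.  Older Hawaii
	 * CP microcode apparently mishandles the large type-3 form, hence
	 * the new_fw check below.
	 */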
8427 	if (rdev->family == CHIP_HAWAII) {
8428 		if (rdev->new_fw)
8429 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8430 		else
8431 			nop = RADEON_CP_PACKET2;
8432 	} else {
8433 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8434 	}
8435 
8436 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8437 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8438 			     nop);
8439 	if (r)
8440 		return r;
8441 
8442 	/* set up the compute queues */
8443 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8444 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8445 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8446 			     nop);
8447 	if (r)
8448 		return r;
8449 	ring->me = 1; /* first MEC */
8450 	ring->pipe = 0; /* first pipe */
8451 	ring->queue = 0; /* first queue */
8452 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8453 
8454 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8455 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8456 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8457 			     nop);
8458 	if (r)
8459 		return r;
8460 	/* dGPUs only have 1 MEC */
8461 	ring->me = 1; /* first MEC */
8462 	ring->pipe = 0; /* first pipe */
8463 	ring->queue = 1; /* second queue */
8464 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8465 
8466 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8467 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8468 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8469 	if (r)
8470 		return r;
8471 
8472 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8473 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8474 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8475 	if (r)
8476 		return r;
8477 
8478 	r = cik_cp_resume(rdev);
8479 	if (r)
8480 		return r;
8481 
8482 	r = cik_sdma_resume(rdev);
8483 	if (r)
8484 		return r;
8485 
8486 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8487 	if (ring->ring_size) {
8488 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8489 				     RADEON_CP_PACKET2);
8490 		if (!r)
8491 			r = uvd_v1_0_init(rdev);
8492 		if (r)
8493 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8494 	}
8495 
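	/* -ENOENT doubles as "no VCE rings": vce_v1_0_init is only called
	 * if at least one VCE ring was set up below, and the error print is
	 * suppressed for the plain not-present case.
	 */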
8496 	r = -ENOENT;
8497 
8498 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8499 	if (ring->ring_size)
8500 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8501 				     VCE_CMD_NO_OP);
8502 
8503 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8504 	if (ring->ring_size)
8505 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8506 				     VCE_CMD_NO_OP);
8507 
8508 	if (!r)
8509 		r = vce_v1_0_init(rdev);
8510 	else if (r != -ENOENT)
8511 		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8512 
8513 	r = radeon_ib_pool_init(rdev);
8514 	if (r) {
8515 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8516 		return r;
8517 	}
8518 
8519 	r = radeon_vm_manager_init(rdev);
8520 	if (r) {
8521 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8522 		return r;
8523 	}
8524 
8525 	r = radeon_audio_init(rdev);
8526 	if (r)
8527 		return r;
8528 
8529 	r = radeon_kfd_resume(rdev);
8530 	if (r)
8531 		return r;
8532 
8533 	return 0;
8534 }
8535 
8536 /**
8537  * cik_resume - resume the asic to a functional state
8538  *
8539  * @rdev: radeon_device pointer
8540  *
8541  * Programs the asic to a functional state (CIK).
8542  * Called at resume.
8543  * Returns 0 for success, error for failure.
8544  */
8545 int cik_resume(struct radeon_device *rdev)
8546 {
8547 	int r;
8548 
8549 	/* post card */
8550 	atom_asic_init(rdev->mode_info.atom_context);
8551 
8552 	/* init golden registers */
8553 	cik_init_golden_registers(rdev);
8554 
8555 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8556 		radeon_pm_resume(rdev);
8557 
8558 	rdev->accel_working = true;
8559 	r = cik_startup(rdev);
8560 	if (r) {
8561 		DRM_ERROR("cik startup failed on resume\n");
8562 		rdev->accel_working = false;
8563 		return r;
8564 	}
8565 
8566 	return 0;
8568 }
8569 
8570 /**
8571  * cik_suspend - suspend the asic
8572  *
8573  * @rdev: radeon_device pointer
8574  *
8575  * Bring the chip into a state suitable for suspend (CIK).
8576  * Called at suspend.
8577  * Returns 0 for success.
8578  */
8579 int cik_suspend(struct radeon_device *rdev)
8580 {
8581 	radeon_kfd_suspend(rdev);
8582 	radeon_pm_suspend(rdev);
8583 	radeon_audio_fini(rdev);
8584 	radeon_vm_manager_fini(rdev);
8585 	cik_cp_enable(rdev, false);
8586 	cik_sdma_enable(rdev, false);
8587 	uvd_v1_0_fini(rdev);
8588 	radeon_uvd_suspend(rdev);
8589 	radeon_vce_suspend(rdev);
8590 	cik_fini_pg(rdev);
8591 	cik_fini_cg(rdev);
8592 	cik_irq_suspend(rdev);
8593 	radeon_wb_disable(rdev);
8594 	cik_pcie_gart_disable(rdev);
8595 	return 0;
8596 }
8597 
8598 /* The plan is to move initialization into this function and use
8599  * helper functions so that radeon_device_init does little more
8600  * than call the asic-specific functions. This should also allow
8601  * us to remove a bunch of callback functions
8602  * like vram_info.
8603  */
8604 /**
8605  * cik_init - asic specific driver and hw init
8606  *
8607  * @rdev: radeon_device pointer
8608  *
8609  * Setup asic specific driver variables and program the hw
8610  * to a functional state (CIK).
8611  * Called at driver startup.
8612  * Returns 0 for success, errors for failure.
8613  */
8614 int cik_init(struct radeon_device *rdev)
8615 {
8616 	struct radeon_ring *ring;
8617 	int r;
8618 
8619 	/* Read BIOS */
8620 	if (!radeon_get_bios(rdev)) {
8621 		if (ASIC_IS_AVIVO(rdev))
8622 			return -EINVAL;
8623 	}
8624 	/* Must be an ATOMBIOS */
8625 	if (!rdev->is_atom_bios) {
8626 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8627 		return -EINVAL;
8628 	}
8629 	r = radeon_atombios_init(rdev);
8630 	if (r)
8631 		return r;
8632 
8633 	/* Post card if necessary */
8634 	if (!radeon_card_posted(rdev)) {
8635 		if (!rdev->bios) {
8636 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8637 			return -EINVAL;
8638 		}
8639 		DRM_INFO("GPU not posted. posting now...\n");
8640 		atom_asic_init(rdev->mode_info.atom_context);
8641 	}
8642 	/* init golden registers */
8643 	cik_init_golden_registers(rdev);
8644 	/* Initialize scratch registers */
8645 	cik_scratch_init(rdev);
8646 	/* Initialize surface registers */
8647 	radeon_surface_init(rdev);
8648 	/* Initialize clocks */
8649 	radeon_get_clock_info(rdev->ddev);
8650 
8651 	/* Fence driver */
8652 	r = radeon_fence_driver_init(rdev);
8653 	if (r)
8654 		return r;
8655 
8656 	/* initialize memory controller */
8657 	r = cik_mc_init(rdev);
8658 	if (r)
8659 		return r;
8660 	/* Memory manager */
8661 	r = radeon_bo_init(rdev);
8662 	if (r)
8663 		return r;
8664 
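	/* CIK IGPs have no dedicated VRAM controller to bring up, so no MC
	 * firmware is required; dGPUs additionally need mc_fw, hence the
	 * extra check in the else branch.
	 */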
8665 	if (rdev->flags & RADEON_IS_IGP) {
8666 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8667 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8668 			r = cik_init_microcode(rdev);
8669 			if (r) {
8670 				DRM_ERROR("Failed to load firmware!\n");
8671 				return r;
8672 			}
8673 		}
8674 	} else {
8675 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8676 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8677 		    !rdev->mc_fw) {
8678 			r = cik_init_microcode(rdev);
8679 			if (r) {
8680 				DRM_ERROR("Failed to load firmware!\n");
8681 				return r;
8682 			}
8683 		}
8684 	}
8685 
8686 	/* Initialize power management */
8687 	radeon_pm_init(rdev);
8688 
8689 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8690 	ring->ring_obj = NULL;
8691 	r600_ring_init(rdev, ring, 1024 * 1024);
8692 
8693 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8694 	ring->ring_obj = NULL;
8695 	r600_ring_init(rdev, ring, 1024 * 1024);
8696 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8697 	if (r)
8698 		return r;
8699 
8700 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8701 	ring->ring_obj = NULL;
8702 	r600_ring_init(rdev, ring, 1024 * 1024);
8703 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8704 	if (r)
8705 		return r;
8706 
8707 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8708 	ring->ring_obj = NULL;
8709 	r600_ring_init(rdev, ring, 256 * 1024);
8710 
8711 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8712 	ring->ring_obj = NULL;
8713 	r600_ring_init(rdev, ring, 256 * 1024);
8714 
8715 	r = radeon_uvd_init(rdev);
8716 	if (!r) {
8717 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8718 		ring->ring_obj = NULL;
8719 		r600_ring_init(rdev, ring, 4096);
8720 	}
8721 
8722 	r = radeon_vce_init(rdev);
8723 	if (!r) {
8724 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8725 		ring->ring_obj = NULL;
8726 		r600_ring_init(rdev, ring, 4096);
8727 
8728 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8729 		ring->ring_obj = NULL;
8730 		r600_ring_init(rdev, ring, 4096);
8731 	}
8732 
8733 	rdev->ih.ring_obj = NULL;
8734 	r600_ih_ring_init(rdev, 64 * 1024);
8735 
8736 	r = r600_pcie_gart_init(rdev);
8737 	if (r)
8738 		return r;
8739 
8740 	rdev->accel_working = true;
8741 	r = cik_startup(rdev);
8742 	if (r) {
8743 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8744 		cik_cp_fini(rdev);
8745 		cik_sdma_fini(rdev);
8746 		cik_irq_fini(rdev);
8747 		sumo_rlc_fini(rdev);
8748 		cik_mec_fini(rdev);
8749 		radeon_wb_fini(rdev);
8750 		radeon_ib_pool_fini(rdev);
8751 		radeon_vm_manager_fini(rdev);
8752 		radeon_irq_kms_fini(rdev);
8753 		cik_pcie_gart_fini(rdev);
8754 		rdev->accel_working = false;
8755 	}
8756 
8757 	/* Don't start up if the MC ucode is missing.
8758 	 * The default clocks and voltages before the MC ucode
8759 	 * is loaded are not sufficient for advanced operations.
8760 	 */
8761 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8762 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8763 		return -EINVAL;
8764 	}
8765 
8766 	return 0;
8767 }
8768 
8769 /**
8770  * cik_fini - asic specific driver and hw fini
8771  *
8772  * @rdev: radeon_device pointer
8773  *
8774  * Tear down the asic specific driver variables and program the hw
8775  * to an idle state (CIK).
8776  * Called at driver unload.
8777  */
8778 void cik_fini(struct radeon_device *rdev)
8779 {
8780 	radeon_pm_fini(rdev);
8781 	cik_cp_fini(rdev);
8782 	cik_sdma_fini(rdev);
8783 	cik_fini_pg(rdev);
8784 	cik_fini_cg(rdev);
8785 	cik_irq_fini(rdev);
8786 	sumo_rlc_fini(rdev);
8787 	cik_mec_fini(rdev);
8788 	radeon_wb_fini(rdev);
8789 	radeon_vm_manager_fini(rdev);
8790 	radeon_ib_pool_fini(rdev);
8791 	radeon_irq_kms_fini(rdev);
8792 	uvd_v1_0_fini(rdev);
8793 	radeon_uvd_fini(rdev);
8794 	radeon_vce_fini(rdev);
8795 	cik_pcie_gart_fini(rdev);
8796 	r600_vram_scratch_fini(rdev);
8797 	radeon_gem_fini(rdev);
8798 	radeon_fence_driver_fini(rdev);
8799 	radeon_bo_fini(rdev);
8800 	radeon_atombios_fini(rdev);
8801 	kfree(rdev->bios);
8802 	rdev->bios = NULL;
8803 }
8804 
8805 void dce8_program_fmt(struct drm_encoder *encoder)
8806 {
8807 	struct drm_device *dev = encoder->dev;
8808 	struct radeon_device *rdev = dev->dev_private;
8809 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8810 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8811 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8812 	int bpc = 0;
8813 	u32 tmp = 0;
8814 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8815 
8816 	if (connector) {
8817 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8818 		bpc = radeon_get_monitor_bpc(connector);
8819 		dither = radeon_connector->dither;
8820 	}
8821 
8822 	/* LVDS/eDP FMT is set up by atom */
8823 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8824 		return;
8825 
8826 	/* not needed for analog */
8827 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8828 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8829 		return;
8830 
8831 	if (bpc == 0)
8832 		return;
8833 
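	/* FMT_*_DEPTH(n) selects the target depth: 0 = 6 bpc, 1 = 8 bpc,
	 * 2 = 10 bpc.  Dithering spreads the truncated bits as noise;
	 * plain truncation simply drops them.
	 */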
8834 	switch (bpc) {
8835 	case 6:
8836 		if (dither == RADEON_FMT_DITHER_ENABLE)
8837 			/* XXX sort out optimal dither settings */
8838 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8839 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8840 		else
8841 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8842 		break;
8843 	case 8:
8844 		if (dither == RADEON_FMT_DITHER_ENABLE)
8845 			/* XXX sort out optimal dither settings */
8846 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8847 				FMT_RGB_RANDOM_ENABLE |
8848 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8849 		else
8850 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8851 		break;
8852 	case 10:
8853 		if (dither == RADEON_FMT_DITHER_ENABLE)
8854 			/* XXX sort out optimal dither settings */
8855 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8856 				FMT_RGB_RANDOM_ENABLE |
8857 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8858 		else
8859 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8860 		break;
8861 	default:
8862 		/* not needed */
8863 		break;
8864 	}
8865 
8866 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8867 }
8868 
8869 /* display watermark setup */
8870 /**
8871  * dce8_line_buffer_adjust - Set up the line buffer
8872  *
8873  * @rdev: radeon_device pointer
8874  * @radeon_crtc: the selected display controller
8875  * @mode: the current display mode on the selected display
8876  * controller
8877  *
8878 	 * Set up the line buffer allocation for
8879  * the selected display controller (CIK).
8880  * Returns the line buffer size in pixels.
8881  */
8882 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8883 				   struct radeon_crtc *radeon_crtc,
8884 				   struct drm_display_mode *mode)
8885 {
8886 	u32 tmp, buffer_alloc, i;
8887 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8888 	/*
8889 	 * Line Buffer Setup
8890 	 * There are 6 line buffers, one per display controller.
8891 	 * There are 3 partitions per LB. Select the number of partitions
8892 	 * to enable based on the display width.  For display widths larger
8893 	 * than 4096, you need to use 2 display controllers and combine
8894 	 * them using the stereo blender.
8895 	 */
8896 	if (radeon_crtc->base.enabled && mode) {
8897 		if (mode->crtc_hdisplay < 1920) {
8898 			tmp = 1;
8899 			buffer_alloc = 2;
8900 		} else if (mode->crtc_hdisplay < 2560) {
8901 			tmp = 2;
8902 			buffer_alloc = 2;
8903 		} else if (mode->crtc_hdisplay < 4096) {
8904 			tmp = 0;
8905 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8906 		} else {
8907 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8908 			tmp = 0;
8909 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8910 		}
8911 	} else {
8912 		tmp = 1;
8913 		buffer_alloc = 0;
8914 	}
8915 
8916 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8917 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8918 
8919 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8920 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
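	/* The DMIF reallocation completes asynchronously: poll (for up to
	 * rdev->usec_timeout microseconds) until the hardware acknowledges
	 * the new buffer split via the COMPLETED bit.
	 */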
8921 	for (i = 0; i < rdev->usec_timeout; i++) {
8922 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8923 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8924 			break;
8925 		udelay(1);
8926 	}
8927 
8928 	if (radeon_crtc->base.enabled && mode) {
8929 		switch (tmp) {
8930 		case 0:
8931 		default:
8932 			return 4096 * 2;
8933 		case 1:
8934 			return 1920 * 2;
8935 		case 2:
8936 			return 2560 * 2;
8937 		}
8938 	}
8939 
8940 	/* controller not enabled, so no lb used */
8941 	return 0;
8942 }
8943 
8944 /**
8945  * cik_get_number_of_dram_channels - get the number of dram channels
8946  *
8947  * @rdev: radeon_device pointer
8948  *
8949  * Look up the number of video ram channels (CIK).
8950  * Used for display watermark bandwidth calculations
8951  * Returns the number of dram channels
8952  */
8953 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8954 {
8955 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8956 
8957 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8958 	case 0:
8959 	default:
8960 		return 1;
8961 	case 1:
8962 		return 2;
8963 	case 2:
8964 		return 4;
8965 	case 3:
8966 		return 8;
8967 	case 4:
8968 		return 3;
8969 	case 5:
8970 		return 6;
8971 	case 6:
8972 		return 10;
8973 	case 7:
8974 		return 12;
8975 	case 8:
8976 		return 16;
8977 	}
8978 }
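/* NOOFCHAN is an encoded field rather than a raw count: e.g. an encoding
 * of 3 decodes to 8 channels which, at the 32 bits per channel assumed by
 * the bandwidth math below, would correspond to a 256-bit board.
 */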
8979 
8980 struct dce8_wm_params {
8981 	u32 dram_channels; /* number of dram channels */
8982 	u32 yclk;          /* bandwidth per dram data pin in kHz */
8983 	u32 sclk;          /* engine clock in kHz */
8984 	u32 disp_clk;      /* display clock in kHz */
8985 	u32 src_width;     /* viewport width */
8986 	u32 active_time;   /* active display time in ns */
8987 	u32 blank_time;    /* blank time in ns */
8988 	bool interlaced;    /* mode is interlaced */
8989 	fixed20_12 vsc;    /* vertical scale ratio */
8990 	u32 num_heads;     /* number of active crtcs */
8991 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8992 	u32 lb_size;       /* line buffer allocated to pipe */
8993 	u32 vtaps;         /* vertical scaler taps */
8994 };
8995 
8996 /**
8997  * dce8_dram_bandwidth - get the dram bandwidth
8998  *
8999  * @wm: watermark calculation data
9000  *
9001  * Calculate the raw dram bandwidth (CIK).
9002  * Used for display watermark bandwidth calculations
9003  * Returns the dram bandwidth in MBytes/s
9004  */
9005 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
9006 {
9007 	/* Calculate raw DRAM Bandwidth */
9008 	fixed20_12 dram_efficiency; /* 0.7 */
9009 	fixed20_12 yclk, dram_channels, bandwidth;
9010 	fixed20_12 a;
9011 
9012 	a.full = dfixed_const(1000);
9013 	yclk.full = dfixed_const(wm->yclk);
9014 	yclk.full = dfixed_div(yclk, a);
9015 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9016 	a.full = dfixed_const(10);
9017 	dram_efficiency.full = dfixed_const(7);
9018 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
9019 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9020 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9021 
9022 	return dfixed_trunc(bandwidth);
9023 }
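/* In closed form: dram_bw_MBps ~= (yclk_kHz / 1000) * channels * 4 bytes
 * * 0.7.  With illustrative numbers, yclk = 1,000,000 kHz and 8 channels
 * give 1000 * 8 * 4 * 0.7 = 22,400 MB/s.
 */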
9024 
9025 /**
9026  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9027  *
9028  * @wm: watermark calculation data
9029  *
9030  * Calculate the dram bandwidth used for display (CIK).
9031  * Used for display watermark bandwidth calculations
9032  * Returns the dram bandwidth for display in MBytes/s
9033  */
9034 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9035 {
9036 	/* Calculate DRAM Bandwidth and the part allocated to display. */
9037 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9038 	fixed20_12 yclk, dram_channels, bandwidth;
9039 	fixed20_12 a;
9040 
9041 	a.full = dfixed_const(1000);
9042 	yclk.full = dfixed_const(wm->yclk);
9043 	yclk.full = dfixed_div(yclk, a);
9044 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9045 	a.full = dfixed_const(10);
9046 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst-case value 0.3 */
9047 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9048 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9049 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9050 
9051 	return dfixed_trunc(bandwidth);
9052 }
9053 
9054 /**
9055  * dce8_data_return_bandwidth - get the data return bandwidth
9056  *
9057  * @wm: watermark calculation data
9058  *
9059  * Calculate the data return bandwidth used for display (CIK).
9060  * Used for display watermark bandwidth calculations
9061  * Returns the data return bandwidth in MBytes/s
9062  */
9063 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9064 {
9065 	/* Calculate the display Data return Bandwidth */
9066 	fixed20_12 return_efficiency; /* 0.8 */
9067 	fixed20_12 sclk, bandwidth;
9068 	fixed20_12 a;
9069 
9070 	a.full = dfixed_const(1000);
9071 	sclk.full = dfixed_const(wm->sclk);
9072 	sclk.full = dfixed_div(sclk, a);
9073 	a.full = dfixed_const(10);
9074 	return_efficiency.full = dfixed_const(8);
9075 	return_efficiency.full = dfixed_div(return_efficiency, a);
9076 	a.full = dfixed_const(32);
9077 	bandwidth.full = dfixed_mul(a, sclk);
9078 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9079 
9080 	return dfixed_trunc(bandwidth);
9081 }
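/* Closed form: return_bw_MBps ~= (sclk_kHz / 1000) * 32 bytes * 0.8, i.e.
 * the engine returns at most 32 bytes per sclk at 80% efficiency.
 */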
9082 
9083 /**
9084  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9085  *
9086  * @wm: watermark calculation data
9087  *
9088  * Calculate the dmif bandwidth used for display (CIK).
9089  * Used for display watermark bandwidth calculations
9090  * Returns the dmif bandwidth in MBytes/s
9091  */
9092 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9093 {
9094 	/* Calculate the DMIF Request Bandwidth */
9095 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9096 	fixed20_12 disp_clk, bandwidth;
9097 	fixed20_12 a, b;
9098 
9099 	a.full = dfixed_const(1000);
9100 	disp_clk.full = dfixed_const(wm->disp_clk);
9101 	disp_clk.full = dfixed_div(disp_clk, a);
9102 	a.full = dfixed_const(32);
9103 	b.full = dfixed_mul(a, disp_clk);
9104 
9105 	a.full = dfixed_const(10);
9106 	disp_clk_request_efficiency.full = dfixed_const(8);
9107 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9108 
9109 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9110 
9111 	return dfixed_trunc(bandwidth);
9112 }
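/* Closed form: dmif_bw_MBps ~= (disp_clk_kHz / 1000) * 32 bytes * 0.8. */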
9113 
9114 /**
9115  * dce8_available_bandwidth - get the min available bandwidth
9116  *
9117  * @wm: watermark calculation data
9118  *
9119  * Calculate the min available bandwidth used for display (CIK).
9120  * Used for display watermark bandwidth calculations
9121  * Returns the min available bandwidth in MBytes/s
9122  */
9123 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9124 {
9125 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9126 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9127 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9128 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9129 
9130 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9131 }
9132 
9133 /**
9134  * dce8_average_bandwidth - get the average available bandwidth
9135  *
9136  * @wm: watermark calculation data
9137  *
9138  * Calculate the average available bandwidth used for display (CIK).
9139  * Used for display watermark bandwidth calculations
9140  * Returns the average available bandwidth in MBytes/s
9141  */
9142 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9143 {
9144 	/* Calculate the display mode Average Bandwidth
9145 	 * DisplayMode should contain the source and destination dimensions,
9146 	 * timing, etc.
9147 	 */
9148 	fixed20_12 bpp;
9149 	fixed20_12 line_time;
9150 	fixed20_12 src_width;
9151 	fixed20_12 bandwidth;
9152 	fixed20_12 a;
9153 
9154 	a.full = dfixed_const(1000);
9155 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9156 	line_time.full = dfixed_div(line_time, a);
9157 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9158 	src_width.full = dfixed_const(wm->src_width);
9159 	bandwidth.full = dfixed_mul(src_width, bpp);
9160 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9161 	bandwidth.full = dfixed_div(bandwidth, line_time);
9162 
9163 	return dfixed_trunc(bandwidth);
9164 }
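/* Closed form: avg_bw_MBps ~= src_width * bytes_per_pixel * vsc /
 * line_time_us, i.e. the bytes one scaled source line must deliver per
 * line period.
 */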
9165 
9166 /**
9167  * dce8_latency_watermark - get the latency watermark
9168  *
9169  * @wm: watermark calculation data
9170  *
9171  * Calculate the latency watermark (CIK).
9172  * Used for display watermark bandwidth calculations
9173  * Returns the latency watermark in ns
9174  */
9175 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9176 {
9177 	/* First calculate the latency in ns */
9178 	u32 mc_latency = 2000; /* 2000 ns. */
9179 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9180 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9181 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9182 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9183 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9184 		(wm->num_heads * cursor_line_pair_return_time);
9185 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9186 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9187 	u32 tmp, dmif_size = 12288;
9188 	fixed20_12 a, b, c;
9189 
9190 	if (wm->num_heads == 0)
9191 		return 0;
9192 
9193 	a.full = dfixed_const(2);
9194 	b.full = dfixed_const(1);
9195 	if ((wm->vsc.full > a.full) ||
9196 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9197 	    (wm->vtaps >= 5) ||
9198 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9199 		max_src_lines_per_dst_line = 4;
9200 	else
9201 		max_src_lines_per_dst_line = 2;
9202 
9203 	a.full = dfixed_const(available_bandwidth);
9204 	b.full = dfixed_const(wm->num_heads);
9205 	a.full = dfixed_div(a, b);
9206 
9207 	b.full = dfixed_const(mc_latency + 512);
9208 	c.full = dfixed_const(wm->disp_clk);
9209 	b.full = dfixed_div(b, c);
9210 
9211 	c.full = dfixed_const(dmif_size);
9212 	b.full = dfixed_div(c, b);
9213 
9214 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9215 
9216 	b.full = dfixed_const(1000);
9217 	c.full = dfixed_const(wm->disp_clk);
9218 	b.full = dfixed_div(c, b);
9219 	c.full = dfixed_const(wm->bytes_per_pixel);
9220 	b.full = dfixed_mul(b, c);
9221 
9222 	lb_fill_bw = min(tmp, dfixed_trunc(b));
9223 
9224 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9225 	b.full = dfixed_const(1000);
9226 	c.full = dfixed_const(lb_fill_bw);
9227 	b.full = dfixed_div(c, b);
9228 	a.full = dfixed_div(a, b);
9229 	line_fill_time = dfixed_trunc(a);
9230 
9231 	if (line_fill_time < wm->active_time)
9232 		return latency;
9233 	else
9234 		return latency + (line_fill_time - wm->active_time);
9236 }
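/* The returned watermark is a worst-case latency in ns; when the line
 * buffer cannot be refilled within one active line (line_fill_time >=
 * active_time), the shortfall is added on top of the raw latency.
 */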
9237 
9238 /**
9239  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9240  * average bandwidth against the dram bandwidth for display
9241  *
9242  * @wm: watermark calculation data
9243  *
9244  * Check if the display average bandwidth fits in the display
9245  * dram bandwidth (CIK).
9246  * Used for display watermark bandwidth calculations
9247  * Returns true if the display fits, false if not.
9248  */
9249 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9250 {
9251 	if (dce8_average_bandwidth(wm) <=
9252 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9253 		return true;
9254 	else
9255 		return false;
9256 }
9257 
9258 /**
9259  * dce8_average_bandwidth_vs_available_bandwidth - check
9260  * average and available bandwidth
9261  *
9262  * @wm: watermark calculation data
9263  *
9264  * Check if the display average bandwidth fits in the display
9265  * available bandwidth (CIK).
9266  * Used for display watermark bandwidth calculations
9267  * Returns true if the display fits, false if not.
9268  */
9269 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9270 {
9271 	if (dce8_average_bandwidth(wm) <=
9272 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9273 		return true;
9274 	else
9275 		return false;
9276 }
9277 
9278 /**
9279  * dce8_check_latency_hiding - check latency hiding
9280  *
9281  * @wm: watermark calculation data
9282  *
9283  * Check latency hiding (CIK).
9284  * Used for display watermark bandwidth calculations
9285  * Returns true if the display fits, false if not.
9286  */
9287 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9288 {
9289 	u32 lb_partitions = wm->lb_size / wm->src_width;
9290 	u32 line_time = wm->active_time + wm->blank_time;
9291 	u32 latency_tolerant_lines;
9292 	u32 latency_hiding;
9293 	fixed20_12 a;
9294 
9295 	a.full = dfixed_const(1);
9296 	if (wm->vsc.full > a.full) {
9297 		latency_tolerant_lines = 1;
9298 	} else {
9299 		if (lb_partitions <= (wm->vtaps + 1))
9300 			latency_tolerant_lines = 1;
9301 		else
9302 			latency_tolerant_lines = 2;
9303 	}
9304 
9305 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9306 
9307 	if (dce8_latency_watermark(wm) <= latency_hiding)
9308 		return true;
9309 	else
9310 		return false;
9311 }
9312 
9313 /**
9314  * dce8_program_watermarks - program display watermarks
9315  *
9316  * @rdev: radeon_device pointer
9317  * @radeon_crtc: the selected display controller
9318  * @lb_size: line buffer size
9319  * @num_heads: number of display controllers in use
9320  *
9321  * Calculate and program the display watermarks for the
9322  * selected display controller (CIK).
9323  */
9324 static void dce8_program_watermarks(struct radeon_device *rdev,
9325 				    struct radeon_crtc *radeon_crtc,
9326 				    u32 lb_size, u32 num_heads)
9327 {
9328 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9329 	struct dce8_wm_params wm_low, wm_high;
9330 	u32 pixel_period;
9331 	u32 line_time = 0;
9332 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9333 	u32 tmp, wm_mask;
9334 
9335 	if (radeon_crtc->base.enabled && num_heads && mode) {
9336 		pixel_period = 1000000 / (u32)mode->clock;
9337 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9338 
9339 		/* watermark for high clocks */
9340 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9341 		    rdev->pm.dpm_enabled) {
9342 			wm_high.yclk =
9343 				radeon_dpm_get_mclk(rdev, false) * 10;
9344 			wm_high.sclk =
9345 				radeon_dpm_get_sclk(rdev, false) * 10;
9346 		} else {
9347 			wm_high.yclk = rdev->pm.current_mclk * 10;
9348 			wm_high.sclk = rdev->pm.current_sclk * 10;
9349 		}
9350 
9351 		wm_high.disp_clk = mode->clock;
9352 		wm_high.src_width = mode->crtc_hdisplay;
9353 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9354 		wm_high.blank_time = line_time - wm_high.active_time;
9355 		wm_high.interlaced = false;
9356 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9357 			wm_high.interlaced = true;
9358 		wm_high.vsc = radeon_crtc->vsc;
9359 		wm_high.vtaps = 1;
9360 		if (radeon_crtc->rmx_type != RMX_OFF)
9361 			wm_high.vtaps = 2;
9362 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9363 		wm_high.lb_size = lb_size;
9364 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9365 		wm_high.num_heads = num_heads;
9366 
9367 		/* set for high clocks */
9368 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9369 
9370 		/* possibly force display priority to high */
9371 		/* should really do this at mode validation time... */
9372 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9373 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9374 		    !dce8_check_latency_hiding(&wm_high) ||
9375 		    (rdev->disp_priority == 2)) {
9376 			DRM_DEBUG_KMS("force priority to high\n");
9377 		}
9378 
9379 		/* watermark for low clocks */
9380 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9381 		    rdev->pm.dpm_enabled) {
9382 			wm_low.yclk =
9383 				radeon_dpm_get_mclk(rdev, true) * 10;
9384 			wm_low.sclk =
9385 				radeon_dpm_get_sclk(rdev, true) * 10;
9386 		} else {
9387 			wm_low.yclk = rdev->pm.current_mclk * 10;
9388 			wm_low.sclk = rdev->pm.current_sclk * 10;
9389 		}
9390 
9391 		wm_low.disp_clk = mode->clock;
9392 		wm_low.src_width = mode->crtc_hdisplay;
9393 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9394 		wm_low.blank_time = line_time - wm_low.active_time;
9395 		wm_low.interlaced = false;
9396 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9397 			wm_low.interlaced = true;
9398 		wm_low.vsc = radeon_crtc->vsc;
9399 		wm_low.vtaps = 1;
9400 		if (radeon_crtc->rmx_type != RMX_OFF)
9401 			wm_low.vtaps = 2;
9402 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9403 		wm_low.lb_size = lb_size;
9404 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9405 		wm_low.num_heads = num_heads;
9406 
9407 		/* set for low clocks */
9408 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9409 
9410 		/* possibly force display priority to high */
9411 		/* should really do this at mode validation time... */
9412 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9413 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9414 		    !dce8_check_latency_hiding(&wm_low) ||
9415 		    (rdev->disp_priority == 2)) {
9416 			DRM_DEBUG_KMS("force priority to high\n");
9417 		}
9418 	}
9419 
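	/* Watermark A is programmed for the high-clock case and B for the
	 * low-clock case; the values saved at the bottom of this function
	 * are presumably what the DPM code uses when switching levels.
	 */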
9420 	/* select wm A */
9421 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9422 	tmp = wm_mask;
9423 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9424 	tmp |= LATENCY_WATERMARK_MASK(1);
9425 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9426 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9427 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9428 		LATENCY_HIGH_WATERMARK(line_time)));
9429 	/* select wm B */
9430 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9431 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9432 	tmp |= LATENCY_WATERMARK_MASK(2);
9433 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9434 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9435 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9436 		LATENCY_HIGH_WATERMARK(line_time)));
9437 	/* restore original selection */
9438 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9439 
9440 	/* save values for DPM */
9441 	radeon_crtc->line_time = line_time;
9442 	radeon_crtc->wm_high = latency_watermark_a;
9443 	radeon_crtc->wm_low = latency_watermark_b;
9444 }
9445 
9446 /**
9447  * dce8_bandwidth_update - program display watermarks
9448  *
9449  * @rdev: radeon_device pointer
9450  *
9451  * Calculate and program the display watermarks and line
9452  * buffer allocation (CIK).
9453  */
9454 void dce8_bandwidth_update(struct radeon_device *rdev)
9455 {
9456 	struct drm_display_mode *mode = NULL;
9457 	u32 num_heads = 0, lb_size;
9458 	int i;
9459 
9460 	if (!rdev->mode_info.mode_config_initialized)
9461 		return;
9462 
9463 	radeon_update_display_priority(rdev);
9464 
9465 	for (i = 0; i < rdev->num_crtc; i++) {
9466 		if (rdev->mode_info.crtcs[i]->base.enabled)
9467 			num_heads++;
9468 	}
9469 	for (i = 0; i < rdev->num_crtc; i++) {
9470 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9471 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9472 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9473 	}
9474 }
9475 
9476 /**
9477  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9478  *
9479  * @rdev: radeon_device pointer
9480  *
9481  * Fetches a GPU clock counter snapshot (CIK).
9482  * Returns the 64 bit clock counter snapshot.
9483  */
9484 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9485 {
9486 	uint64_t clock;
9487 
9488 	mutex_lock(&rdev->gpu_clock_mutex);
9489 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9490 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9491 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9492 	mutex_unlock(&rdev->gpu_clock_mutex);
9493 	return clock;
9494 }
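/* Writing RLC_CAPTURE_GPU_CLOCK_COUNT latches the free-running 64-bit
 * counter so that the LSB/MSB pair reads back as a consistent snapshot;
 * the mutex keeps concurrent callers from re-latching between the reads.
 */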
9495 
9496 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9497                               u32 cntl_reg, u32 status_reg)
9498 {
9499 	int r, i;
9500 	struct atom_clock_dividers dividers;
9501 	uint32_t tmp;
9502 
9503 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9504 					   clock, false, &dividers);
9505 	if (r)
9506 		return r;
9507 
9508 	tmp = RREG32_SMC(cntl_reg);
9509 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9510 	tmp |= dividers.post_divider;
9511 	WREG32_SMC(cntl_reg, tmp);
9512 
9513 	for (i = 0; i < 100; i++) {
9514 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9515 			break;
9516 		mdelay(10);
9517 	}
9518 	if (i == 100)
9519 		return -ETIMEDOUT;
9520 
9521 	return 0;
9522 }
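/* Both UVD clocks follow the same pattern: ask the atom tables for a post
 * divider matching the requested rate, program it, then poll the status
 * register for up to 1 s (100 x 10 ms) for the divider to settle.
 */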
9523 
9524 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9525 {
9526 	int r = 0;
9527 
9528 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9529 	if (r)
9530 		return r;
9531 
9532 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9533 	return r;
9534 }
9535 
9536 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9537 {
9538 	int r, i;
9539 	struct atom_clock_dividers dividers;
9540 	u32 tmp;
9541 
9542 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9543 					   ecclk, false, &dividers);
9544 	if (r)
9545 		return r;
9546 
9547 	for (i = 0; i < 100; i++) {
9548 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9549 			break;
9550 		mdelay(10);
9551 	}
9552 	if (i == 100)
9553 		return -ETIMEDOUT;
9554 
9555 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9556 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9557 	tmp |= dividers.post_divider;
9558 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9559 
9560 	for (i = 0; i < 100; i++) {
9561 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9562 			break;
9563 		mdelay(10);
9564 	}
9565 	if (i == 100)
9566 		return -ETIMEDOUT;
9567 
9568 	return 0;
9569 }
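/* Note the double wait above: ECLK must report stable both before the
 * divider is swapped and again afterwards, before VCE may be started.
 */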
9570 
9571 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9572 {
9573 	struct pci_dev *root = rdev->pdev->bus->self;
9574 	int bridge_pos, gpu_pos;
9575 	u32 speed_cntl, mask, current_data_rate;
9576 	int ret, i;
9577 	u16 tmp16;
9578 
9579 	if (pci_is_root_bus(rdev->pdev->bus))
9580 		return;
9581 
9582 	if (radeon_pcie_gen2 == 0)
9583 		return;
9584 
9585 	if (rdev->flags & RADEON_IS_IGP)
9586 		return;
9587 
9588 	if (!(rdev->flags & RADEON_IS_PCIE))
9589 		return;
9590 
9591 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9592 	if (ret != 0)
9593 		return;
9594 
9595 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9596 		return;
9597 
9598 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9599 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9600 		LC_CURRENT_DATA_RATE_SHIFT;
9601 	if (mask & DRM_PCIE_SPEED_80) {
9602 		if (current_data_rate == 2) {
9603 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9604 			return;
9605 		}
9606 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9607 	} else if (mask & DRM_PCIE_SPEED_50) {
9608 		if (current_data_rate == 1) {
9609 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9610 			return;
9611 		}
9612 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9613 	}
9614 
9615 	bridge_pos = pci_pcie_cap(root);
9616 	if (!bridge_pos)
9617 		return;
9618 
9619 	gpu_pos = pci_pcie_cap(rdev->pdev);
9620 	if (!gpu_pos)
9621 		return;
9622 
9623 	if (mask & DRM_PCIE_SPEED_80) {
9624 		/* re-try equalization if gen3 is not already enabled */
9625 		if (current_data_rate != 2) {
9626 			u16 bridge_cfg, gpu_cfg;
9627 			u16 bridge_cfg2, gpu_cfg2;
9628 			u32 max_lw, current_lw, tmp;
9629 
9630 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9631 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9632 
9633 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9634 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9635 
9636 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9637 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9638 
9639 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9640 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9641 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9642 
9643 			if (current_lw < max_lw) {
9644 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9645 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9646 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9647 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9648 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9649 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9650 				}
9651 			}
9652 
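			/* Retry link equalization up to 10 times: quiesce
			 * the link, request a redo of equalization, then
			 * restore the saved LNKCTL/LNKCTL2 state each pass.
			 */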
9653 			for (i = 0; i < 10; i++) {
9654 				/* check status */
9655 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9656 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9657 					break;
9658 
9659 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9660 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9661 
9662 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9663 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9664 
9665 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9666 				tmp |= LC_SET_QUIESCE;
9667 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9668 
9669 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9670 				tmp |= LC_REDO_EQ;
9671 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9672 
9673 				mdelay(100);
9674 
9675 				/* linkctl */
9676 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9677 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9678 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9679 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9680 
9681 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9682 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9683 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9684 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9685 
9686 				/* linkctl2 */
9687 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9688 				tmp16 &= ~((1 << 4) | (7 << 9));
9689 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9690 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9691 
9692 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9693 				tmp16 &= ~((1 << 4) | (7 << 9));
9694 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9695 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9696 
9697 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9698 				tmp &= ~LC_SET_QUIESCE;
9699 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9700 			}
9701 		}
9702 	}
9703 
9704 	/* set the link speed */
9705 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9706 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9707 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9708 
9709 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9710 	tmp16 &= ~0xf;
9711 	if (mask & DRM_PCIE_SPEED_80)
9712 		tmp16 |= 3; /* gen3 */
9713 	else if (mask & DRM_PCIE_SPEED_50)
9714 		tmp16 |= 2; /* gen2 */
9715 	else
9716 		tmp16 |= 1; /* gen1 */
9717 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9718 
9719 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9720 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9721 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9722 
9723 	for (i = 0; i < rdev->usec_timeout; i++) {
9724 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9725 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9726 			break;
9727 		udelay(1);
9728 	}
9729 }
9730 
9731 static void cik_program_aspm(struct radeon_device *rdev)
9732 {
9733 	u32 data, orig;
9734 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9735 	bool disable_clkreq = false;
9736 
9737 	if (radeon_aspm == 0)
9738 		return;
9739 
9740 	/* XXX double check IGPs */
9741 	if (rdev->flags & RADEON_IS_IGP)
9742 		return;
9743 
9744 	if (!(rdev->flags & RADEON_IS_PCIE))
9745 		return;
9746 
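	/* Everything below is read-modify-write; each register is written
	 * back only when the computed value actually differs, avoiding
	 * spurious port-register traffic.
	 */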
9747 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9748 	data &= ~LC_XMIT_N_FTS_MASK;
9749 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9750 	if (orig != data)
9751 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9752 
9753 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9754 	data |= LC_GO_TO_RECOVERY;
9755 	if (orig != data)
9756 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9757 
9758 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9759 	data |= P_IGNORE_EDB_ERR;
9760 	if (orig != data)
9761 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9762 
9763 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9764 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9765 	data |= LC_PMI_TO_L1_DIS;
9766 	if (!disable_l0s)
9767 		data |= LC_L0S_INACTIVITY(7);
9768 
9769 	if (!disable_l1) {
9770 		data |= LC_L1_INACTIVITY(7);
9771 		data &= ~LC_PMI_TO_L1_DIS;
9772 		if (orig != data)
9773 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9774 
9775 		if (!disable_plloff_in_l1) {
9776 			bool clk_req_support;
9777 
9778 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9779 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9780 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9781 			if (orig != data)
9782 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9783 
9784 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9785 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9786 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9787 			if (orig != data)
9788 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9789 
9790 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9791 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9792 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9793 			if (orig != data)
9794 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9795 
9796 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9797 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9798 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9799 			if (orig != data)
9800 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9801 
9802 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9803 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9804 			data |= LC_DYN_LANES_PWR_STATE(3);
9805 			if (orig != data)
9806 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9807 
9808 			if (!disable_clkreq &&
9809 			    !pci_is_root_bus(rdev->pdev->bus)) {
9810 				struct pci_dev *root = rdev->pdev->bus->self;
9811 				u32 lnkcap;
9812 
9813 				clk_req_support = false;
9814 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9815 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9816 					clk_req_support = true;
9817 			} else {
9818 				clk_req_support = false;
9819 			}
9820 
9821 			if (clk_req_support) {
9822 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9823 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9824 				if (orig != data)
9825 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9826 
9827 				orig = data = RREG32_SMC(THM_CLK_CNTL);
9828 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9829 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9830 				if (orig != data)
9831 					WREG32_SMC(THM_CLK_CNTL, data);
9832 
9833 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9834 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9835 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9836 				if (orig != data)
9837 					WREG32_SMC(MISC_CLK_CTRL, data);
9838 
9839 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9840 				data &= ~BCLK_AS_XCLK;
9841 				if (orig != data)
9842 					WREG32_SMC(CG_CLKPIN_CNTL, data);
9843 
9844 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9845 				data &= ~FORCE_BIF_REFCLK_EN;
9846 				if (orig != data)
9847 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9848 
9849 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9850 				data &= ~MPLL_CLKOUT_SEL_MASK;
9851 				data |= MPLL_CLKOUT_SEL(4);
9852 				if (orig != data)
9853 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9854 			}
9855 		}
9856 	} else {
9857 		if (orig != data)
9858 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9859 	}
9860 
9861 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9862 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9863 	if (orig != data)
9864 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9865 
9866 	if (!disable_l0s) {
9867 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9868 		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9869 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9870 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9871 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9872 				data &= ~LC_L0S_INACTIVITY_MASK;
9873 				if (orig != data)
9874 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9875 			}
9876 		}
9877 	}
9878 }
9879