xref: /openbmc/linux/drivers/gpu/drm/radeon/cik.c (revision a03a8dbe20eff6d57aae3147577bf84b52aba4e6)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36 #include "radeon_kfd.h"
37 
38 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
45 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
46 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
47 
48 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
49 MODULE_FIRMWARE("radeon/bonaire_me.bin");
50 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
51 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
52 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
53 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
54 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
55 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
56 
57 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
58 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
59 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
60 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
61 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
63 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
64 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
65 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
66 
67 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
68 MODULE_FIRMWARE("radeon/hawaii_me.bin");
69 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
70 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
71 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
72 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
73 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
74 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
75 
76 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
77 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
78 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
79 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
80 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
81 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
82 
83 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
84 MODULE_FIRMWARE("radeon/kaveri_me.bin");
85 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
86 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
87 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
88 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
89 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
90 
91 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
92 MODULE_FIRMWARE("radeon/KABINI_me.bin");
93 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
94 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
95 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
96 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
97 
98 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
99 MODULE_FIRMWARE("radeon/kabini_me.bin");
100 MODULE_FIRMWARE("radeon/kabini_ce.bin");
101 MODULE_FIRMWARE("radeon/kabini_mec.bin");
102 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
103 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
104 
105 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
106 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
107 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
108 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
109 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
110 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
111 
112 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
113 MODULE_FIRMWARE("radeon/mullins_me.bin");
114 MODULE_FIRMWARE("radeon/mullins_ce.bin");
115 MODULE_FIRMWARE("radeon/mullins_mec.bin");
116 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
117 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
118 
119 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
120 extern void r600_ih_ring_fini(struct radeon_device *rdev);
121 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
122 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
123 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
124 extern void sumo_rlc_fini(struct radeon_device *rdev);
125 extern int sumo_rlc_init(struct radeon_device *rdev);
126 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
127 extern void si_rlc_reset(struct radeon_device *rdev);
128 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
129 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
130 extern int cik_sdma_resume(struct radeon_device *rdev);
131 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
132 extern void cik_sdma_fini(struct radeon_device *rdev);
133 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
134 static void cik_rlc_stop(struct radeon_device *rdev);
135 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
136 static void cik_program_aspm(struct radeon_device *rdev);
137 static void cik_init_pg(struct radeon_device *rdev);
138 static void cik_init_cg(struct radeon_device *rdev);
139 static void cik_fini_pg(struct radeon_device *rdev);
140 static void cik_fini_cg(struct radeon_device *rdev);
141 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
142 					  bool enable);
143 
144 /* get temperature in millidegrees */
145 int ci_get_temp(struct radeon_device *rdev)
146 {
147 	u32 temp;
148 	int actual_temp = 0;
149 
150 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
151 		CTF_TEMP_SHIFT;
152 
153 	if (temp & 0x200)
154 		actual_temp = 255;
155 	else
156 		actual_temp = temp & 0x1ff;
157 
158 	actual_temp = actual_temp * 1000;
159 
160 	return actual_temp;
161 }
162 
163 /* get temperature in millidegrees */
164 int kv_get_temp(struct radeon_device *rdev)
165 {
166 	u32 temp;
167 	int actual_temp = 0;
168 
169 	temp = RREG32_SMC(0xC0300E0C);
170 
171 	if (temp)
172 		actual_temp = (temp / 8) - 49;
173 	else
174 		actual_temp = 0;
175 
176 	actual_temp = actual_temp * 1000;
177 
178 	return actual_temp;
179 }
180 
181 /*
182  * Indirect registers accessor
183  */
184 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
185 {
186 	unsigned long flags;
187 	u32 r;
188 
189 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
190 	WREG32(PCIE_INDEX, reg);
191 	(void)RREG32(PCIE_INDEX);
192 	r = RREG32(PCIE_DATA);
193 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
194 	return r;
195 }
196 
197 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
198 {
199 	unsigned long flags;
200 
201 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
202 	WREG32(PCIE_INDEX, reg);
203 	(void)RREG32(PCIE_INDEX);
204 	WREG32(PCIE_DATA, v);
205 	(void)RREG32(PCIE_DATA);
206 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
207 }
208 
/*
 * RLC save/restore register list for Spectre (Kaveri) GFX7 parts.
 * Entries are (instance/broadcast select << 16 | dword register offset)
 * followed by a zero placeholder slot; the bare literals 0x3 and 0x5
 * delimit sections of the list.
 * NOTE(review): exact encoding is consumed by the RLC microcode —
 * do not reorder or reformat entries; confirm against the ucode spec.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3, /* NOTE(review): section delimiter consumed by RLC ucode — confirm */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5, /* NOTE(review): section delimiter; entries below carry no value slot */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
655 
/*
 * RLC save/restore register list for Kalindi (Kabini/Mullins) GFX7 parts.
 * Same encoding as spectre_rlc_save_restore_register_list above but with
 * fewer shader-engine instances (0x4e00..0x7e00 selects only) and a
 * slightly different register set.
 * NOTE(review): consumed by the RLC microcode — do not reorder entries.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3, /* NOTE(review): section delimiter consumed by RLC ucode — confirm */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5, /* NOTE(review): section delimiter; entries below carry no value slot */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
980 
/*
 * Golden (recommended power-on) SPM register setting for Bonaire.
 * Rows are {register offset, AND mask, OR value} triples — presumably
 * applied via the driver's register-sequence programmer; verify against
 * the caller.
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
985 
/*
 * Golden common register settings for Bonaire.
 * Rows are {register offset, AND mask, OR value} triples — presumably
 * applied via the driver's register-sequence programmer; verify against
 * the caller.
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
993 
/*
 * Golden register settings for Bonaire.
 * Rows are {register offset, AND mask, OR value} triples — presumably
 * applied via the driver's register-sequence programmer; verify against
 * the caller.  Values are hardware-validated defaults; do not edit.
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
1038 
/* Bonaire clock-gating (mgcg/cgcg per the name) init sequence.
 * Triplets of {register offset, mask, value} consumed by
 * radeon_program_register_sequence() in cik_init_golden_registers().
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1124 
/* Spectre (Kaveri) SPM golden settings; {offset, mask, value} triplets. */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1129 
/* Spectre (Kaveri) common golden settings; {offset, mask, value} triplets. */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1137 
/* Spectre (Kaveri) golden register settings; {offset, mask, value} triplets. */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1166 
/* Spectre (Kaveri) clock-gating init sequence; {offset, mask, value} triplets. */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1257 
/* Kalindi (Kabini) SPM golden settings; {offset, mask, value} triplets. */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1262 
/* Kalindi (Kabini) common golden settings; {offset, mask, value} triplets.
 * Also reused for Mullins in cik_init_golden_registers().
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1270 
/* Kalindi (Kabini) golden register settings; {offset, mask, value} triplets. */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1304 
/* Kalindi (Kabini) clock-gating init sequence; {offset, mask, value} triplets.
 * Also reused for Mullins in cik_init_golden_registers().
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1363 
/* Hawaii SPM golden settings; {offset, mask, value} triplets. */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1368 
/* Hawaii common golden settings; {offset, mask, value} triplets. */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1377 
/* Hawaii golden register settings; {offset, mask, value} triplets. */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1417 
/* Hawaii clock-gating init sequence; {offset, mask, value} triplets. */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1528 
/* Godavari (Mullins) golden register settings; {offset, mask, value} triplets. */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400, /* NOTE(review): 0x98302 looks like a typo for 0x9834 (every sibling table pairs this mask/value with 0x9834) -- confirm against register spec */
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1564 
1565 
/**
 * cik_init_golden_registers - program the "golden" register settings
 *
 * @rdev: radeon_device pointer
 *
 * Programs the per-family register init sequences (mgcg/cgcg init,
 * golden, common and spm tables) via radeon_program_register_sequence().
 * Families not listed in the switch are left untouched.  Mullins reuses
 * the Kalindi tables except for its own godavari golden registers.
 */
static void cik_init_golden_registers(struct radeon_device *rdev)
{
	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
	mutex_lock(&rdev->grbm_idx_mutex);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
	mutex_unlock(&rdev->grbm_idx_mutex);
}
1646 
1647 /**
1648  * cik_get_xclk - get the xclk
1649  *
1650  * @rdev: radeon_device pointer
1651  *
1652  * Returns the reference clock used by the gfx engine
1653  * (CIK).
1654  */
1655 u32 cik_get_xclk(struct radeon_device *rdev)
1656 {
1657         u32 reference_clock = rdev->clock.spll.reference_freq;
1658 
1659 	if (rdev->flags & RADEON_IS_IGP) {
1660 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1661 			return reference_clock / 2;
1662 	} else {
1663 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1664 			return reference_clock / 4;
1665 	}
1666 	return reference_clock;
1667 }
1668 
1669 /**
1670  * cik_mm_rdoorbell - read a doorbell dword
1671  *
1672  * @rdev: radeon_device pointer
1673  * @index: doorbell index
1674  *
1675  * Returns the value in the doorbell aperture at the
1676  * requested doorbell index (CIK).
1677  */
1678 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1679 {
1680 	if (index < rdev->doorbell.num_doorbells) {
1681 		return readl(rdev->doorbell.ptr + index);
1682 	} else {
1683 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1684 		return 0;
1685 	}
1686 }
1687 
1688 /**
1689  * cik_mm_wdoorbell - write a doorbell dword
1690  *
1691  * @rdev: radeon_device pointer
1692  * @index: doorbell index
1693  * @v: value to write
1694  *
1695  * Writes @v to the doorbell aperture at the
1696  * requested doorbell index (CIK).
1697  */
1698 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1699 {
1700 	if (index < rdev->doorbell.num_doorbells) {
1701 		writel(v, rdev->doorbell.ptr + index);
1702 	} else {
1703 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1704 	}
1705 }
1706 
/* Number of {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs below */
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC IO debug settings written by ci_mc_load_microcode() when
 * using the legacy (non-new_fw) firmware images.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1748 
/* Number of {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs below */
#define HAWAII_IO_MC_REGS_SIZE 22

/* Hawaii MC IO debug settings written by ci_mc_load_microcode() when
 * using the legacy (non-new_fw) firmware images.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1776 
1777 
1778 /**
1779  * cik_srbm_select - select specific register instances
1780  *
1781  * @rdev: radeon_device pointer
1782  * @me: selected ME (micro engine)
1783  * @pipe: pipe
1784  * @queue: queue
1785  * @vmid: VMID
1786  *
1787  * Switches the currently active registers instances.  Some
1788  * registers are instanced per VMID, others are instanced per
1789  * me/pipe/queue combination.
1790  */
1791 static void cik_srbm_select(struct radeon_device *rdev,
1792 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1793 {
1794 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1795 			     MEID(me & 0x3) |
1796 			     VMID(vmid & 0xf) |
1797 			     QUEUEID(queue & 0x7));
1798 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1799 }
1800 
1801 /* ucode loading */
1802 /**
1803  * ci_mc_load_microcode - load MC ucode into the hw
1804  *
1805  * @rdev: radeon_device pointer
1806  *
1807  * Load the GDDR MC ucode into the hw (CIK).
1808  * Returns 0 on success, error on failure.
1809  */
1810 int ci_mc_load_microcode(struct radeon_device *rdev)
1811 {
1812 	const __be32 *fw_data = NULL;
1813 	const __le32 *new_fw_data = NULL;
1814 	u32 running, blackout = 0, tmp;
1815 	u32 *io_mc_regs = NULL;
1816 	const __le32 *new_io_mc_regs = NULL;
1817 	int i, regs_size, ucode_size;
1818 
1819 	if (!rdev->mc_fw)
1820 		return -EINVAL;
1821 
1822 	if (rdev->new_fw) {
1823 		const struct mc_firmware_header_v1_0 *hdr =
1824 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1825 
1826 		radeon_ucode_print_mc_hdr(&hdr->header);
1827 
1828 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1829 		new_io_mc_regs = (const __le32 *)
1830 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1831 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1832 		new_fw_data = (const __le32 *)
1833 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1834 	} else {
1835 		ucode_size = rdev->mc_fw->size / 4;
1836 
1837 		switch (rdev->family) {
1838 		case CHIP_BONAIRE:
1839 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1840 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1841 			break;
1842 		case CHIP_HAWAII:
1843 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1844 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1845 			break;
1846 		default:
1847 			return -EINVAL;
1848 		}
1849 		fw_data = (const __be32 *)rdev->mc_fw->data;
1850 	}
1851 
1852 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1853 
1854 	if (running == 0) {
1855 		if (running) {
1856 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1857 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1858 		}
1859 
1860 		/* reset the engine and set to writable */
1861 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1862 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1863 
1864 		/* load mc io regs */
1865 		for (i = 0; i < regs_size; i++) {
1866 			if (rdev->new_fw) {
1867 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1868 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1869 			} else {
1870 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1871 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1872 			}
1873 		}
1874 
1875 		tmp = RREG32(MC_SEQ_MISC0);
1876 		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1877 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1878 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1879 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1880 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1881 		}
1882 
1883 		/* load the MC ucode */
1884 		for (i = 0; i < ucode_size; i++) {
1885 			if (rdev->new_fw)
1886 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1887 			else
1888 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1889 		}
1890 
1891 		/* put the engine back into the active state */
1892 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1893 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1894 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1895 
1896 		/* wait for training to complete */
1897 		for (i = 0; i < rdev->usec_timeout; i++) {
1898 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1899 				break;
1900 			udelay(1);
1901 		}
1902 		for (i = 0; i < rdev->usec_timeout; i++) {
1903 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1904 				break;
1905 			udelay(1);
1906 		}
1907 
1908 		if (running)
1909 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1910 	}
1911 
1912 	return 0;
1913 }
1914 
1915 /**
1916  * cik_init_microcode - load ucode images from disk
1917  *
1918  * @rdev: radeon_device pointer
1919  *
1920  * Use the firmware interface to load the ucode images into
1921  * the driver (not loaded into hw).
1922  * Returns 0 on success, error on failure.
1923  */
1924 static int cik_init_microcode(struct radeon_device *rdev)
1925 {
1926 	const char *chip_name;
1927 	const char *new_chip_name;
1928 	size_t pfp_req_size, me_req_size, ce_req_size,
1929 		mec_req_size, rlc_req_size, mc_req_size = 0,
1930 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1931 	char fw_name[30];
1932 	int new_fw = 0;
1933 	int err;
1934 	int num_fw;
1935 
1936 	DRM_DEBUG("\n");
1937 
1938 	switch (rdev->family) {
1939 	case CHIP_BONAIRE:
1940 		chip_name = "BONAIRE";
1941 		new_chip_name = "bonaire";
1942 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1943 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1944 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1945 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1946 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1947 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1948 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1949 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1950 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1951 		num_fw = 8;
1952 		break;
1953 	case CHIP_HAWAII:
1954 		chip_name = "HAWAII";
1955 		new_chip_name = "hawaii";
1956 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1957 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1958 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1959 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1960 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1961 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1962 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1963 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1964 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1965 		num_fw = 8;
1966 		break;
1967 	case CHIP_KAVERI:
1968 		chip_name = "KAVERI";
1969 		new_chip_name = "kaveri";
1970 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1971 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1972 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1973 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1974 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1975 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1976 		num_fw = 7;
1977 		break;
1978 	case CHIP_KABINI:
1979 		chip_name = "KABINI";
1980 		new_chip_name = "kabini";
1981 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1982 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1983 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1984 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1985 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1986 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1987 		num_fw = 6;
1988 		break;
1989 	case CHIP_MULLINS:
1990 		chip_name = "MULLINS";
1991 		new_chip_name = "mullins";
1992 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1993 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1994 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1995 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1996 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
1997 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1998 		num_fw = 6;
1999 		break;
2000 	default: BUG();
2001 	}
2002 
2003 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2004 
2005 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2006 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2007 	if (err) {
2008 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2009 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2010 		if (err)
2011 			goto out;
2012 		if (rdev->pfp_fw->size != pfp_req_size) {
2013 			printk(KERN_ERR
2014 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2015 			       rdev->pfp_fw->size, fw_name);
2016 			err = -EINVAL;
2017 			goto out;
2018 		}
2019 	} else {
2020 		err = radeon_ucode_validate(rdev->pfp_fw);
2021 		if (err) {
2022 			printk(KERN_ERR
2023 			       "cik_fw: validation failed for firmware \"%s\"\n",
2024 			       fw_name);
2025 			goto out;
2026 		} else {
2027 			new_fw++;
2028 		}
2029 	}
2030 
2031 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2032 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2033 	if (err) {
2034 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2035 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2036 		if (err)
2037 			goto out;
2038 		if (rdev->me_fw->size != me_req_size) {
2039 			printk(KERN_ERR
2040 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2041 			       rdev->me_fw->size, fw_name);
2042 			err = -EINVAL;
2043 		}
2044 	} else {
2045 		err = radeon_ucode_validate(rdev->me_fw);
2046 		if (err) {
2047 			printk(KERN_ERR
2048 			       "cik_fw: validation failed for firmware \"%s\"\n",
2049 			       fw_name);
2050 			goto out;
2051 		} else {
2052 			new_fw++;
2053 		}
2054 	}
2055 
2056 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2057 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2058 	if (err) {
2059 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2060 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2061 		if (err)
2062 			goto out;
2063 		if (rdev->ce_fw->size != ce_req_size) {
2064 			printk(KERN_ERR
2065 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2066 			       rdev->ce_fw->size, fw_name);
2067 			err = -EINVAL;
2068 		}
2069 	} else {
2070 		err = radeon_ucode_validate(rdev->ce_fw);
2071 		if (err) {
2072 			printk(KERN_ERR
2073 			       "cik_fw: validation failed for firmware \"%s\"\n",
2074 			       fw_name);
2075 			goto out;
2076 		} else {
2077 			new_fw++;
2078 		}
2079 	}
2080 
2081 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2082 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2083 	if (err) {
2084 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2085 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2086 		if (err)
2087 			goto out;
2088 		if (rdev->mec_fw->size != mec_req_size) {
2089 			printk(KERN_ERR
2090 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2091 			       rdev->mec_fw->size, fw_name);
2092 			err = -EINVAL;
2093 		}
2094 	} else {
2095 		err = radeon_ucode_validate(rdev->mec_fw);
2096 		if (err) {
2097 			printk(KERN_ERR
2098 			       "cik_fw: validation failed for firmware \"%s\"\n",
2099 			       fw_name);
2100 			goto out;
2101 		} else {
2102 			new_fw++;
2103 		}
2104 	}
2105 
2106 	if (rdev->family == CHIP_KAVERI) {
2107 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2108 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2109 		if (err) {
2110 			goto out;
2111 		} else {
2112 			err = radeon_ucode_validate(rdev->mec2_fw);
2113 			if (err) {
2114 				goto out;
2115 			} else {
2116 				new_fw++;
2117 			}
2118 		}
2119 	}
2120 
2121 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2122 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2123 	if (err) {
2124 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2125 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2126 		if (err)
2127 			goto out;
2128 		if (rdev->rlc_fw->size != rlc_req_size) {
2129 			printk(KERN_ERR
2130 			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2131 			       rdev->rlc_fw->size, fw_name);
2132 			err = -EINVAL;
2133 		}
2134 	} else {
2135 		err = radeon_ucode_validate(rdev->rlc_fw);
2136 		if (err) {
2137 			printk(KERN_ERR
2138 			       "cik_fw: validation failed for firmware \"%s\"\n",
2139 			       fw_name);
2140 			goto out;
2141 		} else {
2142 			new_fw++;
2143 		}
2144 	}
2145 
2146 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2147 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2148 	if (err) {
2149 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2150 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2151 		if (err)
2152 			goto out;
2153 		if (rdev->sdma_fw->size != sdma_req_size) {
2154 			printk(KERN_ERR
2155 			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2156 			       rdev->sdma_fw->size, fw_name);
2157 			err = -EINVAL;
2158 		}
2159 	} else {
2160 		err = radeon_ucode_validate(rdev->sdma_fw);
2161 		if (err) {
2162 			printk(KERN_ERR
2163 			       "cik_fw: validation failed for firmware \"%s\"\n",
2164 			       fw_name);
2165 			goto out;
2166 		} else {
2167 			new_fw++;
2168 		}
2169 	}
2170 
2171 	/* No SMC, MC ucode on APUs */
2172 	if (!(rdev->flags & RADEON_IS_IGP)) {
2173 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2174 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2175 		if (err) {
2176 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2177 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2178 			if (err) {
2179 				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2180 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2181 				if (err)
2182 					goto out;
2183 			}
2184 			if ((rdev->mc_fw->size != mc_req_size) &&
2185 			    (rdev->mc_fw->size != mc2_req_size)){
2186 				printk(KERN_ERR
2187 				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2188 				       rdev->mc_fw->size, fw_name);
2189 				err = -EINVAL;
2190 			}
2191 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2192 		} else {
2193 			err = radeon_ucode_validate(rdev->mc_fw);
2194 			if (err) {
2195 				printk(KERN_ERR
2196 				       "cik_fw: validation failed for firmware \"%s\"\n",
2197 				       fw_name);
2198 				goto out;
2199 			} else {
2200 				new_fw++;
2201 			}
2202 		}
2203 
2204 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2205 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2206 		if (err) {
2207 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2208 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2209 			if (err) {
2210 				printk(KERN_ERR
2211 				       "smc: error loading firmware \"%s\"\n",
2212 				       fw_name);
2213 				release_firmware(rdev->smc_fw);
2214 				rdev->smc_fw = NULL;
2215 				err = 0;
2216 			} else if (rdev->smc_fw->size != smc_req_size) {
2217 				printk(KERN_ERR
2218 				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2219 				       rdev->smc_fw->size, fw_name);
2220 				err = -EINVAL;
2221 			}
2222 		} else {
2223 			err = radeon_ucode_validate(rdev->smc_fw);
2224 			if (err) {
2225 				printk(KERN_ERR
2226 				       "cik_fw: validation failed for firmware \"%s\"\n",
2227 				       fw_name);
2228 				goto out;
2229 			} else {
2230 				new_fw++;
2231 			}
2232 		}
2233 	}
2234 
2235 	if (new_fw == 0) {
2236 		rdev->new_fw = false;
2237 	} else if (new_fw < num_fw) {
2238 		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2239 		err = -EINVAL;
2240 	} else {
2241 		rdev->new_fw = true;
2242 	}
2243 
2244 out:
2245 	if (err) {
2246 		if (err != -EINVAL)
2247 			printk(KERN_ERR
2248 			       "cik_cp: Failed to load firmware \"%s\"\n",
2249 			       fw_name);
2250 		release_firmware(rdev->pfp_fw);
2251 		rdev->pfp_fw = NULL;
2252 		release_firmware(rdev->me_fw);
2253 		rdev->me_fw = NULL;
2254 		release_firmware(rdev->ce_fw);
2255 		rdev->ce_fw = NULL;
2256 		release_firmware(rdev->mec_fw);
2257 		rdev->mec_fw = NULL;
2258 		release_firmware(rdev->mec2_fw);
2259 		rdev->mec2_fw = NULL;
2260 		release_firmware(rdev->rlc_fw);
2261 		rdev->rlc_fw = NULL;
2262 		release_firmware(rdev->sdma_fw);
2263 		rdev->sdma_fw = NULL;
2264 		release_firmware(rdev->mc_fw);
2265 		rdev->mc_fw = NULL;
2266 		release_firmware(rdev->smc_fw);
2267 		rdev->smc_fw = NULL;
2268 	}
2269 	return err;
2270 }
2271 
2272 /*
2273  * Core functions
2274  */
2275 /**
2276  * cik_tiling_mode_table_init - init the hw tiling table
2277  *
2278  * @rdev: radeon_device pointer
2279  *
2280  * Starting with SI, the tiling setup is done globally in a
2281  * set of 32 tiling modes.  Rather than selecting each set of
2282  * parameters per surface as on older asics, we just select
2283  * which index in the tiling table we want to use, and the
2284  * surface uses those parameters (CIK).
2285  */
2286 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2287 {
2288 	const u32 num_tile_mode_states = 32;
2289 	const u32 num_secondary_tile_mode_states = 16;
2290 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2291 	u32 num_pipe_configs;
2292 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2293 		rdev->config.cik.max_shader_engines;
2294 
2295 	switch (rdev->config.cik.mem_row_size_in_kb) {
2296 	case 1:
2297 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2298 		break;
2299 	case 2:
2300 	default:
2301 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2302 		break;
2303 	case 4:
2304 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2305 		break;
2306 	}
2307 
2308 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2309 	if (num_pipe_configs > 8)
2310 		num_pipe_configs = 16;
2311 
2312 	if (num_pipe_configs == 16) {
2313 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2314 			switch (reg_offset) {
2315 			case 0:
2316 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2317 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2318 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2319 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2320 				break;
2321 			case 1:
2322 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2323 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2324 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2326 				break;
2327 			case 2:
2328 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2329 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2330 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2331 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2332 				break;
2333 			case 3:
2334 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2335 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2336 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2337 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2338 				break;
2339 			case 4:
2340 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2341 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2342 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2343 						 TILE_SPLIT(split_equal_to_row_size));
2344 				break;
2345 			case 5:
2346 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2347 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2348 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2349 				break;
2350 			case 6:
2351 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2352 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2353 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2355 				break;
2356 			case 7:
2357 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2358 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2359 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2360 						 TILE_SPLIT(split_equal_to_row_size));
2361 				break;
2362 			case 8:
2363 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2364 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2365 				break;
2366 			case 9:
2367 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2368 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2369 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2370 				break;
2371 			case 10:
2372 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2373 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2374 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2375 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2376 				break;
2377 			case 11:
2378 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2379 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2380 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2381 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2382 				break;
2383 			case 12:
2384 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2385 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2386 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2388 				break;
2389 			case 13:
2390 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2391 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2393 				break;
2394 			case 14:
2395 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2396 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2397 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2398 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2399 				break;
2400 			case 16:
2401 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2402 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2403 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2404 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2405 				break;
2406 			case 17:
2407 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2408 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2409 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2410 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2411 				break;
2412 			case 27:
2413 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2414 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2416 				break;
2417 			case 28:
2418 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2419 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2420 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2421 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2422 				break;
2423 			case 29:
2424 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2425 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2426 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2427 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2428 				break;
2429 			case 30:
2430 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2431 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2432 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434 				break;
2435 			default:
2436 				gb_tile_moden = 0;
2437 				break;
2438 			}
2439 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2440 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2441 		}
2442 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2443 			switch (reg_offset) {
2444 			case 0:
2445 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2447 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2448 						 NUM_BANKS(ADDR_SURF_16_BANK));
2449 				break;
2450 			case 1:
2451 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2452 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2453 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2454 						 NUM_BANKS(ADDR_SURF_16_BANK));
2455 				break;
2456 			case 2:
2457 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2459 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2460 						 NUM_BANKS(ADDR_SURF_16_BANK));
2461 				break;
2462 			case 3:
2463 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2464 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2465 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2466 						 NUM_BANKS(ADDR_SURF_16_BANK));
2467 				break;
2468 			case 4:
2469 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2472 						 NUM_BANKS(ADDR_SURF_8_BANK));
2473 				break;
2474 			case 5:
2475 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2476 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2477 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2478 						 NUM_BANKS(ADDR_SURF_4_BANK));
2479 				break;
2480 			case 6:
2481 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2483 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2484 						 NUM_BANKS(ADDR_SURF_2_BANK));
2485 				break;
2486 			case 8:
2487 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2488 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2489 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2490 						 NUM_BANKS(ADDR_SURF_16_BANK));
2491 				break;
2492 			case 9:
2493 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2495 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2496 						 NUM_BANKS(ADDR_SURF_16_BANK));
2497 				break;
2498 			case 10:
2499 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2500 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2501 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2502 						 NUM_BANKS(ADDR_SURF_16_BANK));
2503 				break;
2504 			case 11:
2505 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2506 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2507 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2508 						 NUM_BANKS(ADDR_SURF_8_BANK));
2509 				break;
2510 			case 12:
2511 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2512 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2513 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2514 						 NUM_BANKS(ADDR_SURF_4_BANK));
2515 				break;
2516 			case 13:
2517 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2518 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2519 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2520 						 NUM_BANKS(ADDR_SURF_2_BANK));
2521 				break;
2522 			case 14:
2523 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2525 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2526 						 NUM_BANKS(ADDR_SURF_2_BANK));
2527 				break;
2528 			default:
2529 				gb_tile_moden = 0;
2530 				break;
2531 			}
2532 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2533 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2534 		}
2535 	} else if (num_pipe_configs == 8) {
2536 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2537 			switch (reg_offset) {
2538 			case 0:
2539 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2540 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2541 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2542 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2543 				break;
2544 			case 1:
2545 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2546 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2547 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2549 				break;
2550 			case 2:
2551 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2552 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2553 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2555 				break;
2556 			case 3:
2557 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2558 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2559 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2561 				break;
2562 			case 4:
2563 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2564 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2565 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2566 						 TILE_SPLIT(split_equal_to_row_size));
2567 				break;
2568 			case 5:
2569 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2570 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2572 				break;
2573 			case 6:
2574 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2575 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2576 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2577 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2578 				break;
2579 			case 7:
2580 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2581 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2582 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2583 						 TILE_SPLIT(split_equal_to_row_size));
2584 				break;
2585 			case 8:
2586 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2587 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2588 				break;
2589 			case 9:
2590 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2591 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2592 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2593 				break;
2594 			case 10:
2595 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2596 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2597 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2598 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2599 				break;
2600 			case 11:
2601 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2602 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2603 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2604 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2605 				break;
2606 			case 12:
2607 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2608 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2609 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2610 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2611 				break;
2612 			case 13:
2613 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2614 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2615 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2616 				break;
2617 			case 14:
2618 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2619 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2620 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2621 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2622 				break;
2623 			case 16:
2624 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2625 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2626 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2627 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2628 				break;
2629 			case 17:
2630 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2631 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2632 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2633 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2634 				break;
2635 			case 27:
2636 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2637 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2638 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2639 				break;
2640 			case 28:
2641 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2642 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2643 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2644 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2645 				break;
2646 			case 29:
2647 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2648 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2649 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2650 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2651 				break;
2652 			case 30:
2653 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2654 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2655 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2656 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2657 				break;
2658 			default:
2659 				gb_tile_moden = 0;
2660 				break;
2661 			}
2662 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2663 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2664 		}
2665 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2666 			switch (reg_offset) {
2667 			case 0:
2668 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2669 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2670 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2671 						 NUM_BANKS(ADDR_SURF_16_BANK));
2672 				break;
2673 			case 1:
2674 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2675 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2676 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2677 						 NUM_BANKS(ADDR_SURF_16_BANK));
2678 				break;
2679 			case 2:
2680 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2681 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2682 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2683 						 NUM_BANKS(ADDR_SURF_16_BANK));
2684 				break;
2685 			case 3:
2686 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2687 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2688 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2689 						 NUM_BANKS(ADDR_SURF_16_BANK));
2690 				break;
2691 			case 4:
2692 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2693 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2694 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2695 						 NUM_BANKS(ADDR_SURF_8_BANK));
2696 				break;
2697 			case 5:
2698 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2699 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2700 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2701 						 NUM_BANKS(ADDR_SURF_4_BANK));
2702 				break;
2703 			case 6:
2704 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2705 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2706 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2707 						 NUM_BANKS(ADDR_SURF_2_BANK));
2708 				break;
2709 			case 8:
2710 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2711 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2712 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2713 						 NUM_BANKS(ADDR_SURF_16_BANK));
2714 				break;
2715 			case 9:
2716 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2717 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2718 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2719 						 NUM_BANKS(ADDR_SURF_16_BANK));
2720 				break;
2721 			case 10:
2722 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2723 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2724 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2725 						 NUM_BANKS(ADDR_SURF_16_BANK));
2726 				break;
2727 			case 11:
2728 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2729 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2730 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2731 						 NUM_BANKS(ADDR_SURF_16_BANK));
2732 				break;
2733 			case 12:
2734 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2735 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2736 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2737 						 NUM_BANKS(ADDR_SURF_8_BANK));
2738 				break;
2739 			case 13:
2740 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2741 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2742 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2743 						 NUM_BANKS(ADDR_SURF_4_BANK));
2744 				break;
2745 			case 14:
2746 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2747 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2748 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2749 						 NUM_BANKS(ADDR_SURF_2_BANK));
2750 				break;
2751 			default:
2752 				gb_tile_moden = 0;
2753 				break;
2754 			}
2755 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2756 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2757 		}
2758 	} else if (num_pipe_configs == 4) {
2759 		if (num_rbs == 4) {
2760 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2761 				switch (reg_offset) {
2762 				case 0:
2763 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2764 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2765 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2766 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2767 					break;
2768 				case 1:
2769 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2770 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2771 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2772 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2773 					break;
2774 				case 2:
2775 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2776 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2777 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2778 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2779 					break;
2780 				case 3:
2781 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2782 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2783 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2784 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2785 					break;
2786 				case 4:
2787 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2788 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2789 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2790 							 TILE_SPLIT(split_equal_to_row_size));
2791 					break;
2792 				case 5:
2793 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2794 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2795 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2796 					break;
2797 				case 6:
2798 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2799 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2800 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2802 					break;
2803 				case 7:
2804 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2805 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2806 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2807 							 TILE_SPLIT(split_equal_to_row_size));
2808 					break;
2809 				case 8:
2810 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2811 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2812 					break;
2813 				case 9:
2814 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2815 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2816 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2817 					break;
2818 				case 10:
2819 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2820 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2821 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2822 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2823 					break;
2824 				case 11:
2825 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2826 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2827 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2828 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2829 					break;
2830 				case 12:
2831 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2832 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2833 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2834 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2835 					break;
2836 				case 13:
2837 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2838 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2839 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2840 					break;
2841 				case 14:
2842 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2843 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2844 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2845 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2846 					break;
2847 				case 16:
2848 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2849 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2850 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2851 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2852 					break;
2853 				case 17:
2854 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2855 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2856 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2857 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2858 					break;
2859 				case 27:
2860 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2861 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2862 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2863 					break;
2864 				case 28:
2865 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2866 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2867 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2868 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2869 					break;
2870 				case 29:
2871 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2872 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2873 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2874 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2875 					break;
2876 				case 30:
2877 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2878 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2879 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2880 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2881 					break;
2882 				default:
2883 					gb_tile_moden = 0;
2884 					break;
2885 				}
2886 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2887 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2888 			}
2889 		} else if (num_rbs < 4) {
2890 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2891 				switch (reg_offset) {
2892 				case 0:
2893 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2894 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2895 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2896 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2897 					break;
2898 				case 1:
2899 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2900 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2901 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2902 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2903 					break;
2904 				case 2:
2905 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2906 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2907 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2908 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2909 					break;
2910 				case 3:
2911 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2912 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2913 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2914 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2915 					break;
2916 				case 4:
2917 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2919 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2920 							 TILE_SPLIT(split_equal_to_row_size));
2921 					break;
2922 				case 5:
2923 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2924 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2925 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2926 					break;
2927 				case 6:
2928 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2929 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2930 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2931 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2932 					break;
2933 				case 7:
2934 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2935 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2936 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2937 							 TILE_SPLIT(split_equal_to_row_size));
2938 					break;
2939 				case 8:
2940 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2941 						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2942 					break;
2943 				case 9:
2944 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2945 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2946 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2947 					break;
2948 				case 10:
2949 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2950 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2951 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2952 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2953 					break;
2954 				case 11:
2955 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2956 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2957 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2958 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959 					break;
2960 				case 12:
2961 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2962 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2963 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2964 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2965 					break;
2966 				case 13:
2967 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2968 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2969 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2970 					break;
2971 				case 14:
2972 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2973 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2974 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2975 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2976 					break;
2977 				case 16:
2978 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2979 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2980 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2981 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2982 					break;
2983 				case 17:
2984 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2985 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2986 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2987 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2988 					break;
2989 				case 27:
2990 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2991 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2992 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2993 					break;
2994 				case 28:
2995 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2996 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2997 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2998 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2999 					break;
3000 				case 29:
3001 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3002 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3003 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3004 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3005 					break;
3006 				case 30:
3007 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3008 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3009 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3010 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3011 					break;
3012 				default:
3013 					gb_tile_moden = 0;
3014 					break;
3015 				}
3016 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3017 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3018 			}
3019 		}
3020 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3021 			switch (reg_offset) {
3022 			case 0:
3023 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3024 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3025 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3026 						 NUM_BANKS(ADDR_SURF_16_BANK));
3027 				break;
3028 			case 1:
3029 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3030 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3031 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3032 						 NUM_BANKS(ADDR_SURF_16_BANK));
3033 				break;
3034 			case 2:
3035 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3036 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3037 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3038 						 NUM_BANKS(ADDR_SURF_16_BANK));
3039 				break;
3040 			case 3:
3041 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3042 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3043 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3044 						 NUM_BANKS(ADDR_SURF_16_BANK));
3045 				break;
3046 			case 4:
3047 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3048 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3049 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3050 						 NUM_BANKS(ADDR_SURF_16_BANK));
3051 				break;
3052 			case 5:
3053 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3054 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3055 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3056 						 NUM_BANKS(ADDR_SURF_8_BANK));
3057 				break;
3058 			case 6:
3059 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3060 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3061 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3062 						 NUM_BANKS(ADDR_SURF_4_BANK));
3063 				break;
3064 			case 8:
3065 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3066 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3067 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3068 						 NUM_BANKS(ADDR_SURF_16_BANK));
3069 				break;
3070 			case 9:
3071 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3072 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3073 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3074 						 NUM_BANKS(ADDR_SURF_16_BANK));
3075 				break;
3076 			case 10:
3077 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3078 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3079 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3080 						 NUM_BANKS(ADDR_SURF_16_BANK));
3081 				break;
3082 			case 11:
3083 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3084 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3085 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3086 						 NUM_BANKS(ADDR_SURF_16_BANK));
3087 				break;
3088 			case 12:
3089 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3090 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3091 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3092 						 NUM_BANKS(ADDR_SURF_16_BANK));
3093 				break;
3094 			case 13:
3095 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3096 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3097 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3098 						 NUM_BANKS(ADDR_SURF_8_BANK));
3099 				break;
3100 			case 14:
3101 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3102 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3103 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3104 						 NUM_BANKS(ADDR_SURF_4_BANK));
3105 				break;
3106 			default:
3107 				gb_tile_moden = 0;
3108 				break;
3109 			}
3110 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3111 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3112 		}
3113 	} else if (num_pipe_configs == 2) {
3114 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3115 			switch (reg_offset) {
3116 			case 0:
3117 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3118 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3119 						 PIPE_CONFIG(ADDR_SURF_P2) |
3120 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3121 				break;
3122 			case 1:
3123 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3124 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3125 						 PIPE_CONFIG(ADDR_SURF_P2) |
3126 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3127 				break;
3128 			case 2:
3129 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3130 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3131 						 PIPE_CONFIG(ADDR_SURF_P2) |
3132 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3133 				break;
3134 			case 3:
3135 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3136 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3137 						 PIPE_CONFIG(ADDR_SURF_P2) |
3138 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3139 				break;
3140 			case 4:
3141 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3142 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3143 						 PIPE_CONFIG(ADDR_SURF_P2) |
3144 						 TILE_SPLIT(split_equal_to_row_size));
3145 				break;
3146 			case 5:
3147 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3148 						 PIPE_CONFIG(ADDR_SURF_P2) |
3149 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3150 				break;
3151 			case 6:
3152 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3153 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3154 						 PIPE_CONFIG(ADDR_SURF_P2) |
3155 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3156 				break;
3157 			case 7:
3158 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3159 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3160 						 PIPE_CONFIG(ADDR_SURF_P2) |
3161 						 TILE_SPLIT(split_equal_to_row_size));
3162 				break;
3163 			case 8:
3164 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3165 						PIPE_CONFIG(ADDR_SURF_P2);
3166 				break;
3167 			case 9:
3168 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3169 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3170 						 PIPE_CONFIG(ADDR_SURF_P2));
3171 				break;
3172 			case 10:
3173 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3174 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3175 						 PIPE_CONFIG(ADDR_SURF_P2) |
3176 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3177 				break;
3178 			case 11:
3179 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3180 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3181 						 PIPE_CONFIG(ADDR_SURF_P2) |
3182 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3183 				break;
3184 			case 12:
3185 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3186 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3187 						 PIPE_CONFIG(ADDR_SURF_P2) |
3188 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3189 				break;
3190 			case 13:
3191 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3192 						 PIPE_CONFIG(ADDR_SURF_P2) |
3193 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3194 				break;
3195 			case 14:
3196 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3197 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3198 						 PIPE_CONFIG(ADDR_SURF_P2) |
3199 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3200 				break;
3201 			case 16:
3202 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3203 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3204 						 PIPE_CONFIG(ADDR_SURF_P2) |
3205 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3206 				break;
3207 			case 17:
3208 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3209 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3210 						 PIPE_CONFIG(ADDR_SURF_P2) |
3211 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3212 				break;
3213 			case 27:
3214 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3215 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3216 						 PIPE_CONFIG(ADDR_SURF_P2));
3217 				break;
3218 			case 28:
3219 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3220 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3221 						 PIPE_CONFIG(ADDR_SURF_P2) |
3222 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3223 				break;
3224 			case 29:
3225 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3226 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3227 						 PIPE_CONFIG(ADDR_SURF_P2) |
3228 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3229 				break;
3230 			case 30:
3231 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3232 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3233 						 PIPE_CONFIG(ADDR_SURF_P2) |
3234 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3235 				break;
3236 			default:
3237 				gb_tile_moden = 0;
3238 				break;
3239 			}
3240 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3241 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3242 		}
3243 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3244 			switch (reg_offset) {
3245 			case 0:
3246 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3247 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3248 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3249 						 NUM_BANKS(ADDR_SURF_16_BANK));
3250 				break;
3251 			case 1:
3252 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3253 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3254 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3255 						 NUM_BANKS(ADDR_SURF_16_BANK));
3256 				break;
3257 			case 2:
3258 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3259 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3260 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3261 						 NUM_BANKS(ADDR_SURF_16_BANK));
3262 				break;
3263 			case 3:
3264 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3265 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3266 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3267 						 NUM_BANKS(ADDR_SURF_16_BANK));
3268 				break;
3269 			case 4:
3270 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3271 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3272 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3273 						 NUM_BANKS(ADDR_SURF_16_BANK));
3274 				break;
3275 			case 5:
3276 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3277 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3278 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3279 						 NUM_BANKS(ADDR_SURF_16_BANK));
3280 				break;
3281 			case 6:
3282 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3283 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3284 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3285 						 NUM_BANKS(ADDR_SURF_8_BANK));
3286 				break;
3287 			case 8:
3288 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3289 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3290 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3291 						 NUM_BANKS(ADDR_SURF_16_BANK));
3292 				break;
3293 			case 9:
3294 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3295 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3296 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3297 						 NUM_BANKS(ADDR_SURF_16_BANK));
3298 				break;
3299 			case 10:
3300 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3301 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3302 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3303 						 NUM_BANKS(ADDR_SURF_16_BANK));
3304 				break;
3305 			case 11:
3306 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3307 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3308 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3309 						 NUM_BANKS(ADDR_SURF_16_BANK));
3310 				break;
3311 			case 12:
3312 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3313 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3314 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3315 						 NUM_BANKS(ADDR_SURF_16_BANK));
3316 				break;
3317 			case 13:
3318 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3319 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3320 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3321 						 NUM_BANKS(ADDR_SURF_16_BANK));
3322 				break;
3323 			case 14:
3324 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3325 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3326 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3327 						 NUM_BANKS(ADDR_SURF_8_BANK));
3328 				break;
3329 			default:
3330 				gb_tile_moden = 0;
3331 				break;
3332 			}
3333 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3334 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3335 		}
3336 	} else
3337 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3338 }
3339 
3340 /**
3341  * cik_select_se_sh - select which SE, SH to address
3342  *
3343  * @rdev: radeon_device pointer
3344  * @se_num: shader engine to address
3345  * @sh_num: sh block to address
3346  *
3347  * Select which SE, SH combinations to address. Certain
3348  * registers are instanced per SE or SH.  0xffffffff means
3349  * broadcast to all SEs or SHs (CIK).
3350  */
3351 static void cik_select_se_sh(struct radeon_device *rdev,
3352 			     u32 se_num, u32 sh_num)
3353 {
3354 	u32 data = INSTANCE_BROADCAST_WRITES;
3355 
3356 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3357 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3358 	else if (se_num == 0xffffffff)
3359 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3360 	else if (sh_num == 0xffffffff)
3361 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3362 	else
3363 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3364 	WREG32(GRBM_GFX_INDEX, data);
3365 }
3366 
3367 /**
3368  * cik_create_bitmask - create a bitmask
3369  *
3370  * @bit_width: length of the mask
3371  *
3372  * create a variable length bit mask (CIK).
3373  * Returns the bitmask.
3374  */
3375 static u32 cik_create_bitmask(u32 bit_width)
3376 {
3377 	u32 i, mask = 0;
3378 
3379 	for (i = 0; i < bit_width; i++) {
3380 		mask <<= 1;
3381 		mask |= 1;
3382 	}
3383 	return mask;
3384 }
3385 
3386 /**
3387  * cik_get_rb_disabled - computes the mask of disabled RBs
3388  *
3389  * @rdev: radeon_device pointer
3390  * @max_rb_num: max RBs (render backends) for the asic
3391  * @se_num: number of SEs (shader engines) for the asic
3392  * @sh_per_se: number of SH blocks per SE for the asic
3393  *
3394  * Calculates the bitmask of disabled RBs (CIK).
3395  * Returns the disabled RB bitmask.
3396  */
3397 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3398 			      u32 max_rb_num_per_se,
3399 			      u32 sh_per_se)
3400 {
3401 	u32 data, mask;
3402 
3403 	data = RREG32(CC_RB_BACKEND_DISABLE);
3404 	if (data & 1)
3405 		data &= BACKEND_DISABLE_MASK;
3406 	else
3407 		data = 0;
3408 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3409 
3410 	data >>= BACKEND_DISABLE_SHIFT;
3411 
3412 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3413 
3414 	return data & mask;
3415 }
3416 
3417 /**
3418  * cik_setup_rb - setup the RBs on the asic
3419  *
3420  * @rdev: radeon_device pointer
3421  * @se_num: number of SEs (shader engines) for the asic
3422  * @sh_per_se: number of SH blocks per SE for the asic
3423  * @max_rb_num: max RBs (render backends) for the asic
3424  *
3425  * Configures per-SE/SH RB registers (CIK).
3426  */
3427 static void cik_setup_rb(struct radeon_device *rdev,
3428 			 u32 se_num, u32 sh_per_se,
3429 			 u32 max_rb_num_per_se)
3430 {
3431 	int i, j;
3432 	u32 data, mask;
3433 	u32 disabled_rbs = 0;
3434 	u32 enabled_rbs = 0;
3435 
3436 	mutex_lock(&rdev->grbm_idx_mutex);
3437 	for (i = 0; i < se_num; i++) {
3438 		for (j = 0; j < sh_per_se; j++) {
3439 			cik_select_se_sh(rdev, i, j);
3440 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3441 			if (rdev->family == CHIP_HAWAII)
3442 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3443 			else
3444 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3445 		}
3446 	}
3447 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3448 	mutex_unlock(&rdev->grbm_idx_mutex);
3449 
3450 	mask = 1;
3451 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3452 		if (!(disabled_rbs & mask))
3453 			enabled_rbs |= mask;
3454 		mask <<= 1;
3455 	}
3456 
3457 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3458 
3459 	mutex_lock(&rdev->grbm_idx_mutex);
3460 	for (i = 0; i < se_num; i++) {
3461 		cik_select_se_sh(rdev, i, 0xffffffff);
3462 		data = 0;
3463 		for (j = 0; j < sh_per_se; j++) {
3464 			switch (enabled_rbs & 3) {
3465 			case 0:
3466 				if (j == 0)
3467 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3468 				else
3469 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3470 				break;
3471 			case 1:
3472 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3473 				break;
3474 			case 2:
3475 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3476 				break;
3477 			case 3:
3478 			default:
3479 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3480 				break;
3481 			}
3482 			enabled_rbs >>= 2;
3483 		}
3484 		WREG32(PA_SC_RASTER_CONFIG, data);
3485 	}
3486 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3487 	mutex_unlock(&rdev->grbm_idx_mutex);
3488 }
3489 
3490 /**
3491  * cik_gpu_init - setup the 3D engine
3492  *
3493  * @rdev: radeon_device pointer
3494  *
3495  * Configures the 3D engine and tiling configuration
3496  * registers so that the 3D engine is usable.
3497  */
3498 static void cik_gpu_init(struct radeon_device *rdev)
3499 {
3500 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3501 	u32 mc_shared_chmap, mc_arb_ramcfg;
3502 	u32 hdp_host_path_cntl;
3503 	u32 tmp;
3504 	int i, j;
3505 
3506 	switch (rdev->family) {
3507 	case CHIP_BONAIRE:
3508 		rdev->config.cik.max_shader_engines = 2;
3509 		rdev->config.cik.max_tile_pipes = 4;
3510 		rdev->config.cik.max_cu_per_sh = 7;
3511 		rdev->config.cik.max_sh_per_se = 1;
3512 		rdev->config.cik.max_backends_per_se = 2;
3513 		rdev->config.cik.max_texture_channel_caches = 4;
3514 		rdev->config.cik.max_gprs = 256;
3515 		rdev->config.cik.max_gs_threads = 32;
3516 		rdev->config.cik.max_hw_contexts = 8;
3517 
3518 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3519 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3520 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3521 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3522 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3523 		break;
3524 	case CHIP_HAWAII:
3525 		rdev->config.cik.max_shader_engines = 4;
3526 		rdev->config.cik.max_tile_pipes = 16;
3527 		rdev->config.cik.max_cu_per_sh = 11;
3528 		rdev->config.cik.max_sh_per_se = 1;
3529 		rdev->config.cik.max_backends_per_se = 4;
3530 		rdev->config.cik.max_texture_channel_caches = 16;
3531 		rdev->config.cik.max_gprs = 256;
3532 		rdev->config.cik.max_gs_threads = 32;
3533 		rdev->config.cik.max_hw_contexts = 8;
3534 
3535 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3536 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3537 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3538 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3539 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3540 		break;
3541 	case CHIP_KAVERI:
3542 		rdev->config.cik.max_shader_engines = 1;
3543 		rdev->config.cik.max_tile_pipes = 4;
3544 		if ((rdev->pdev->device == 0x1304) ||
3545 		    (rdev->pdev->device == 0x1305) ||
3546 		    (rdev->pdev->device == 0x130C) ||
3547 		    (rdev->pdev->device == 0x130F) ||
3548 		    (rdev->pdev->device == 0x1310) ||
3549 		    (rdev->pdev->device == 0x1311) ||
3550 		    (rdev->pdev->device == 0x131C)) {
3551 			rdev->config.cik.max_cu_per_sh = 8;
3552 			rdev->config.cik.max_backends_per_se = 2;
3553 		} else if ((rdev->pdev->device == 0x1309) ||
3554 			   (rdev->pdev->device == 0x130A) ||
3555 			   (rdev->pdev->device == 0x130D) ||
3556 			   (rdev->pdev->device == 0x1313) ||
3557 			   (rdev->pdev->device == 0x131D)) {
3558 			rdev->config.cik.max_cu_per_sh = 6;
3559 			rdev->config.cik.max_backends_per_se = 2;
3560 		} else if ((rdev->pdev->device == 0x1306) ||
3561 			   (rdev->pdev->device == 0x1307) ||
3562 			   (rdev->pdev->device == 0x130B) ||
3563 			   (rdev->pdev->device == 0x130E) ||
3564 			   (rdev->pdev->device == 0x1315) ||
3565 			   (rdev->pdev->device == 0x1318) ||
3566 			   (rdev->pdev->device == 0x131B)) {
3567 			rdev->config.cik.max_cu_per_sh = 4;
3568 			rdev->config.cik.max_backends_per_se = 1;
3569 		} else {
3570 			rdev->config.cik.max_cu_per_sh = 3;
3571 			rdev->config.cik.max_backends_per_se = 1;
3572 		}
3573 		rdev->config.cik.max_sh_per_se = 1;
3574 		rdev->config.cik.max_texture_channel_caches = 4;
3575 		rdev->config.cik.max_gprs = 256;
3576 		rdev->config.cik.max_gs_threads = 16;
3577 		rdev->config.cik.max_hw_contexts = 8;
3578 
3579 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3580 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3581 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3582 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3583 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3584 		break;
3585 	case CHIP_KABINI:
3586 	case CHIP_MULLINS:
3587 	default:
3588 		rdev->config.cik.max_shader_engines = 1;
3589 		rdev->config.cik.max_tile_pipes = 2;
3590 		rdev->config.cik.max_cu_per_sh = 2;
3591 		rdev->config.cik.max_sh_per_se = 1;
3592 		rdev->config.cik.max_backends_per_se = 1;
3593 		rdev->config.cik.max_texture_channel_caches = 2;
3594 		rdev->config.cik.max_gprs = 256;
3595 		rdev->config.cik.max_gs_threads = 16;
3596 		rdev->config.cik.max_hw_contexts = 8;
3597 
3598 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3599 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3600 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3601 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3602 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3603 		break;
3604 	}
3605 
3606 	/* Initialize HDP */
3607 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3608 		WREG32((0x2c14 + j), 0x00000000);
3609 		WREG32((0x2c18 + j), 0x00000000);
3610 		WREG32((0x2c1c + j), 0x00000000);
3611 		WREG32((0x2c20 + j), 0x00000000);
3612 		WREG32((0x2c24 + j), 0x00000000);
3613 	}
3614 
3615 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3616 	WREG32(SRBM_INT_CNTL, 0x1);
3617 	WREG32(SRBM_INT_ACK, 0x1);
3618 
3619 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3620 
3621 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3622 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3623 
3624 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3625 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3626 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3627 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3628 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3629 		rdev->config.cik.mem_row_size_in_kb = 4;
3630 	/* XXX use MC settings? */
3631 	rdev->config.cik.shader_engine_tile_size = 32;
3632 	rdev->config.cik.num_gpus = 1;
3633 	rdev->config.cik.multi_gpu_tile_size = 64;
3634 
3635 	/* fix up row size */
3636 	gb_addr_config &= ~ROW_SIZE_MASK;
3637 	switch (rdev->config.cik.mem_row_size_in_kb) {
3638 	case 1:
3639 	default:
3640 		gb_addr_config |= ROW_SIZE(0);
3641 		break;
3642 	case 2:
3643 		gb_addr_config |= ROW_SIZE(1);
3644 		break;
3645 	case 4:
3646 		gb_addr_config |= ROW_SIZE(2);
3647 		break;
3648 	}
3649 
3650 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3651 	 * not have bank info, so create a custom tiling dword.
3652 	 * bits 3:0   num_pipes
3653 	 * bits 7:4   num_banks
3654 	 * bits 11:8  group_size
3655 	 * bits 15:12 row_size
3656 	 */
3657 	rdev->config.cik.tile_config = 0;
3658 	switch (rdev->config.cik.num_tile_pipes) {
3659 	case 1:
3660 		rdev->config.cik.tile_config |= (0 << 0);
3661 		break;
3662 	case 2:
3663 		rdev->config.cik.tile_config |= (1 << 0);
3664 		break;
3665 	case 4:
3666 		rdev->config.cik.tile_config |= (2 << 0);
3667 		break;
3668 	case 8:
3669 	default:
3670 		/* XXX what about 12? */
3671 		rdev->config.cik.tile_config |= (3 << 0);
3672 		break;
3673 	}
3674 	rdev->config.cik.tile_config |=
3675 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3676 	rdev->config.cik.tile_config |=
3677 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3678 	rdev->config.cik.tile_config |=
3679 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3680 
3681 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3682 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3683 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3684 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3685 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3686 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3687 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3688 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3689 
3690 	cik_tiling_mode_table_init(rdev);
3691 
3692 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3693 		     rdev->config.cik.max_sh_per_se,
3694 		     rdev->config.cik.max_backends_per_se);
3695 
3696 	rdev->config.cik.active_cus = 0;
3697 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3698 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3699 			rdev->config.cik.active_cus +=
3700 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3701 		}
3702 	}
3703 
3704 	/* set HW defaults for 3D engine */
3705 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3706 
3707 	mutex_lock(&rdev->grbm_idx_mutex);
3708 	/*
3709 	 * making sure that the following register writes will be broadcasted
3710 	 * to all the shaders
3711 	 */
3712 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3713 	WREG32(SX_DEBUG_1, 0x20);
3714 
3715 	WREG32(TA_CNTL_AUX, 0x00010000);
3716 
3717 	tmp = RREG32(SPI_CONFIG_CNTL);
3718 	tmp |= 0x03000000;
3719 	WREG32(SPI_CONFIG_CNTL, tmp);
3720 
3721 	WREG32(SQ_CONFIG, 1);
3722 
3723 	WREG32(DB_DEBUG, 0);
3724 
3725 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3726 	tmp |= 0x00000400;
3727 	WREG32(DB_DEBUG2, tmp);
3728 
3729 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3730 	tmp |= 0x00020200;
3731 	WREG32(DB_DEBUG3, tmp);
3732 
3733 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3734 	tmp |= 0x00018208;
3735 	WREG32(CB_HW_CONTROL, tmp);
3736 
3737 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3738 
3739 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3740 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3741 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3742 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3743 
3744 	WREG32(VGT_NUM_INSTANCES, 1);
3745 
3746 	WREG32(CP_PERFMON_CNTL, 0);
3747 
3748 	WREG32(SQ_CONFIG, 0);
3749 
3750 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3751 					  FORCE_EOV_MAX_REZ_CNT(255)));
3752 
3753 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3754 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3755 
3756 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3757 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3758 
3759 	tmp = RREG32(HDP_MISC_CNTL);
3760 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3761 	WREG32(HDP_MISC_CNTL, tmp);
3762 
3763 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3764 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3765 
3766 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3767 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3768 	mutex_unlock(&rdev->grbm_idx_mutex);
3769 
3770 	udelay(50);
3771 }
3772 
3773 /*
3774  * GPU scratch registers helpers function.
3775  */
3776 /**
3777  * cik_scratch_init - setup driver info for CP scratch regs
3778  *
3779  * @rdev: radeon_device pointer
3780  *
3781  * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
3783  * is not used by default on newer asics (r6xx+).  On newer asics,
3784  * memory buffers are used for fences rather than scratch regs.
3785  */
3786 static void cik_scratch_init(struct radeon_device *rdev)
3787 {
3788 	int i;
3789 
3790 	rdev->scratch.num_reg = 7;
3791 	rdev->scratch.reg_base = SCRATCH_REG0;
3792 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3793 		rdev->scratch.free[i] = true;
3794 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3795 	}
3796 }
3797 
3798 /**
3799  * cik_ring_test - basic gfx ring test
3800  *
3801  * @rdev: radeon_device pointer
3802  * @ring: radeon_ring structure holding ring information
3803  *
3804  * Allocate a scratch register and write to it using the gfx ring (CIK).
3805  * Provides a basic gfx ring test to verify that the ring is working.
3806  * Used by cik_cp_gfx_resume();
3807  * Returns 0 on success, error on failure.
3808  */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	/* grab a free CP scratch register to use as the test target */
	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed with a known value the CP write must overwrite */
	WREG32(scratch, 0xCAFEDEAD);
	/* the test packet is 3 dwords */
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* ask the CP to write 0xDEADBEEF into the scratch register */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring, false);

	/* poll for the write to land, up to rdev->usec_timeout microseconds */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}
3849 
3850 /**
3851  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3852  *
3853  * @rdev: radeon_device pointer
3854  * @ridx: radeon ring index
3855  *
3856  * Emits an hdp flush on the cp.
3857  */
static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
				       int ridx)
{
	struct radeon_ring *ring = &rdev->ring[ridx];
	u32 ref_and_mask;

	/* Pick the GPU_HDP_FLUSH ref/mask bit for the engine this ring runs
	 * on.  Note the compute cases deliberately double as the default
	 * branch; only the gfx ring uses CP0. */
	switch (ring->idx) {
	case CAYMAN_RING_TYPE_CP1_INDEX:
	case CAYMAN_RING_TYPE_CP2_INDEX:
	default:
		switch (ring->me) {
		case 0:
			ref_and_mask = CP2 << ring->pipe;
			break;
		case 1:
			ref_and_mask = CP6 << ring->pipe;
			break;
		default:
			/* unknown microengine - nothing we can flush */
			return;
		}
		break;
	case RADEON_RING_TYPE_GFX_INDEX:
		ref_and_mask = CP0;
		break;
	}

	/* request an HDP flush and wait until the done bit matches */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, 0x20); /* poll interval */
}
3894 
3895 /**
3896  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3897  *
3898  * @rdev: radeon_device pointer
3899  * @fence: radeon fence object
3900  *
 * Emits a fence sequence number on the gfx ring and flushes
3902  * GPU caches.
3903  */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address where the fence sequence number is written */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* fence address must be dword aligned; low bits are masked off */
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* dummy EOP: no interrupt (INT_SEL(0)) */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	radeon_ring_write(ring, fence->seq - 1);
	radeon_ring_write(ring, 0);

	/* Then send the real EOP event down the pipe. */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* real EOP: write seq and raise the interrupt (INT_SEL(2)) */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3935 
3936 /**
3937  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3938  *
3939  * @rdev: radeon_device pointer
3940  * @fence: radeon fence object
3941  *
 * Emits a fence sequence number on the compute ring and flushes
3943  * GPU caches.
3944  */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address where the fence sequence number is written */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	/* fence address must be dword aligned; low bits are masked off */
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3963 
3964 /**
3965  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3966  *
3967  * @rdev: radeon_device pointer
3968  * @ring: radeon ring buffer object
3969  * @semaphore: radeon semaphore object
 * @emit_wait: Is this a semaphore wait?
3971  *
3972  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3973  * from running ahead of semaphore waits.
3974  */
3975 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3976 			     struct radeon_ring *ring,
3977 			     struct radeon_semaphore *semaphore,
3978 			     bool emit_wait)
3979 {
3980 	uint64_t addr = semaphore->gpu_addr;
3981 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3982 
3983 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3984 	radeon_ring_write(ring, lower_32_bits(addr));
3985 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3986 
3987 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3988 		/* Prevent the PFP from running ahead of the semaphore wait */
3989 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3990 		radeon_ring_write(ring, 0x0);
3991 	}
3992 
3993 	return true;
3994 }
3995 
3996 /**
3997  * cik_copy_cpdma - copy pages using the CP DMA engine
3998  *
3999  * @rdev: radeon_device pointer
4000  * @src_offset: src GPU address
4001  * @dst_offset: dst GPU address
4002  * @num_gpu_pages: number of GPU pages to xfer
4003  * @resv: reservation object to sync to
4004  *
4005  * Copy GPU paging using the CP DMA engine (CIK+).
4006  * Used by the radeon ttm implementation to move pages if
4007  * registered as the asic copy callback.
4008  */
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
				    uint64_t src_offset, uint64_t dst_offset,
				    unsigned num_gpu_pages,
				    struct reservation_object *resv)
{
	struct radeon_fence *fence;
	struct radeon_sync sync;
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;
	int i, num_loops;
	int r = 0;

	radeon_sync_create(&sync);

	/* each DMA_DATA packet can move at most 0x1fffff bytes, so split
	 * the copy into that many loops */
	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	/* 7 dwords per loop plus fixed overhead for sync/fence packets */
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	/* wait for the fences in the reservation object before copying */
	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		control = 0;
		/* request CP sync only on the last chunk */
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC;
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	/* fence the copy so callers can wait for completion */
	r = radeon_fence_emit(rdev, &fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_sync_free(rdev, &sync, fence);

	return fence;
}
4067 
4068 /*
4069  * IB stuff
4070  */
4071 /**
4072  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4073  *
4074  * @rdev: radeon_device pointer
4075  * @ib: radeon indirect buffer object
4076  *
4077  * Emits an DE (drawing engine) or CE (constant engine) IB
4078  * on the gfx ring.  IBs are usually generated by userspace
4079  * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
4081  * on the gfx ring for execution by the GPU.
4082  */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	/* VM id 0 is the system/kernel context */
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* next_rptr = wptr + this 3-dword packet + the 4-dword IB packet */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* next_rptr = wptr + this 5-dword packet + the 4-dword IB packet */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* length in dwords plus the VM id in the upper byte */
	control |= ib->length_dw | (vm_id << 24);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
4126 
4127 /**
4128  * cik_ib_test - basic gfx ring IB test
4129  *
4130  * @rdev: radeon_device pointer
4131  * @ring: radeon_ring structure holding ring information
4132  *
4133  * Allocate an IB and execute it on the gfx ring (CIK).
4134  * Provides a basic gfx ring test to verify that IBs are working.
4135  * Returns 0 on success, error on failure.
4136  */
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	/* grab a scratch register as the IB's write target */
	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed with a known value the IB must overwrite */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* one SET_UCONFIG_REG packet: write 0xDEADBEEF to the scratch reg */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;
	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	/* wait for the IB's fence, then poll for the register write */
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return r;
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
	return r;
}
4192 
4193 /*
4194  * CP.
 * On CIK, gfx and compute now have independent command processors.
4196  *
4197  * GFX
4198  * Gfx consists of a single ring and can process both gfx jobs and
4199  * compute jobs.  The gfx CP consists of three microengines (ME):
4200  * PFP - Pre-Fetch Parser
4201  * ME - Micro Engine
4202  * CE - Constant Engine
4203  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
4205  * used by the DE so that they can be loaded into cache in parallel
4206  * while the DE is processing state update packets.
4207  *
4208  * Compute
4209  * The compute CP consists of two microengines (ME):
4210  * MEC1 - Compute MicroEngine 1
4211  * MEC2 - Compute MicroEngine 2
4212  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4213  * The queues are exposed to userspace and are programmed directly
4214  * by the compute runtime.
4215  */
4216 /**
4217  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4218  *
4219  * @rdev: radeon_device pointer
4220  * @enable: enable or disable the MEs
4221  *
4222  * Halts or unhalts the gfx MEs.
4223  */
4224 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4225 {
4226 	if (enable)
4227 		WREG32(CP_ME_CNTL, 0);
4228 	else {
4229 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4230 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4231 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4232 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4233 	}
4234 	udelay(50);
4235 }
4236 
4237 /**
4238  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4239  *
4240  * @rdev: radeon_device pointer
4241  *
4242  * Loads the gfx PFP, ME, and CE ucode.
4243  * Returns 0 for success, -EINVAL if the ucode is not available.
4244  */
4245 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4246 {
4247 	int i;
4248 
4249 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4250 		return -EINVAL;
4251 
4252 	cik_cp_gfx_enable(rdev, false);
4253 
4254 	if (rdev->new_fw) {
4255 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
4256 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4257 		const struct gfx_firmware_header_v1_0 *ce_hdr =
4258 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4259 		const struct gfx_firmware_header_v1_0 *me_hdr =
4260 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4261 		const __le32 *fw_data;
4262 		u32 fw_size;
4263 
4264 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4265 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4266 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
4267 
4268 		/* PFP */
4269 		fw_data = (const __le32 *)
4270 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4271 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4272 		WREG32(CP_PFP_UCODE_ADDR, 0);
4273 		for (i = 0; i < fw_size; i++)
4274 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4275 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4276 
4277 		/* CE */
4278 		fw_data = (const __le32 *)
4279 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4280 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4281 		WREG32(CP_CE_UCODE_ADDR, 0);
4282 		for (i = 0; i < fw_size; i++)
4283 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4284 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4285 
4286 		/* ME */
4287 		fw_data = (const __be32 *)
4288 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4289 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4290 		WREG32(CP_ME_RAM_WADDR, 0);
4291 		for (i = 0; i < fw_size; i++)
4292 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4293 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4294 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4295 	} else {
4296 		const __be32 *fw_data;
4297 
4298 		/* PFP */
4299 		fw_data = (const __be32 *)rdev->pfp_fw->data;
4300 		WREG32(CP_PFP_UCODE_ADDR, 0);
4301 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4302 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4303 		WREG32(CP_PFP_UCODE_ADDR, 0);
4304 
4305 		/* CE */
4306 		fw_data = (const __be32 *)rdev->ce_fw->data;
4307 		WREG32(CP_CE_UCODE_ADDR, 0);
4308 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4309 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4310 		WREG32(CP_CE_UCODE_ADDR, 0);
4311 
4312 		/* ME */
4313 		fw_data = (const __be32 *)rdev->me_fw->data;
4314 		WREG32(CP_ME_RAM_WADDR, 0);
4315 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4316 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4317 		WREG32(CP_ME_RAM_WADDR, 0);
4318 	}
4319 
4320 	return 0;
4321 }
4322 
4323 /**
4324  * cik_cp_gfx_start - start the gfx ring
4325  *
4326  * @rdev: radeon_device pointer
4327  *
4328  * Enables the ring and loads the clear state context and other
4329  * packets required to init the ring.
4330  * Returns 0 for success, error for failure.
4331  */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* clear-state dwords plus 17 dwords of setup packets */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* stream the golden register state from clearstate_ci.h */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);

	return 0;
}
4383 
4384 /**
4385  * cik_cp_gfx_fini - stop the gfx ring
4386  *
4387  * @rdev: radeon_device pointer
4388  *
4389  * Stop the gfx ring and tear down the driver ring
4390  * info.
4391  */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	/* halt the PFP/ME/CE, then tear down the gfx ring object */
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
4397 
4398 /**
4399  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4400  *
4401  * @rdev: radeon_device pointer
4402  *
4403  * Program the location and size of the gfx ring buffer
4404  * and test it to make sure it's working.
4405  * Returns 0 for success, error for failure.
4406  */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	/* program the ring buffer base address (256-byte aligned) */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	/* the gfx ring is up again; expose the full VRAM size for copies */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4473 
4474 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4475 		     struct radeon_ring *ring)
4476 {
4477 	u32 rptr;
4478 
4479 	if (rdev->wb.enabled)
4480 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4481 	else
4482 		rptr = RREG32(CP_RB0_RPTR);
4483 
4484 	return rptr;
4485 }
4486 
4487 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4488 		     struct radeon_ring *ring)
4489 {
4490 	u32 wptr;
4491 
4492 	wptr = RREG32(CP_RB0_WPTR);
4493 
4494 	return wptr;
4495 }
4496 
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* read back to make sure the wptr write has reached the hardware */
	(void)RREG32(CP_RB0_WPTR);
}
4503 
4504 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4505 			 struct radeon_ring *ring)
4506 {
4507 	u32 rptr;
4508 
4509 	if (rdev->wb.enabled) {
4510 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4511 	} else {
4512 		mutex_lock(&rdev->srbm_mutex);
4513 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4514 		rptr = RREG32(CP_HQD_PQ_RPTR);
4515 		cik_srbm_select(rdev, 0, 0, 0, 0);
4516 		mutex_unlock(&rdev->srbm_mutex);
4517 	}
4518 
4519 	return rptr;
4520 }
4521 
4522 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4523 			 struct radeon_ring *ring)
4524 {
4525 	u32 wptr;
4526 
4527 	if (rdev->wb.enabled) {
4528 		/* XXX check if swapping is necessary on BE */
4529 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4530 	} else {
4531 		mutex_lock(&rdev->srbm_mutex);
4532 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4533 		wptr = RREG32(CP_HQD_PQ_WPTR);
4534 		cik_srbm_select(rdev, 0, 0, 0, 0);
4535 		mutex_unlock(&rdev->srbm_mutex);
4536 	}
4537 
4538 	return wptr;
4539 }
4540 
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	/* update the writeback copy, then ring the queue's doorbell */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4548 
4549 /**
4550  * cik_cp_compute_enable - enable/disable the compute CP MEs
4551  *
4552  * @rdev: radeon_device pointer
4553  * @enable: enable or disable the MEs
4554  *
4555  * Halts or unhalts the compute MEs.
4556  */
4557 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4558 {
4559 	if (enable)
4560 		WREG32(CP_MEC_CNTL, 0);
4561 	else {
4562 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4563 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4564 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4565 	}
4566 	udelay(50);
4567 }
4568 
4569 /**
4570  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4571  *
4572  * @rdev: radeon_device pointer
4573  *
4574  * Loads the compute MEC1&2 ucode.
4575  * Returns 0 for success, -EINVAL if the ucode is not available.
4576  */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* the MECs must be halted while their ucode is replaced */
	cik_cp_compute_enable(rdev, false);

	if (rdev->new_fw) {
		/* new-style firmware: header describes ucode offset/size.
		 * NOTE(review): rdev->mec2_fw is dereferenced below without a
		 * NULL check on KAVERI — presumably guaranteed non-NULL by the
		 * firmware loader whenever new_fw is set; confirm.
		 */
		const struct gfx_firmware_header_v1_0 *mec_hdr =
			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
		const __le32 *fw_data;
		u32 fw_size;

		radeon_ucode_print_gfx_hdr(&mec_hdr->header);

		/* MEC1 */
		fw_data = (const __le32 *)
			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));

		/* MEC2 - only KAVERI has a second MEC */
		if (rdev->family == CHIP_KAVERI) {
			const struct gfx_firmware_header_v1_0 *mec2_hdr =
				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;

			fw_data = (const __le32 *)
				(rdev->mec2_fw->data +
				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < fw_size; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
		}
	} else {
		/* legacy firmware: raw big-endian ucode of fixed size,
		 * no header; the same image feeds both MECs. */
		const __be32 *fw_data;

		/* MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

		if (rdev->family == CHIP_KAVERI) {
			/* MEC2 */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		}
	}

	return 0;
}
4639 
4640 /**
4641  * cik_cp_compute_start - start the compute queues
4642  *
4643  * @rdev: radeon_device pointer
4644  *
4645  * Enable the compute queues.
4646  * Returns 0 for success, error for failure.
4647  */
4648 static int cik_cp_compute_start(struct radeon_device *rdev)
4649 {
4650 	cik_cp_compute_enable(rdev, true);
4651 
4652 	return 0;
4653 }
4654 
4655 /**
4656  * cik_cp_compute_fini - stop the compute queues
4657  *
4658  * @rdev: radeon_device pointer
4659  *
4660  * Stop the compute queues and tear down the driver queue
4661  * info.
4662  */
4663 static void cik_cp_compute_fini(struct radeon_device *rdev)
4664 {
4665 	int i, idx, r;
4666 
4667 	cik_cp_compute_enable(rdev, false);
4668 
4669 	for (i = 0; i < 2; i++) {
4670 		if (i == 0)
4671 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4672 		else
4673 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4674 
4675 		if (rdev->ring[idx].mqd_obj) {
4676 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4677 			if (unlikely(r != 0))
4678 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4679 
4680 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4681 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4682 
4683 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4684 			rdev->ring[idx].mqd_obj = NULL;
4685 		}
4686 	}
4687 }
4688 
4689 static void cik_mec_fini(struct radeon_device *rdev)
4690 {
4691 	int r;
4692 
4693 	if (rdev->mec.hpd_eop_obj) {
4694 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4695 		if (unlikely(r != 0))
4696 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4697 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4698 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4699 
4700 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4701 		rdev->mec.hpd_eop_obj = NULL;
4702 	}
4703 }
4704 
4705 #define MEC_HPD_SIZE 2048
4706 
/* Allocate, pin and clear the GTT buffer holding the per-pipe
 * HPD EOP (end-of-pipe) areas used by the MEC compute engines.
 * Returns 0 on success or a negative error code; on failure any
 * partially-created state is torn down via cik_mec_fini().
 */
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 * Nonetheless, we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 1;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	if (rdev->mec.hpd_eop_obj == NULL) {
		/* NOTE(review): sized at 2x MEC_HPD_SIZE per pipe —
		 * presumably deliberate headroom; the warning strings below
		 * say "HDP EOP" where the rest of the file says "HPD EOP".
		 */
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	/* pin the buffer in GTT so the MEC can reach it at a fixed address */
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
4761 
/* Saved copy of the per-queue HQD (hardware queue descriptor) register
 * state.  Fields mirror the CP_MQD_* / CP_HQD_* registers read and
 * written in cik_cp_compute_resume(); field order is not touched.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4800 
/* Memory queue descriptor (MQD) for Bonaire-class compute queues.
 * This structure lives in a GTT buffer object; CP_MQD_BASE_ADDR is
 * pointed at it in cik_cp_compute_resume().  The layout is consumed
 * by the hardware/firmware, so field order must not change.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;	/* saved HQD register values */
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4828 
4829 /**
4830  * cik_cp_compute_resume - setup the compute queue registers
4831  *
4832  * @rdev: radeon_device pointer
4833  *
4834  * Program the compute queues and test them to make sure they
4835  * are working.
4836  * Returns 0 for success, error for failure.
4837  */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	int r, i, j, idx;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct bonaire_mqd *mqd;

	/* un-halt the MECs before touching the queue registers */
	r = cik_cp_compute_start(rdev);
	if (r)
		return r;

	/* fix up chicken bits */
	tmp = RREG32(CP_CPF_DEBUG);
	tmp |= (1 << 23);
	WREG32(CP_CPF_DEBUG, tmp);

	/* init the pipes */
	mutex_lock(&rdev->srbm_mutex);

	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;

	cik_srbm_select(rdev, 0, 0, 0, 0);

	/* write the EOP addr */
	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

	/* set the VMID assigned */
	WREG32(CP_HPD_EOP_VMID, 0);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(CP_HPD_EOP_CONTROL);
	tmp &= ~EOP_SIZE_MASK;
	tmp |= order_base_2(MEC_HPD_SIZE / 8);
	WREG32(CP_HPD_EOP_CONTROL, tmp);

	mutex_unlock(&rdev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		/* lazily allocate the MQD buffer object for this queue */
		if (rdev->ring[idx].mqd_obj == NULL) {
			r = radeon_bo_create(rdev,
					     sizeof(struct bonaire_mqd),
					     PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &rdev->ring[idx].mqd_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
		if (unlikely(r != 0)) {
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct bonaire_mqd));

		mqd = (struct bonaire_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->static_thread_mgmt01[0] = 0xffffffff;
		mqd->static_thread_mgmt01[1] = 0xffffffff;
		mqd->static_thread_mgmt23[0] = 0xffffffff;
		mqd->static_thread_mgmt23[1] = 0xffffffff;

		/* all HQD register accesses below target this ring's queue */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, rdev->ring[idx].me,
				rdev->ring[idx].pipe,
				rdev->ring[idx].queue, 0);

		/* disable wptr polling */
		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
		tmp &= ~WPTR_POLL_EN;
		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

		/* enable doorbell? */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr= 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			/* wait (bounded) for the dequeue to complete */
			for (j = 0; j < rdev->usec_timeout; j++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

		mqd->queue_state.cp_hqd_pq_control |=
			order_base_2(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address whether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			mqd->queue_state.cp_hqd_pq_doorbell_control =
				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
			mqd->queue_state.cp_hqd_pq_doorbell_control |=
				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &=
				~(DOORBELL_SOURCE | DOORBELL_HIT);

		} else {
			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		rdev->ring[idx].wptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->queue_state.cp_hqd_vmid = 0;
		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

		/* activate the queue */
		mqd->queue_state.cp_hqd_active = 1;
		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);

		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

		/* smoke-test the queue; mark it unusable if the test fails */
		rdev->ring[idx].ready = true;
		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
		if (r)
			rdev->ring[idx].ready = false;
	}

	return 0;
}
5069 
/* Enable or disable both the gfx and compute command processors. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
5075 
/* Load the gfx CP ucode, then the compute MEC ucode.
 * Returns 0 on success or the first failing loader's error code.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r == 0)
		r = cik_cp_compute_load_microcode(rdev);

	return r;
}
5089 
/* Tear down both the gfx and compute command processors. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
5095 
5096 static int cik_cp_resume(struct radeon_device *rdev)
5097 {
5098 	int r;
5099 
5100 	cik_enable_gui_idle_interrupt(rdev, false);
5101 
5102 	r = cik_cp_load_microcode(rdev);
5103 	if (r)
5104 		return r;
5105 
5106 	r = cik_cp_gfx_resume(rdev);
5107 	if (r)
5108 		return r;
5109 	r = cik_cp_compute_resume(rdev);
5110 	if (r)
5111 		return r;
5112 
5113 	cik_enable_gui_idle_interrupt(rdev, true);
5114 
5115 	return 0;
5116 }
5117 
/* Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log;
 * used as a diagnostic aid before and after a GPU soft reset.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
5157 
5158 /**
5159  * cik_gpu_check_soft_reset - check which blocks are busy
5160  *
5161  * @rdev: radeon_device pointer
5162  *
5163  * Check which blocks are busy and return the relevant reset
5164  * mask to be used by cik_gpu_soft_reset().
5165  * Returns a mask of the blocks to be reset.
5166  */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS: gfx-pipeline busy bits map to a gfx reset */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS: system-block busy bits */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
5238 
5239 /**
5240  * cik_gpu_soft_reset - soft reset GPU
5241  *
5242  * @rdev: radeon_device pointer
5243  * @reset_mask: mask of which blocks to reset
5244  *
5245  * Soft reset the blocks specified in @reset_mask.
5246  */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* dump status registers before the reset for diagnostics */
	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* stop memory traffic before asserting the resets */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft-reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	/* assert, hold briefly, then deassert; the read-backs after each
	 * write post the register access before the delay */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	/* dump status registers again so the before/after can be compared */
	cik_print_gpu_status_regs(rdev);
}
5369 
/* GMCON registers saved across a KV (IGP) PCI config reset */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
5375 
/* Save the GMCON registers clobbered by a PCI config reset, then
 * clear the render-engine auto-execute and stutter-enable bits so
 * they stay off during the reset.  Counterpart of
 * kv_restore_regs_for_reset().
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5387 
5388 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5389 				      struct kv_reset_save_regs *save)
5390 {
5391 	int i;
5392 
5393 	WREG32(GMCON_PGFSM_WRITE, 0);
5394 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5395 
5396 	for (i = 0; i < 5; i++)
5397 		WREG32(GMCON_PGFSM_WRITE, 0);
5398 
5399 	WREG32(GMCON_PGFSM_WRITE, 0);
5400 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5401 
5402 	for (i = 0; i < 5; i++)
5403 		WREG32(GMCON_PGFSM_WRITE, 0);
5404 
5405 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5406 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5407 
5408 	for (i = 0; i < 5; i++)
5409 		WREG32(GMCON_PGFSM_WRITE, 0);
5410 
5411 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5412 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5413 
5414 	for (i = 0; i < 5; i++)
5415 		WREG32(GMCON_PGFSM_WRITE, 0);
5416 
5417 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5418 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5419 
5420 	for (i = 0; i < 5; i++)
5421 		WREG32(GMCON_PGFSM_WRITE, 0);
5422 
5423 	WREG32(GMCON_PGFSM_WRITE, 0);
5424 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5425 
5426 	for (i = 0; i < 5; i++)
5427 		WREG32(GMCON_PGFSM_WRITE, 0);
5428 
5429 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5430 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5431 
5432 	for (i = 0; i < 5; i++)
5433 		WREG32(GMCON_PGFSM_WRITE, 0);
5434 
5435 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5436 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5437 
5438 	for (i = 0; i < 5; i++)
5439 		WREG32(GMCON_PGFSM_WRITE, 0);
5440 
5441 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5442 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5443 
5444 	for (i = 0; i < 5; i++)
5445 		WREG32(GMCON_PGFSM_WRITE, 0);
5446 
5447 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5448 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5449 
5450 	for (i = 0; i < 5; i++)
5451 		WREG32(GMCON_PGFSM_WRITE, 0);
5452 
5453 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5454 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5455 
5456 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5457 	WREG32(GMCON_MISC, save->gmcon_misc);
5458 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5459 }
5460 
/* Perform a full-ASIC reset through PCI config space: quiesce every
 * engine, stop memory traffic, (on IGPs) save the GMCON state, pull
 * the PCI reset, then wait for the ASIC to respond again.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* IGPs lose GMCON state over the reset; save it first */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset; CONFIG_MEMSIZE reads as
	 * all-ones while the ASIC is still in reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5523 
5524 /**
5525  * cik_asic_reset - soft reset GPU
5526  *
5527  * @rdev: radeon_device pointer
5528  *
5529  * Look up which blocks are hung and attempt
5530  * to reset them.
5531  * Returns 0 for success.
5532  */
int cik_asic_reset(struct radeon_device *rdev)
{
	u32 reset_mask;

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* flag the engine hung in the BIOS scratch registers */
	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	/* try soft reset */
	cik_gpu_soft_reset(rdev, reset_mask);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* try pci config reset (only if the soft reset did not clear
	 * everything and the user enabled hard resets) */
	if (reset_mask && radeon_hard_reset)
		cik_gpu_pci_config_reset(rdev);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* all blocks idle again: clear the hung flag */
	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
5558 
5559 /**
5560  * cik_gfx_is_lockup - check if the 3D engine is locked up
5561  *
5562  * @rdev: radeon_device pointer
5563  * @ring: radeon_ring structure holding ring information
5564  *
5565  * Check if the 3D engine is locked up (CIK).
5566  * Returns true if the engine is locked, false if not.
5567  */
5568 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5569 {
5570 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5571 
5572 	if (!(reset_mask & (RADEON_RESET_GFX |
5573 			    RADEON_RESET_COMPUTE |
5574 			    RADEON_RESET_CP))) {
5575 		radeon_ring_lockup_update(rdev, ring);
5576 		return false;
5577 	}
5578 	return radeon_ring_test_lockup(rdev, ring);
5579 }
5580 
5581 /* MC */
5582 /**
5583  * cik_mc_program - program the GPU memory controller
5584  *
5585  * @rdev: radeon_device pointer
5586  *
5587  * Set the location of vram, gart, and AGP in the GPU's
5588  * physical address space (CIK).
5589  */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* stop memory traffic while the apertures are reprogrammed */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: end in the high half-word, start in the low */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* disable the AGP aperture (base above top) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5637 
5638 /**
5639  * cik_mc_init - initialize the memory controller driver params
5640  *
5641  * @rdev: radeon_device pointer
5642  *
5643  * Look up the amount of vram, vram width, and decide how to place
5644  * vram and gart within the GPU's physical address space (CIK).
5645  * Returns 0 for success.
5646  */
5647 static int cik_mc_init(struct radeon_device *rdev)
5648 {
5649 	u32 tmp;
5650 	int chansize, numchan;
5651 
5652 	/* Get VRAM informations */
5653 	rdev->mc.vram_is_ddr = true;
5654 	tmp = RREG32(MC_ARB_RAMCFG);
5655 	if (tmp & CHANSIZE_MASK) {
5656 		chansize = 64;
5657 	} else {
5658 		chansize = 32;
5659 	}
5660 	tmp = RREG32(MC_SHARED_CHMAP);
5661 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5662 	case 0:
5663 	default:
5664 		numchan = 1;
5665 		break;
5666 	case 1:
5667 		numchan = 2;
5668 		break;
5669 	case 2:
5670 		numchan = 4;
5671 		break;
5672 	case 3:
5673 		numchan = 8;
5674 		break;
5675 	case 4:
5676 		numchan = 3;
5677 		break;
5678 	case 5:
5679 		numchan = 6;
5680 		break;
5681 	case 6:
5682 		numchan = 10;
5683 		break;
5684 	case 7:
5685 		numchan = 12;
5686 		break;
5687 	case 8:
5688 		numchan = 16;
5689 		break;
5690 	}
5691 	rdev->mc.vram_width = numchan * chansize;
5692 	/* Could aper size report 0 ? */
5693 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5694 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5695 	/* size in MB on si */
5696 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5697 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5698 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5699 	si_vram_gtt_location(rdev, &rdev->mc);
5700 	radeon_update_bandwidth_info(rdev);
5701 
5702 	return 0;
5703 }
5704 
5705 /*
5706  * GART
5707  * VMID 0 is the physical GPU addresses as used by the kernel.
5708  * VMIDs 1-15 are used for userspace clients and are handled
5709  * by the radeon vm/hsa code.
5710  */
5711 /**
5712  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5713  *
5714  * @rdev: radeon_device pointer
5715  *
5716  * Flush the TLB for the VMID 0 page table (CIK).
5717  */
5718 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5719 {
5720 	/* flush hdp cache */
5721 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5722 
5723 	/* bits 0-15 are the VM contexts0-15 */
5724 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5725 }
5726 
/**
 * cik_pcie_init_compute_vmid - program SH_MEM registers for compute VMIDs
 *
 * @rdev: radeon_device pointer
 *
 * Set up the shader memory config/aperture registers for VMIDs 8-15,
 * the VMIDs used by amdkfd compute clients (see cik_vm_init()).
 */
static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
{
	int i;
	uint32_t sh_mem_bases, sh_mem_config;

	sh_mem_bases = 0x6000 | 0x6000 << 16;
	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);

	mutex_lock(&rdev->srbm_mutex);
	for (i = 8; i < 16; i++) {
		/* SRBM register writes below apply to the selected VMID */
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, sh_mem_config);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, sh_mem_bases);
	}
	/* restore SRBM access to VMID 0 */
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);
}
5748 
5749 /**
5750  * cik_pcie_gart_enable - gart enable
5751  *
5752  * @rdev: radeon_device pointer
5753  *
5754  * This sets up the TLBs, programs the page tables for VMID0,
5755  * sets up the hw for VMIDs 1-15 which are allocated on
5756  * demand, and sets up the global locations for the LDS, GDS,
5757  * and GPUVM for FSA64 clients (CIK).
5758  * Returns 0 for success, errors for failure.
5759  */
5760 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5761 {
5762 	int r, i;
5763 
5764 	if (rdev->gart.robj == NULL) {
5765 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5766 		return -EINVAL;
5767 	}
5768 	r = radeon_gart_table_vram_pin(rdev);
5769 	if (r)
5770 		return r;
5771 	/* Setup TLB control */
5772 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5773 	       (0xA << 7) |
5774 	       ENABLE_L1_TLB |
5775 	       ENABLE_L1_FRAGMENT_PROCESSING |
5776 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5777 	       ENABLE_ADVANCED_DRIVER_MODEL |
5778 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5779 	/* Setup L2 cache */
5780 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5781 	       ENABLE_L2_FRAGMENT_PROCESSING |
5782 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5783 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5784 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5785 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5786 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5787 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5788 	       BANK_SELECT(4) |
5789 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5790 	/* setup context0 */
5791 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5792 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5793 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5794 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5795 			(u32)(rdev->dummy_page.addr >> 12));
5796 	WREG32(VM_CONTEXT0_CNTL2, 0);
5797 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5798 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5799 
5800 	WREG32(0x15D4, 0);
5801 	WREG32(0x15D8, 0);
5802 	WREG32(0x15DC, 0);
5803 
5804 	/* restore context1-15 */
5805 	/* set vm size, must be a multiple of 4 */
5806 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5807 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5808 	for (i = 1; i < 16; i++) {
5809 		if (i < 8)
5810 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5811 			       rdev->vm_manager.saved_table_addr[i]);
5812 		else
5813 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5814 			       rdev->vm_manager.saved_table_addr[i]);
5815 	}
5816 
5817 	/* enable context1-15 */
5818 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5819 	       (u32)(rdev->dummy_page.addr >> 12));
5820 	WREG32(VM_CONTEXT1_CNTL2, 4);
5821 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5822 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5823 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5824 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5825 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5826 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5827 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5828 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5829 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5830 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5831 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5832 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5833 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5834 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5835 
5836 	if (rdev->family == CHIP_KAVERI) {
5837 		u32 tmp = RREG32(CHUB_CONTROL);
5838 		tmp &= ~BYPASS_VM;
5839 		WREG32(CHUB_CONTROL, tmp);
5840 	}
5841 
5842 	/* XXX SH_MEM regs */
5843 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5844 	mutex_lock(&rdev->srbm_mutex);
5845 	for (i = 0; i < 16; i++) {
5846 		cik_srbm_select(rdev, 0, 0, 0, i);
5847 		/* CP and shaders */
5848 		WREG32(SH_MEM_CONFIG, 0);
5849 		WREG32(SH_MEM_APE1_BASE, 1);
5850 		WREG32(SH_MEM_APE1_LIMIT, 0);
5851 		WREG32(SH_MEM_BASES, 0);
5852 		/* SDMA GFX */
5853 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5854 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5855 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5856 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5857 		/* XXX SDMA RLC - todo */
5858 	}
5859 	cik_srbm_select(rdev, 0, 0, 0, 0);
5860 	mutex_unlock(&rdev->srbm_mutex);
5861 
5862 	cik_pcie_init_compute_vmid(rdev);
5863 
5864 	cik_pcie_gart_tlb_flush(rdev);
5865 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5866 		 (unsigned)(rdev->mc.gtt_size >> 20),
5867 		 (unsigned long long)rdev->gart.table_addr);
5868 	rdev->gart.ready = true;
5869 	return 0;
5870 }
5871 
5872 /**
5873  * cik_pcie_gart_disable - gart disable
5874  *
5875  * @rdev: radeon_device pointer
5876  *
5877  * This disables all VM page table (CIK).
5878  */
5879 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5880 {
5881 	unsigned i;
5882 
5883 	for (i = 1; i < 16; ++i) {
5884 		uint32_t reg;
5885 		if (i < 8)
5886 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5887 		else
5888 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5889 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5890 	}
5891 
5892 	/* Disable all tables */
5893 	WREG32(VM_CONTEXT0_CNTL, 0);
5894 	WREG32(VM_CONTEXT1_CNTL, 0);
5895 	/* Setup TLB control */
5896 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5897 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5898 	/* Setup L2 cache */
5899 	WREG32(VM_L2_CNTL,
5900 	       ENABLE_L2_FRAGMENT_PROCESSING |
5901 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5902 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5903 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5904 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5905 	WREG32(VM_L2_CNTL2, 0);
5906 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5907 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5908 	radeon_gart_table_vram_unpin(rdev);
5909 }
5910 
5911 /**
5912  * cik_pcie_gart_fini - vm fini callback
5913  *
5914  * @rdev: radeon_device pointer
5915  *
5916  * Tears down the driver GART/VM setup (CIK).
5917  */
5918 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5919 {
5920 	cik_pcie_gart_disable(rdev);
5921 	radeon_gart_table_vram_free(rdev);
5922 	radeon_gart_fini(rdev);
5923 }
5924 
/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0 (success).
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
5938 
5939 /*
5940  * vm
5941  * VMID 0 is the physical GPU addresses as used by the kernel.
5942  * VMIDs 1-15 are used for userspace clients and are handled
5943  * by the radeon vm/hsa code.
5944  */
5945 /**
5946  * cik_vm_init - cik vm init callback
5947  *
5948  * @rdev: radeon_device pointer
5949  *
5950  * Inits cik specific vm parameters (number of VMs, base of vram for
5951  * VMIDs 1-15) (CIK).
5952  * Returns 0 for success.
5953  */
5954 int cik_vm_init(struct radeon_device *rdev)
5955 {
5956 	/*
5957 	 * number of VMs
5958 	 * VMID 0 is reserved for System
5959 	 * radeon graphics/compute will use VMIDs 1-7
5960 	 * amdkfd will use VMIDs 8-15
5961 	 */
5962 	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5963 	/* base offset of vram pages */
5964 	if (rdev->flags & RADEON_IS_IGP) {
5965 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5966 		tmp <<= 22;
5967 		rdev->vm_manager.vram_base_offset = tmp;
5968 	} else
5969 		rdev->vm_manager.vram_base_offset = 0;
5970 
5971 	return 0;
5972 }
5973 
5974 /**
5975  * cik_vm_fini - cik vm fini callback
5976  *
5977  * @rdev: radeon_device pointer
5978  *
5979  * Tear down any asic specific VM setup (CIK).
5980  */
5981 void cik_vm_fini(struct radeon_device *rdev)
5982 {
5983 }
5984 
5985 /**
5986  * cik_vm_decode_fault - print human readable fault info
5987  *
5988  * @rdev: radeon_device pointer
5989  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5990  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5991  *
5992  * Print human readable fault information (CIK).
5993  */
5994 static void cik_vm_decode_fault(struct radeon_device *rdev,
5995 				u32 status, u32 addr, u32 mc_client)
5996 {
5997 	u32 mc_id;
5998 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5999 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
6000 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
6001 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
6002 
6003 	if (rdev->family == CHIP_HAWAII)
6004 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6005 	else
6006 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6007 
6008 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
6009 	       protections, vmid, addr,
6010 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
6011 	       block, mc_client, mc_id);
6012 }
6013 
6014 /**
6015  * cik_vm_flush - cik vm flush using the CP
6016  *
6017  * @rdev: radeon_device pointer
6018  *
6019  * Update the page table base and flush the VM TLB
6020  * using the CP (CIK).
6021  */
6022 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
6023 		  unsigned vm_id, uint64_t pd_addr)
6024 {
6025 	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
6026 
6027 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6028 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6029 				 WRITE_DATA_DST_SEL(0)));
6030 	if (vm_id < 8) {
6031 		radeon_ring_write(ring,
6032 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
6033 	} else {
6034 		radeon_ring_write(ring,
6035 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
6036 	}
6037 	radeon_ring_write(ring, 0);
6038 	radeon_ring_write(ring, pd_addr >> 12);
6039 
6040 	/* update SH_MEM_* regs */
6041 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6042 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6043 				 WRITE_DATA_DST_SEL(0)));
6044 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6045 	radeon_ring_write(ring, 0);
6046 	radeon_ring_write(ring, VMID(vm_id));
6047 
6048 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
6049 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6050 				 WRITE_DATA_DST_SEL(0)));
6051 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
6052 	radeon_ring_write(ring, 0);
6053 
6054 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
6055 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
6056 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
6057 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
6058 
6059 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6060 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6061 				 WRITE_DATA_DST_SEL(0)));
6062 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6063 	radeon_ring_write(ring, 0);
6064 	radeon_ring_write(ring, VMID(0));
6065 
6066 	/* HDP flush */
6067 	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
6068 
6069 	/* bits 0-15 are the VM contexts0-15 */
6070 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6071 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6072 				 WRITE_DATA_DST_SEL(0)));
6073 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6074 	radeon_ring_write(ring, 0);
6075 	radeon_ring_write(ring, 1 << vm_id);
6076 
6077 	/* wait for the invalidate to complete */
6078 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6079 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6080 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6081 				 WAIT_REG_MEM_ENGINE(0))); /* me */
6082 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6083 	radeon_ring_write(ring, 0);
6084 	radeon_ring_write(ring, 0); /* ref */
6085 	radeon_ring_write(ring, 0); /* mask */
6086 	radeon_ring_write(ring, 0x20); /* poll interval */
6087 
6088 	/* compute doesn't have PFP */
6089 	if (usepfp) {
6090 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6091 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6092 		radeon_ring_write(ring, 0x0);
6093 	}
6094 }
6095 
6096 /*
6097  * RLC
6098  * The RLC is a multi-purpose microengine that handles a
6099  * variety of functions, the most important of which is
6100  * the interrupt controller.
6101  */
6102 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6103 					  bool enable)
6104 {
6105 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
6106 
6107 	if (enable)
6108 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6109 	else
6110 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6111 	WREG32(CP_INT_CNTL_RING0, tmp);
6112 }
6113 
6114 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6115 {
6116 	u32 tmp;
6117 
6118 	tmp = RREG32(RLC_LB_CNTL);
6119 	if (enable)
6120 		tmp |= LOAD_BALANCE_ENABLE;
6121 	else
6122 		tmp &= ~LOAD_BALANCE_ENABLE;
6123 	WREG32(RLC_LB_CNTL, tmp);
6124 }
6125 
/**
 * cik_wait_for_rlc_serdes - wait for the RLC serdes units to go idle
 *
 * @rdev: radeon_device pointer
 *
 * Poll the CU master busy bits per SE/SH, then the non-CU master busy
 * bits, giving up after rdev->usec_timeout microseconds per poll loop.
 */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			/* narrow GRBM access to this SE/SH pair */
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* broadcast: select all SEs/SHs again */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
6152 
6153 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6154 {
6155 	u32 tmp;
6156 
6157 	tmp = RREG32(RLC_CNTL);
6158 	if (tmp != rlc)
6159 		WREG32(RLC_CNTL, rlc);
6160 }
6161 
/**
 * cik_halt_rlc - halt the RLC microengine
 *
 * @rdev: radeon_device pointer
 *
 * Disable the RLC if it is running, wait for its GPM to go idle and
 * for the serdes units to settle. Returns the original RLC_CNTL value
 * so the caller can restore it later via cik_update_rlc().
 */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* wait (bounded) for the RLC GPM to drain */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
6185 
/**
 * cik_enter_rlc_safe_mode - request RLC safe mode
 *
 * @rdev: radeon_device pointer
 *
 * Send the ENTER_RLC_SAFE_MODE message to the RLC and wait (bounded by
 * rdev->usec_timeout) for the GFX power/clock status bits to assert and
 * for the RLC to acknowledge the request by clearing the REQ bit.
 */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* REQ clears when the RLC has consumed the message */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
6206 
/**
 * cik_exit_rlc_safe_mode - leave RLC safe mode
 *
 * @rdev: radeon_device pointer
 *
 * Send the EXIT_RLC_SAFE_MODE message; unlike the enter path, no
 * acknowledgement is waited for here.
 */
void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp;

	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);
}
6214 
6215 /**
6216  * cik_rlc_stop - stop the RLC ME
6217  *
6218  * @rdev: radeon_device pointer
6219  *
6220  * Halt the RLC ME (MicroEngine) (CIK).
6221  */
6222 static void cik_rlc_stop(struct radeon_device *rdev)
6223 {
6224 	WREG32(RLC_CNTL, 0);
6225 
6226 	cik_enable_gui_idle_interrupt(rdev, false);
6227 
6228 	cik_wait_for_rlc_serdes(rdev);
6229 }
6230 
6231 /**
6232  * cik_rlc_start - start the RLC ME
6233  *
6234  * @rdev: radeon_device pointer
6235  *
6236  * Unhalt the RLC ME (MicroEngine) (CIK).
6237  */
6238 static void cik_rlc_start(struct radeon_device *rdev)
6239 {
6240 	WREG32(RLC_CNTL, RLC_ENABLE);
6241 
6242 	cik_enable_gui_idle_interrupt(rdev, true);
6243 
6244 	udelay(50);
6245 }
6246 
6247 /**
6248  * cik_rlc_resume - setup the RLC hw
6249  *
6250  * @rdev: radeon_device pointer
6251  *
6252  * Initialize the RLC registers, load the ucode,
6253  * and start the RLC (CIK).
6254  * Returns 0 for success, -EINVAL if the ucode is not available.
6255  */
6256 static int cik_rlc_resume(struct radeon_device *rdev)
6257 {
6258 	u32 i, size, tmp;
6259 
6260 	if (!rdev->rlc_fw)
6261 		return -EINVAL;
6262 
6263 	cik_rlc_stop(rdev);
6264 
6265 	/* disable CG */
6266 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6267 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6268 
6269 	si_rlc_reset(rdev);
6270 
6271 	cik_init_pg(rdev);
6272 
6273 	cik_init_cg(rdev);
6274 
6275 	WREG32(RLC_LB_CNTR_INIT, 0);
6276 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6277 
6278 	mutex_lock(&rdev->grbm_idx_mutex);
6279 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6280 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6281 	WREG32(RLC_LB_PARAMS, 0x00600408);
6282 	WREG32(RLC_LB_CNTL, 0x80000004);
6283 	mutex_unlock(&rdev->grbm_idx_mutex);
6284 
6285 	WREG32(RLC_MC_CNTL, 0);
6286 	WREG32(RLC_UCODE_CNTL, 0);
6287 
6288 	if (rdev->new_fw) {
6289 		const struct rlc_firmware_header_v1_0 *hdr =
6290 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6291 		const __le32 *fw_data = (const __le32 *)
6292 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6293 
6294 		radeon_ucode_print_rlc_hdr(&hdr->header);
6295 
6296 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6297 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6298 		for (i = 0; i < size; i++)
6299 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6300 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6301 	} else {
6302 		const __be32 *fw_data;
6303 
6304 		switch (rdev->family) {
6305 		case CHIP_BONAIRE:
6306 		case CHIP_HAWAII:
6307 		default:
6308 			size = BONAIRE_RLC_UCODE_SIZE;
6309 			break;
6310 		case CHIP_KAVERI:
6311 			size = KV_RLC_UCODE_SIZE;
6312 			break;
6313 		case CHIP_KABINI:
6314 			size = KB_RLC_UCODE_SIZE;
6315 			break;
6316 		case CHIP_MULLINS:
6317 			size = ML_RLC_UCODE_SIZE;
6318 			break;
6319 		}
6320 
6321 		fw_data = (const __be32 *)rdev->rlc_fw->data;
6322 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6323 		for (i = 0; i < size; i++)
6324 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6325 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6326 	}
6327 
6328 	/* XXX - find out what chips support lbpw */
6329 	cik_enable_lbpw(rdev, false);
6330 
6331 	if (rdev->family == CHIP_BONAIRE)
6332 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6333 
6334 	cik_rlc_start(rdev);
6335 
6336 	return 0;
6337 }
6338 
/**
 * cik_enable_cgcg - enable/disable coarse-grain clock gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable (when the CGCG cg_flag is also set) or disable
 *
 * Halts the RLC, programs the serdes override for CGCG/CGLS, then
 * restores the RLC and updates RLC_CGCG_CGLS_CTRL if it changed.
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* RLC must be halted while the serdes override is written */
		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);
		mutex_unlock(&rdev->grbm_idx_mutex);

		/* restore the RLC_CNTL value saved by cik_halt_rlc() */
		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* repeated reads act as a settle delay before disabling */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6376 
/**
 * cik_enable_mgcg - enable/disable medium-grain clock gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable (subject to the relevant cg_flags) or disable
 *
 * Programs the MGCG override via the RLC serdes (with the RLC halted),
 * plus the CP/RLC memory light-sleep and CGTS controls that depend on
 * the MGLS/CP_LS/CGTS cg_flags.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* RLC must be halted while the serdes override is written */
		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* force RLC and CP memories out of light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* RLC must be halted while the serdes override is written */
		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);
	}
}
6460 
/* MC block clock-gating control registers; cik_enable_mc_ls() and
 * cik_enable_mc_mgcg() toggle the LS/CG enable bits in each of these */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6473 
6474 static void cik_enable_mc_ls(struct radeon_device *rdev,
6475 			     bool enable)
6476 {
6477 	int i;
6478 	u32 orig, data;
6479 
6480 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6481 		orig = data = RREG32(mc_cg_registers[i]);
6482 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6483 			data |= MC_LS_ENABLE;
6484 		else
6485 			data &= ~MC_LS_ENABLE;
6486 		if (data != orig)
6487 			WREG32(mc_cg_registers[i], data);
6488 	}
6489 }
6490 
6491 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6492 			       bool enable)
6493 {
6494 	int i;
6495 	u32 orig, data;
6496 
6497 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6498 		orig = data = RREG32(mc_cg_registers[i]);
6499 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6500 			data |= MC_CG_ENABLE;
6501 		else
6502 			data &= ~MC_CG_ENABLE;
6503 		if (data != orig)
6504 			WREG32(mc_cg_registers[i], data);
6505 	}
6506 }
6507 
/**
 * cik_enable_sdma_mgcg - enable/disable SDMA medium-grain clock gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable (when the SDMA_MGCG cg_flag is set) or disable
 *
 * Programs SDMA0_CLK_CTRL for both SDMA engines. The raw values are
 * taken as-is from the hw programming sequence: 0x00000100 when gating
 * is enabled, and setting bits 31:24 (0xff000000) to override/disable
 * gating otherwise — NOTE(review): bit meanings not visible from this
 * file; verify against the CIK register spec.
 */
static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
				 bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
	} else {
		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
		data |= 0xff000000;
		if (data != orig)
			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);

		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
		data |= 0xff000000;
		if (data != orig)
			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
	}
}
6528 
6529 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6530 				 bool enable)
6531 {
6532 	u32 orig, data;
6533 
6534 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6535 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6536 		data |= 0x100;
6537 		if (orig != data)
6538 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6539 
6540 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6541 		data |= 0x100;
6542 		if (orig != data)
6543 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6544 	} else {
6545 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6546 		data &= ~0x100;
6547 		if (orig != data)
6548 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6549 
6550 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6551 		data &= ~0x100;
6552 		if (orig != data)
6553 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6554 	}
6555 }
6556 
/**
 * cik_enable_uvd_mgcg - enable/disable UVD medium-grain clock gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable (when the UVD_MGCG cg_flag is set) or disable
 *
 * Sets/clears the low 12 bits of UVD_CGC_MEM_CTRL and the DCM bit in
 * UVD_CGC_CTRL.
 */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		/* NOTE(review): the read result is immediately overwritten;
		 * possibly only the read side effect matters — verify */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6582 
/**
 * cik_enable_bif_mgls - enable/disable BIF memory light sleep
 *
 * @rdev: radeon_device pointer
 * @enable: enable (when the BIF_LS cg_flag is set) or disable
 *
 * Toggles the slave/master/replay memory light-sleep enables in the
 * PCIE_CNTL2 port register; writes only when the value changed.
 */
static void cik_enable_bif_mgls(struct radeon_device *rdev,
			       bool enable)
{
	u32 orig, data;

	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
	else
		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);

	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);
}
6600 
6601 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6602 				bool enable)
6603 {
6604 	u32 orig, data;
6605 
6606 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6607 
6608 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6609 		data &= ~CLOCK_GATING_DIS;
6610 	else
6611 		data |= CLOCK_GATING_DIS;
6612 
6613 	if (orig != data)
6614 		WREG32(HDP_HOST_PATH_CNTL, data);
6615 }
6616 
6617 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6618 			      bool enable)
6619 {
6620 	u32 orig, data;
6621 
6622 	orig = data = RREG32(HDP_MEM_POWER_LS);
6623 
6624 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6625 		data |= HDP_LS_ENABLE;
6626 	else
6627 		data &= ~HDP_LS_ENABLE;
6628 
6629 	if (orig != data)
6630 		WREG32(HDP_MEM_POWER_LS, data);
6631 }
6632 
/**
 * cik_update_cg - enable/disable clock gating for a set of blocks
 * @rdev: radeon_device pointer
 * @block: bitmask of RADEON_CG_BLOCK_* values to update
 * @enable: enable or disable clock gating
 *
 * Dispatches to the per-block helpers.  MC gating is skipped on IGPs
 * and UVD gating is skipped when the asic has no UVD block.
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		/* mask the GUI idle interrupt while GFX CG is reprogrammed */
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			/* MGCG before CGCG on enable... */
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			/* ...and the reverse order on disable */
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC clock gating is only touched on dGPUs */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6680 
/**
 * cik_init_cg - enable clock gating at init time
 * @rdev: radeon_device pointer
 *
 * Enables GFX clock gating first, then UVD internal CG (if present),
 * then the remaining blocks in one call.
 */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6695 
/**
 * cik_fini_cg - disable clock gating at teardown
 * @rdev: radeon_device pointer
 *
 * Mirror of cik_init_cg(): the non-GFX blocks are disabled first,
 * GFX last (reverse of the enable order).
 */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6706 
6707 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6708 					  bool enable)
6709 {
6710 	u32 data, orig;
6711 
6712 	orig = data = RREG32(RLC_PG_CNTL);
6713 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6714 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6715 	else
6716 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6717 	if (orig != data)
6718 		WREG32(RLC_PG_CNTL, data);
6719 }
6720 
6721 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6722 					  bool enable)
6723 {
6724 	u32 data, orig;
6725 
6726 	orig = data = RREG32(RLC_PG_CNTL);
6727 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6728 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6729 	else
6730 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6731 	if (orig != data)
6732 		WREG32(RLC_PG_CNTL, data);
6733 }
6734 
6735 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6736 {
6737 	u32 data, orig;
6738 
6739 	orig = data = RREG32(RLC_PG_CNTL);
6740 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6741 		data &= ~DISABLE_CP_PG;
6742 	else
6743 		data |= DISABLE_CP_PG;
6744 	if (orig != data)
6745 		WREG32(RLC_PG_CNTL, data);
6746 }
6747 
6748 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6749 {
6750 	u32 data, orig;
6751 
6752 	orig = data = RREG32(RLC_PG_CNTL);
6753 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6754 		data &= ~DISABLE_GDS_PG;
6755 	else
6756 		data |= DISABLE_GDS_PG;
6757 	if (orig != data)
6758 		WREG32(RLC_PG_CNTL, data);
6759 }
6760 
6761 #define CP_ME_TABLE_SIZE    96
6762 #define CP_ME_TABLE_OFFSET  2048
6763 #define CP_MEC_TABLE_OFFSET 4096
6764 
6765 void cik_init_cp_pg_table(struct radeon_device *rdev)
6766 {
6767 	volatile u32 *dst_ptr;
6768 	int me, i, max_me = 4;
6769 	u32 bo_offset = 0;
6770 	u32 table_offset, table_size;
6771 
6772 	if (rdev->family == CHIP_KAVERI)
6773 		max_me = 5;
6774 
6775 	if (rdev->rlc.cp_table_ptr == NULL)
6776 		return;
6777 
6778 	/* write the cp table buffer */
6779 	dst_ptr = rdev->rlc.cp_table_ptr;
6780 	for (me = 0; me < max_me; me++) {
6781 		if (rdev->new_fw) {
6782 			const __le32 *fw_data;
6783 			const struct gfx_firmware_header_v1_0 *hdr;
6784 
6785 			if (me == 0) {
6786 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6787 				fw_data = (const __le32 *)
6788 					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6789 				table_offset = le32_to_cpu(hdr->jt_offset);
6790 				table_size = le32_to_cpu(hdr->jt_size);
6791 			} else if (me == 1) {
6792 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6793 				fw_data = (const __le32 *)
6794 					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6795 				table_offset = le32_to_cpu(hdr->jt_offset);
6796 				table_size = le32_to_cpu(hdr->jt_size);
6797 			} else if (me == 2) {
6798 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6799 				fw_data = (const __le32 *)
6800 					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6801 				table_offset = le32_to_cpu(hdr->jt_offset);
6802 				table_size = le32_to_cpu(hdr->jt_size);
6803 			} else if (me == 3) {
6804 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6805 				fw_data = (const __le32 *)
6806 					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6807 				table_offset = le32_to_cpu(hdr->jt_offset);
6808 				table_size = le32_to_cpu(hdr->jt_size);
6809 			} else {
6810 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6811 				fw_data = (const __le32 *)
6812 					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6813 				table_offset = le32_to_cpu(hdr->jt_offset);
6814 				table_size = le32_to_cpu(hdr->jt_size);
6815 			}
6816 
6817 			for (i = 0; i < table_size; i ++) {
6818 				dst_ptr[bo_offset + i] =
6819 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6820 			}
6821 			bo_offset += table_size;
6822 		} else {
6823 			const __be32 *fw_data;
6824 			table_size = CP_ME_TABLE_SIZE;
6825 
6826 			if (me == 0) {
6827 				fw_data = (const __be32 *)rdev->ce_fw->data;
6828 				table_offset = CP_ME_TABLE_OFFSET;
6829 			} else if (me == 1) {
6830 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6831 				table_offset = CP_ME_TABLE_OFFSET;
6832 			} else if (me == 2) {
6833 				fw_data = (const __be32 *)rdev->me_fw->data;
6834 				table_offset = CP_ME_TABLE_OFFSET;
6835 			} else {
6836 				fw_data = (const __be32 *)rdev->mec_fw->data;
6837 				table_offset = CP_MEC_TABLE_OFFSET;
6838 			}
6839 
6840 			for (i = 0; i < table_size; i ++) {
6841 				dst_ptr[bo_offset + i] =
6842 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6843 			}
6844 			bo_offset += table_size;
6845 		}
6846 	}
6847 }
6848 
/**
 * cik_enable_gfx_cgpg - toggle GFX coarse grain powergating
 * @rdev: radeon_device pointer
 * @enable: enable/disable GFX powergating
 *
 * Sets/clears GFX_PG_ENABLE in RLC_PG_CNTL and AUTO_PG_EN in
 * RLC_AUTO_PG_CTRL, writing each register only on change.
 */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): the value is read and discarded — possibly a
		 * posting read to flush the disable sequence; confirm before
		 * removing.
		 */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6878 
6879 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6880 {
6881 	u32 mask = 0, tmp, tmp1;
6882 	int i;
6883 
6884 	mutex_lock(&rdev->grbm_idx_mutex);
6885 	cik_select_se_sh(rdev, se, sh);
6886 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6887 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6888 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6889 	mutex_unlock(&rdev->grbm_idx_mutex);
6890 
6891 	tmp &= 0xffff0000;
6892 
6893 	tmp |= tmp1;
6894 	tmp >>= 16;
6895 
6896 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6897 		mask <<= 1;
6898 		mask |= 1;
6899 	}
6900 
6901 	return (~tmp) & mask;
6902 }
6903 
6904 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6905 {
6906 	u32 i, j, k, active_cu_number = 0;
6907 	u32 mask, counter, cu_bitmap;
6908 	u32 tmp = 0;
6909 
6910 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6911 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6912 			mask = 1;
6913 			cu_bitmap = 0;
6914 			counter = 0;
6915 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6916 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6917 					if (counter < 2)
6918 						cu_bitmap |= mask;
6919 					counter ++;
6920 				}
6921 				mask <<= 1;
6922 			}
6923 
6924 			active_cu_number += counter;
6925 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6926 		}
6927 	}
6928 
6929 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6930 
6931 	tmp = RREG32(RLC_MAX_PG_CU);
6932 	tmp &= ~MAX_PU_CU_MASK;
6933 	tmp |= MAX_PU_CU(active_cu_number);
6934 	WREG32(RLC_MAX_PG_CU, tmp);
6935 }
6936 
6937 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6938 				       bool enable)
6939 {
6940 	u32 data, orig;
6941 
6942 	orig = data = RREG32(RLC_PG_CNTL);
6943 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6944 		data |= STATIC_PER_CU_PG_ENABLE;
6945 	else
6946 		data &= ~STATIC_PER_CU_PG_ENABLE;
6947 	if (orig != data)
6948 		WREG32(RLC_PG_CNTL, data);
6949 }
6950 
6951 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6952 					bool enable)
6953 {
6954 	u32 data, orig;
6955 
6956 	orig = data = RREG32(RLC_PG_CNTL);
6957 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6958 		data |= DYN_PER_CU_PG_ENABLE;
6959 	else
6960 		data &= ~DYN_PER_CU_PG_ENABLE;
6961 	if (orig != data)
6962 		WREG32(RLC_PG_CNTL, data);
6963 }
6964 
6965 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6966 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6967 
/**
 * cik_init_gfx_cgpg - one-time setup for GFX powergating
 * @rdev: radeon_device pointer
 *
 * Points the RLC at the clear-state descriptor and save/restore list
 * via its scratch interface, programs the save/restore and CP table
 * base addresses, and sets the idle-poll and powergating delay values.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* publish the clear state descriptor (addr hi/lo + size)
		 * through the RLC scratch registers */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state: zero out all three descriptor words */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* base addresses are programmed in 256-byte units (>> 8) */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
7016 
/**
 * cik_update_gfx_pg - enable/disable all GFX powergating features
 * @rdev: radeon_device pointer
 * @enable: enable or disable powergating
 *
 * Toggles coarse grain, static per-CU and dynamic per-CU powergating
 * together; each helper checks its own pg_flags support bit.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
7023 
7024 u32 cik_get_csb_size(struct radeon_device *rdev)
7025 {
7026 	u32 count = 0;
7027 	const struct cs_section_def *sect = NULL;
7028 	const struct cs_extent_def *ext = NULL;
7029 
7030 	if (rdev->rlc.cs_data == NULL)
7031 		return 0;
7032 
7033 	/* begin clear state */
7034 	count += 2;
7035 	/* context control state */
7036 	count += 3;
7037 
7038 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7039 		for (ext = sect->section; ext->extent != NULL; ++ext) {
7040 			if (sect->id == SECT_CONTEXT)
7041 				count += 2 + ext->reg_count;
7042 			else
7043 				return 0;
7044 		}
7045 	}
7046 	/* pa_sc_raster_config/pa_sc_raster_config1 */
7047 	count += 4;
7048 	/* end clear state */
7049 	count += 2;
7050 	/* clear state */
7051 	count += 2;
7052 
7053 	return count;
7054 }
7055 
/**
 * cik_get_csb_buffer - fill the clear state buffer
 * @rdev: radeon_device pointer
 * @buffer: destination buffer (dwords, little endian)
 *
 * Emits the clear state packet stream: preamble begin, context
 * control, one SET_CONTEXT_REG packet per SECT_CONTEXT extent, the
 * per-family pa_sc_raster_config pair, preamble end and CLEAR_STATE.
 * The dword count must match cik_get_csb_size().
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	/* context control */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* only SECT_CONTEXT sections are emitted; anything else aborts */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* per-family pa_sc_raster_config/pa_sc_raster_config1 values */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_HAWAII:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	/* end clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
7120 
/**
 * cik_init_pg - enable powergating at init time
 * @rdev: radeon_device pointer
 *
 * No-op when no pg_flags are set.  Otherwise enables SMU clock
 * slowdown, does the one-time GFX PG setup plus CP/GDS powergating
 * when GFX PG is supported, programs the always-on CU mask and
 * finally enables the GFX powergating features.
 */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
7135 
/**
 * cik_fini_pg - disable powergating at teardown
 * @rdev: radeon_device pointer
 *
 * Disables the GFX powergating features, then CP/GDS powergating when
 * GFX PG is supported.  NOTE(review): unlike cik_init_pg() this does
 * not turn the SMU clock slowdown back off — presumably intentional,
 * but worth confirming.
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
7146 
7147 /*
7148  * Interrupts
7149  * Starting with r6xx, interrupts are handled via a ring buffer.
7150  * Ring buffers are areas of GPU accessible memory that the GPU
7151  * writes interrupt vectors into and the host reads vectors out of.
7152  * There is a rptr (read pointer) that determines where the
7153  * host is currently reading, and a wptr (write pointer)
7154  * which determines where the GPU has written.  When the
7155  * pointers are equal, the ring is idle.  When the GPU
7156  * writes vectors to the ring buffer, it increments the
7157  * wptr.  When there is an interrupt, the host then starts
7158  * fetching commands and processing them until the pointers are
7159  * equal again at which point it updates the rptr.
7160  */
7161 
7162 /**
7163  * cik_enable_interrupts - Enable the interrupt ring buffer
7164  *
7165  * @rdev: radeon_device pointer
7166  *
7167  * Enable the interrupt ring buffer (CIK).
7168  */
static void cik_enable_interrupts(struct radeon_device *rdev)
{
	u32 ih_cntl = RREG32(IH_CNTL);
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);

	/* master interrupt enable first, then the IH ring buffer */
	ih_cntl |= ENABLE_INTR;
	ih_rb_cntl |= IH_RB_ENABLE;
	WREG32(IH_CNTL, ih_cntl);
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	/* mirror the hw state in the driver */
	rdev->ih.enabled = true;
}
7180 
7181 /**
7182  * cik_disable_interrupts - Disable the interrupt ring buffer
7183  *
7184  * @rdev: radeon_device pointer
7185  *
7186  * Disable the interrupt ring buffer (CIK).
7187  */
static void cik_disable_interrupts(struct radeon_device *rdev)
{
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
	u32 ih_cntl = RREG32(IH_CNTL);

	/* reverse of cik_enable_interrupts(): ring off, then master off */
	ih_rb_cntl &= ~IH_RB_ENABLE;
	ih_cntl &= ~ENABLE_INTR;
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	WREG32(IH_CNTL, ih_cntl);
	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);
	/* keep the driver-side ring state in sync with the hw reset */
	rdev->ih.enabled = false;
	rdev->ih.rptr = 0;
}
7203 
7204 /**
7205  * cik_disable_interrupt_state - Disable all interrupt sources
7206  *
7207  * @rdev: radeon_device pointer
7208  *
7209  * Clear all interrupt enable bits used by the driver (CIK).
7210  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring */
	/* keep only the context busy/empty enables, drop all other sources */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma */
	/* clear the trap enable on both SDMA engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* SRBM */
	WREG32(SRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug */
	/* preserve only the configured hpd polarity while masking the irq */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
7280 
7281 /**
7282  * cik_irq_init - init and enable the interrupt ring
7283  *
7284  * @rdev: radeon_device pointer
7285  *
7286  * Allocate a ring buffer for the interrupt controller,
7287  * enable the RLC, disable interrupts, enable the IH
7288  * ring buffer and enable it (CIK).
 * Called at device load and resume.
7290  * Returns 0 for success, errors for failure.
7291  */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		/* undo the ring allocation on failure */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
7362 
7363 /**
7364  * cik_irq_set - enable/disable interrupt sources
7365  *
7366  * @rdev: radeon_device pointer
7367  *
7368  * Enable interrupt sources on the GPU (vblanks, hpd,
7369  * etc.) (CIK).
7370  * Returns 0 for success, errors for failure.
7371  */
7372 int cik_irq_set(struct radeon_device *rdev)
7373 {
7374 	u32 cp_int_cntl;
7375 	u32 cp_m1p0;
7376 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7377 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7378 	u32 grbm_int_cntl = 0;
7379 	u32 dma_cntl, dma_cntl1;
7380 
7381 	if (!rdev->irq.installed) {
7382 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7383 		return -EINVAL;
7384 	}
7385 	/* don't enable anything if the ih is disabled */
7386 	if (!rdev->ih.enabled) {
7387 		cik_disable_interrupts(rdev);
7388 		/* force the active interrupt state to all disabled */
7389 		cik_disable_interrupt_state(rdev);
7390 		return 0;
7391 	}
7392 
7393 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7394 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7395 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7396 
7397 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
7398 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
7399 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
7400 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
7401 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
7402 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
7403 
7404 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7405 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7406 
7407 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7408 
7409 	/* enable CP interrupts on all rings */
7410 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7411 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7412 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7413 	}
7414 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7415 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7416 		DRM_DEBUG("si_irq_set: sw int cp1\n");
7417 		if (ring->me == 1) {
7418 			switch (ring->pipe) {
7419 			case 0:
7420 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7421 				break;
7422 			default:
7423 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7424 				break;
7425 			}
7426 		} else {
7427 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7428 		}
7429 	}
7430 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7431 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7432 		DRM_DEBUG("si_irq_set: sw int cp2\n");
7433 		if (ring->me == 1) {
7434 			switch (ring->pipe) {
7435 			case 0:
7436 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7437 				break;
7438 			default:
7439 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7440 				break;
7441 			}
7442 		} else {
7443 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7444 		}
7445 	}
7446 
7447 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7448 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7449 		dma_cntl |= TRAP_ENABLE;
7450 	}
7451 
7452 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7453 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7454 		dma_cntl1 |= TRAP_ENABLE;
7455 	}
7456 
7457 	if (rdev->irq.crtc_vblank_int[0] ||
7458 	    atomic_read(&rdev->irq.pflip[0])) {
7459 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7460 		crtc1 |= VBLANK_INTERRUPT_MASK;
7461 	}
7462 	if (rdev->irq.crtc_vblank_int[1] ||
7463 	    atomic_read(&rdev->irq.pflip[1])) {
7464 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7465 		crtc2 |= VBLANK_INTERRUPT_MASK;
7466 	}
7467 	if (rdev->irq.crtc_vblank_int[2] ||
7468 	    atomic_read(&rdev->irq.pflip[2])) {
7469 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7470 		crtc3 |= VBLANK_INTERRUPT_MASK;
7471 	}
7472 	if (rdev->irq.crtc_vblank_int[3] ||
7473 	    atomic_read(&rdev->irq.pflip[3])) {
7474 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7475 		crtc4 |= VBLANK_INTERRUPT_MASK;
7476 	}
7477 	if (rdev->irq.crtc_vblank_int[4] ||
7478 	    atomic_read(&rdev->irq.pflip[4])) {
7479 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7480 		crtc5 |= VBLANK_INTERRUPT_MASK;
7481 	}
7482 	if (rdev->irq.crtc_vblank_int[5] ||
7483 	    atomic_read(&rdev->irq.pflip[5])) {
7484 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7485 		crtc6 |= VBLANK_INTERRUPT_MASK;
7486 	}
7487 	if (rdev->irq.hpd[0]) {
7488 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7489 		hpd1 |= DC_HPDx_INT_EN;
7490 	}
7491 	if (rdev->irq.hpd[1]) {
7492 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7493 		hpd2 |= DC_HPDx_INT_EN;
7494 	}
7495 	if (rdev->irq.hpd[2]) {
7496 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7497 		hpd3 |= DC_HPDx_INT_EN;
7498 	}
7499 	if (rdev->irq.hpd[3]) {
7500 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7501 		hpd4 |= DC_HPDx_INT_EN;
7502 	}
7503 	if (rdev->irq.hpd[4]) {
7504 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7505 		hpd5 |= DC_HPDx_INT_EN;
7506 	}
7507 	if (rdev->irq.hpd[5]) {
7508 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7509 		hpd6 |= DC_HPDx_INT_EN;
7510 	}
7511 
7512 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7513 
7514 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7515 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7516 
7517 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7518 
7519 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7520 
7521 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7522 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7523 	if (rdev->num_crtc >= 4) {
7524 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7525 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7526 	}
7527 	if (rdev->num_crtc >= 6) {
7528 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7529 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7530 	}
7531 
7532 	if (rdev->num_crtc >= 2) {
7533 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7534 		       GRPH_PFLIP_INT_MASK);
7535 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7536 		       GRPH_PFLIP_INT_MASK);
7537 	}
7538 	if (rdev->num_crtc >= 4) {
7539 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7540 		       GRPH_PFLIP_INT_MASK);
7541 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7542 		       GRPH_PFLIP_INT_MASK);
7543 	}
7544 	if (rdev->num_crtc >= 6) {
7545 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7546 		       GRPH_PFLIP_INT_MASK);
7547 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7548 		       GRPH_PFLIP_INT_MASK);
7549 	}
7550 
7551 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7552 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7553 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7554 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7555 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7556 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7557 
7558 	return 0;
7559 }
7560 
7561 /**
7562  * cik_irq_ack - ack interrupt sources
7563  *
7564  * @rdev: radeon_device pointer
7565  *
7566  * Ack interrupt sources on the GPU (vblanks, hpd,
7567  * etc.) (CIK).  Certain interrupts sources are sw
7568  * generated and do not require an explicit ack.
7569  */
7570 static inline void cik_irq_ack(struct radeon_device *rdev)
7571 {
7572 	u32 tmp;
7573 
7574 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7575 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7576 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7577 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7578 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7579 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7580 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7581 
7582 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7583 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7584 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7585 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7586 	if (rdev->num_crtc >= 4) {
7587 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7588 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7589 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7590 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7591 	}
7592 	if (rdev->num_crtc >= 6) {
7593 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7594 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7595 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7596 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7597 	}
7598 
7599 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7600 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7601 		       GRPH_PFLIP_INT_CLEAR);
7602 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7603 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7604 		       GRPH_PFLIP_INT_CLEAR);
7605 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7606 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7607 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7608 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7609 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7610 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7611 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7612 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7613 
7614 	if (rdev->num_crtc >= 4) {
7615 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7616 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7617 			       GRPH_PFLIP_INT_CLEAR);
7618 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7619 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7620 			       GRPH_PFLIP_INT_CLEAR);
7621 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7622 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7623 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7624 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7625 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7626 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7627 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7628 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7629 	}
7630 
7631 	if (rdev->num_crtc >= 6) {
7632 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7633 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7634 			       GRPH_PFLIP_INT_CLEAR);
7635 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7636 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7637 			       GRPH_PFLIP_INT_CLEAR);
7638 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7639 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7640 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7641 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7642 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7643 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7644 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7645 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7646 	}
7647 
7648 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7649 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7650 		tmp |= DC_HPDx_INT_ACK;
7651 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7652 	}
7653 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7654 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7655 		tmp |= DC_HPDx_INT_ACK;
7656 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7657 	}
7658 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7659 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7660 		tmp |= DC_HPDx_INT_ACK;
7661 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7662 	}
7663 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7664 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7665 		tmp |= DC_HPDx_INT_ACK;
7666 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7667 	}
7668 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7669 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7670 		tmp |= DC_HPDx_INT_ACK;
7671 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7672 	}
7673 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7674 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7675 		tmp |= DC_HPDx_INT_ACK;
7676 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7677 	}
7678 }
7679 
7680 /**
7681  * cik_irq_disable - disable interrupts
7682  *
7683  * @rdev: radeon_device pointer
7684  *
7685  * Disable interrupts on the hw (CIK).
7686  */
7687 static void cik_irq_disable(struct radeon_device *rdev)
7688 {
7689 	cik_disable_interrupts(rdev);
7690 	/* Wait and acknowledge irq */
7691 	mdelay(1);
7692 	cik_irq_ack(rdev);
7693 	cik_disable_interrupt_state(rdev);
7694 }
7695 
7696 /**
7697  * cik_irq_disable - disable interrupts for suspend
7698  *
7699  * @rdev: radeon_device pointer
7700  *
7701  * Disable interrupts and stop the RLC (CIK).
7702  * Used for suspend.
7703  */
7704 static void cik_irq_suspend(struct radeon_device *rdev)
7705 {
7706 	cik_irq_disable(rdev);
7707 	cik_rlc_stop(rdev);
7708 }
7709 
7710 /**
7711  * cik_irq_fini - tear down interrupt support
7712  *
7713  * @rdev: radeon_device pointer
7714  *
7715  * Disable interrupts on the hw and free the IH ring
7716  * buffer (CIK).
7717  * Used for driver unload.
7718  */
7719 static void cik_irq_fini(struct radeon_device *rdev)
7720 {
7721 	cik_irq_suspend(rdev);
7722 	r600_ih_ring_fini(rdev);
7723 }
7724 
7725 /**
7726  * cik_get_ih_wptr - get the IH ring buffer wptr
7727  *
7728  * @rdev: radeon_device pointer
7729  *
7730  * Get the IH ring buffer wptr from either the register
7731  * or the writeback memory buffer (CIK).  Also check for
7732  * ring buffer overflow and deal with it.
7733  * Used by cik_irq_process().
7734  * Returns the value of the wptr.
7735  */
7736 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7737 {
7738 	u32 wptr, tmp;
7739 
7740 	if (rdev->wb.enabled)
7741 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7742 	else
7743 		wptr = RREG32(IH_RB_WPTR);
7744 
7745 	if (wptr & RB_OVERFLOW) {
7746 		wptr &= ~RB_OVERFLOW;
7747 		/* When a ring buffer overflow happen start parsing interrupt
7748 		 * from the last not overwritten vector (wptr + 16). Hopefully
7749 		 * this should allow us to catchup.
7750 		 */
7751 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7752 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7753 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7754 		tmp = RREG32(IH_RB_CNTL);
7755 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7756 		WREG32(IH_RB_CNTL, tmp);
7757 	}
7758 	return (wptr & rdev->ih.ptr_mask);
7759 }
7760 
7761 /*        CIK IV Ring
7762  * Each IV ring entry is 128 bits:
7763  * [7:0]    - interrupt source id
7764  * [31:8]   - reserved
7765  * [59:32]  - interrupt source data
7766  * [63:60]  - reserved
7767  * [71:64]  - RINGID
7768  *            CP:
7769  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7770  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7771  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7772  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7773  *            PIPE_ID - ME0 0=3D
7774  *                    - ME1&2 compute dispatcher (4 pipes each)
7775  *            SDMA:
7776  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7777  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7778  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7779  * [79:72]  - VMID
7780  * [95:80]  - PASID
7781  * [127:96] - reserved
7782  */
7783 /**
7784  * cik_irq_process - interrupt handler
7785  *
7786  * @rdev: radeon_device pointer
7787  *
7788  * Interrupt hander (CIK).  Walk the IH ring,
7789  * ack interrupts and schedule work to handle
7790  * interrupt events.
7791  * Returns irq process return code.
7792  */
7793 int cik_irq_process(struct radeon_device *rdev)
7794 {
7795 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7796 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7797 	u32 wptr;
7798 	u32 rptr;
7799 	u32 src_id, src_data, ring_id;
7800 	u8 me_id, pipe_id, queue_id;
7801 	u32 ring_index;
7802 	bool queue_hotplug = false;
7803 	bool queue_reset = false;
7804 	u32 addr, status, mc_client;
7805 	bool queue_thermal = false;
7806 
7807 	if (!rdev->ih.enabled || rdev->shutdown)
7808 		return IRQ_NONE;
7809 
7810 	wptr = cik_get_ih_wptr(rdev);
7811 
7812 restart_ih:
7813 	/* is somebody else already processing irqs? */
7814 	if (atomic_xchg(&rdev->ih.lock, 1))
7815 		return IRQ_NONE;
7816 
7817 	rptr = rdev->ih.rptr;
7818 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7819 
7820 	/* Order reading of wptr vs. reading of IH ring data */
7821 	rmb();
7822 
7823 	/* display interrupts */
7824 	cik_irq_ack(rdev);
7825 
7826 	while (rptr != wptr) {
7827 		/* wptr/rptr are in bytes! */
7828 		ring_index = rptr / 4;
7829 
7830 		radeon_kfd_interrupt(rdev,
7831 				(const void *) &rdev->ih.ring[ring_index]);
7832 
7833 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7834 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7835 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7836 
7837 		switch (src_id) {
7838 		case 1: /* D1 vblank/vline */
7839 			switch (src_data) {
7840 			case 0: /* D1 vblank */
7841 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7842 					if (rdev->irq.crtc_vblank_int[0]) {
7843 						drm_handle_vblank(rdev->ddev, 0);
7844 						rdev->pm.vblank_sync = true;
7845 						wake_up(&rdev->irq.vblank_queue);
7846 					}
7847 					if (atomic_read(&rdev->irq.pflip[0]))
7848 						radeon_crtc_handle_vblank(rdev, 0);
7849 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7850 					DRM_DEBUG("IH: D1 vblank\n");
7851 				}
7852 				break;
7853 			case 1: /* D1 vline */
7854 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7855 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7856 					DRM_DEBUG("IH: D1 vline\n");
7857 				}
7858 				break;
7859 			default:
7860 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7861 				break;
7862 			}
7863 			break;
7864 		case 2: /* D2 vblank/vline */
7865 			switch (src_data) {
7866 			case 0: /* D2 vblank */
7867 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7868 					if (rdev->irq.crtc_vblank_int[1]) {
7869 						drm_handle_vblank(rdev->ddev, 1);
7870 						rdev->pm.vblank_sync = true;
7871 						wake_up(&rdev->irq.vblank_queue);
7872 					}
7873 					if (atomic_read(&rdev->irq.pflip[1]))
7874 						radeon_crtc_handle_vblank(rdev, 1);
7875 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7876 					DRM_DEBUG("IH: D2 vblank\n");
7877 				}
7878 				break;
7879 			case 1: /* D2 vline */
7880 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7881 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7882 					DRM_DEBUG("IH: D2 vline\n");
7883 				}
7884 				break;
7885 			default:
7886 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7887 				break;
7888 			}
7889 			break;
7890 		case 3: /* D3 vblank/vline */
7891 			switch (src_data) {
7892 			case 0: /* D3 vblank */
7893 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7894 					if (rdev->irq.crtc_vblank_int[2]) {
7895 						drm_handle_vblank(rdev->ddev, 2);
7896 						rdev->pm.vblank_sync = true;
7897 						wake_up(&rdev->irq.vblank_queue);
7898 					}
7899 					if (atomic_read(&rdev->irq.pflip[2]))
7900 						radeon_crtc_handle_vblank(rdev, 2);
7901 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7902 					DRM_DEBUG("IH: D3 vblank\n");
7903 				}
7904 				break;
7905 			case 1: /* D3 vline */
7906 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7907 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7908 					DRM_DEBUG("IH: D3 vline\n");
7909 				}
7910 				break;
7911 			default:
7912 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7913 				break;
7914 			}
7915 			break;
7916 		case 4: /* D4 vblank/vline */
7917 			switch (src_data) {
7918 			case 0: /* D4 vblank */
7919 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7920 					if (rdev->irq.crtc_vblank_int[3]) {
7921 						drm_handle_vblank(rdev->ddev, 3);
7922 						rdev->pm.vblank_sync = true;
7923 						wake_up(&rdev->irq.vblank_queue);
7924 					}
7925 					if (atomic_read(&rdev->irq.pflip[3]))
7926 						radeon_crtc_handle_vblank(rdev, 3);
7927 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7928 					DRM_DEBUG("IH: D4 vblank\n");
7929 				}
7930 				break;
7931 			case 1: /* D4 vline */
7932 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7933 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7934 					DRM_DEBUG("IH: D4 vline\n");
7935 				}
7936 				break;
7937 			default:
7938 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7939 				break;
7940 			}
7941 			break;
7942 		case 5: /* D5 vblank/vline */
7943 			switch (src_data) {
7944 			case 0: /* D5 vblank */
7945 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7946 					if (rdev->irq.crtc_vblank_int[4]) {
7947 						drm_handle_vblank(rdev->ddev, 4);
7948 						rdev->pm.vblank_sync = true;
7949 						wake_up(&rdev->irq.vblank_queue);
7950 					}
7951 					if (atomic_read(&rdev->irq.pflip[4]))
7952 						radeon_crtc_handle_vblank(rdev, 4);
7953 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7954 					DRM_DEBUG("IH: D5 vblank\n");
7955 				}
7956 				break;
7957 			case 1: /* D5 vline */
7958 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7959 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7960 					DRM_DEBUG("IH: D5 vline\n");
7961 				}
7962 				break;
7963 			default:
7964 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7965 				break;
7966 			}
7967 			break;
7968 		case 6: /* D6 vblank/vline */
7969 			switch (src_data) {
7970 			case 0: /* D6 vblank */
7971 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7972 					if (rdev->irq.crtc_vblank_int[5]) {
7973 						drm_handle_vblank(rdev->ddev, 5);
7974 						rdev->pm.vblank_sync = true;
7975 						wake_up(&rdev->irq.vblank_queue);
7976 					}
7977 					if (atomic_read(&rdev->irq.pflip[5]))
7978 						radeon_crtc_handle_vblank(rdev, 5);
7979 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7980 					DRM_DEBUG("IH: D6 vblank\n");
7981 				}
7982 				break;
7983 			case 1: /* D6 vline */
7984 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7985 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7986 					DRM_DEBUG("IH: D6 vline\n");
7987 				}
7988 				break;
7989 			default:
7990 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7991 				break;
7992 			}
7993 			break;
7994 		case 8: /* D1 page flip */
7995 		case 10: /* D2 page flip */
7996 		case 12: /* D3 page flip */
7997 		case 14: /* D4 page flip */
7998 		case 16: /* D5 page flip */
7999 		case 18: /* D6 page flip */
8000 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
8001 			if (radeon_use_pflipirq > 0)
8002 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
8003 			break;
8004 		case 42: /* HPD hotplug */
8005 			switch (src_data) {
8006 			case 0:
8007 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
8008 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
8009 					queue_hotplug = true;
8010 					DRM_DEBUG("IH: HPD1\n");
8011 				}
8012 				break;
8013 			case 1:
8014 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
8015 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8016 					queue_hotplug = true;
8017 					DRM_DEBUG("IH: HPD2\n");
8018 				}
8019 				break;
8020 			case 2:
8021 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
8022 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8023 					queue_hotplug = true;
8024 					DRM_DEBUG("IH: HPD3\n");
8025 				}
8026 				break;
8027 			case 3:
8028 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
8029 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8030 					queue_hotplug = true;
8031 					DRM_DEBUG("IH: HPD4\n");
8032 				}
8033 				break;
8034 			case 4:
8035 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
8036 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8037 					queue_hotplug = true;
8038 					DRM_DEBUG("IH: HPD5\n");
8039 				}
8040 				break;
8041 			case 5:
8042 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
8043 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8044 					queue_hotplug = true;
8045 					DRM_DEBUG("IH: HPD6\n");
8046 				}
8047 				break;
8048 			default:
8049 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8050 				break;
8051 			}
8052 			break;
8053 		case 96:
8054 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
8055 			WREG32(SRBM_INT_ACK, 0x1);
8056 			break;
8057 		case 124: /* UVD */
8058 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8059 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8060 			break;
8061 		case 146:
8062 		case 147:
8063 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8064 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8065 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8066 			/* reset addr and status */
8067 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8068 			if (addr == 0x0 && status == 0x0)
8069 				break;
8070 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8071 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8072 				addr);
8073 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8074 				status);
8075 			cik_vm_decode_fault(rdev, status, addr, mc_client);
8076 			break;
8077 		case 167: /* VCE */
8078 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8079 			switch (src_data) {
8080 			case 0:
8081 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8082 				break;
8083 			case 1:
8084 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8085 				break;
8086 			default:
8087 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8088 				break;
8089 			}
8090 			break;
8091 		case 176: /* GFX RB CP_INT */
8092 		case 177: /* GFX IB CP_INT */
8093 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8094 			break;
8095 		case 181: /* CP EOP event */
8096 			DRM_DEBUG("IH: CP EOP\n");
8097 			/* XXX check the bitfield order! */
8098 			me_id = (ring_id & 0x60) >> 5;
8099 			pipe_id = (ring_id & 0x18) >> 3;
8100 			queue_id = (ring_id & 0x7) >> 0;
8101 			switch (me_id) {
8102 			case 0:
8103 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8104 				break;
8105 			case 1:
8106 			case 2:
8107 				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
8108 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8109 				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
8110 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8111 				break;
8112 			}
8113 			break;
8114 		case 184: /* CP Privileged reg access */
8115 			DRM_ERROR("Illegal register access in command stream\n");
8116 			/* XXX check the bitfield order! */
8117 			me_id = (ring_id & 0x60) >> 5;
8118 			pipe_id = (ring_id & 0x18) >> 3;
8119 			queue_id = (ring_id & 0x7) >> 0;
8120 			switch (me_id) {
8121 			case 0:
8122 				/* This results in a full GPU reset, but all we need to do is soft
8123 				 * reset the CP for gfx
8124 				 */
8125 				queue_reset = true;
8126 				break;
8127 			case 1:
8128 				/* XXX compute */
8129 				queue_reset = true;
8130 				break;
8131 			case 2:
8132 				/* XXX compute */
8133 				queue_reset = true;
8134 				break;
8135 			}
8136 			break;
8137 		case 185: /* CP Privileged inst */
8138 			DRM_ERROR("Illegal instruction in command stream\n");
8139 			/* XXX check the bitfield order! */
8140 			me_id = (ring_id & 0x60) >> 5;
8141 			pipe_id = (ring_id & 0x18) >> 3;
8142 			queue_id = (ring_id & 0x7) >> 0;
8143 			switch (me_id) {
8144 			case 0:
8145 				/* This results in a full GPU reset, but all we need to do is soft
8146 				 * reset the CP for gfx
8147 				 */
8148 				queue_reset = true;
8149 				break;
8150 			case 1:
8151 				/* XXX compute */
8152 				queue_reset = true;
8153 				break;
8154 			case 2:
8155 				/* XXX compute */
8156 				queue_reset = true;
8157 				break;
8158 			}
8159 			break;
8160 		case 224: /* SDMA trap event */
8161 			/* XXX check the bitfield order! */
8162 			me_id = (ring_id & 0x3) >> 0;
8163 			queue_id = (ring_id & 0xc) >> 2;
8164 			DRM_DEBUG("IH: SDMA trap\n");
8165 			switch (me_id) {
8166 			case 0:
8167 				switch (queue_id) {
8168 				case 0:
8169 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8170 					break;
8171 				case 1:
8172 					/* XXX compute */
8173 					break;
8174 				case 2:
8175 					/* XXX compute */
8176 					break;
8177 				}
8178 				break;
8179 			case 1:
8180 				switch (queue_id) {
8181 				case 0:
8182 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8183 					break;
8184 				case 1:
8185 					/* XXX compute */
8186 					break;
8187 				case 2:
8188 					/* XXX compute */
8189 					break;
8190 				}
8191 				break;
8192 			}
8193 			break;
8194 		case 230: /* thermal low to high */
8195 			DRM_DEBUG("IH: thermal low to high\n");
8196 			rdev->pm.dpm.thermal.high_to_low = false;
8197 			queue_thermal = true;
8198 			break;
8199 		case 231: /* thermal high to low */
8200 			DRM_DEBUG("IH: thermal high to low\n");
8201 			rdev->pm.dpm.thermal.high_to_low = true;
8202 			queue_thermal = true;
8203 			break;
8204 		case 233: /* GUI IDLE */
8205 			DRM_DEBUG("IH: GUI idle\n");
8206 			break;
8207 		case 241: /* SDMA Privileged inst */
8208 		case 247: /* SDMA Privileged inst */
8209 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8210 			/* XXX check the bitfield order! */
8211 			me_id = (ring_id & 0x3) >> 0;
8212 			queue_id = (ring_id & 0xc) >> 2;
8213 			switch (me_id) {
8214 			case 0:
8215 				switch (queue_id) {
8216 				case 0:
8217 					queue_reset = true;
8218 					break;
8219 				case 1:
8220 					/* XXX compute */
8221 					queue_reset = true;
8222 					break;
8223 				case 2:
8224 					/* XXX compute */
8225 					queue_reset = true;
8226 					break;
8227 				}
8228 				break;
8229 			case 1:
8230 				switch (queue_id) {
8231 				case 0:
8232 					queue_reset = true;
8233 					break;
8234 				case 1:
8235 					/* XXX compute */
8236 					queue_reset = true;
8237 					break;
8238 				case 2:
8239 					/* XXX compute */
8240 					queue_reset = true;
8241 					break;
8242 				}
8243 				break;
8244 			}
8245 			break;
8246 		default:
8247 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8248 			break;
8249 		}
8250 
8251 		/* wptr/rptr are in bytes! */
8252 		rptr += 16;
8253 		rptr &= rdev->ih.ptr_mask;
8254 		WREG32(IH_RB_RPTR, rptr);
8255 	}
8256 	if (queue_hotplug)
8257 		schedule_work(&rdev->hotplug_work);
8258 	if (queue_reset) {
8259 		rdev->needs_reset = true;
8260 		wake_up_all(&rdev->fence_queue);
8261 	}
8262 	if (queue_thermal)
8263 		schedule_work(&rdev->pm.dpm.thermal.work);
8264 	rdev->ih.rptr = rptr;
8265 	atomic_set(&rdev->ih.lock, 0);
8266 
8267 	/* make sure wptr hasn't changed while processing */
8268 	wptr = cik_get_ih_wptr(rdev);
8269 	if (wptr != rptr)
8270 		goto restart_ih;
8271 
8272 	return IRQ_HANDLED;
8273 }
8274 
8275 /*
8276  * startup/shutdown callbacks
8277  */
8278 /**
8279  * cik_startup - program the asic to a functional state
8280  *
8281  * @rdev: radeon_device pointer
8282  *
8283  * Programs the asic to a functional state (CIK).
8284  * Called by cik_init() and cik_resume().
8285  * Returns 0 for success, error for failure.
8286  */
8287 static int cik_startup(struct radeon_device *rdev)
8288 {
8289 	struct radeon_ring *ring;
8290 	u32 nop;
8291 	int r;
8292 
8293 	/* enable pcie gen2/3 link */
8294 	cik_pcie_gen3_enable(rdev);
8295 	/* enable aspm */
8296 	cik_program_aspm(rdev);
8297 
8298 	/* scratch needs to be initialized before MC */
8299 	r = r600_vram_scratch_init(rdev);
8300 	if (r)
8301 		return r;
8302 
8303 	cik_mc_program(rdev);
8304 
8305 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8306 		r = ci_mc_load_microcode(rdev);
8307 		if (r) {
8308 			DRM_ERROR("Failed to load MC firmware!\n");
8309 			return r;
8310 		}
8311 	}
8312 
8313 	r = cik_pcie_gart_enable(rdev);
8314 	if (r)
8315 		return r;
8316 	cik_gpu_init(rdev);
8317 
8318 	/* allocate rlc buffers */
8319 	if (rdev->flags & RADEON_IS_IGP) {
8320 		if (rdev->family == CHIP_KAVERI) {
8321 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8322 			rdev->rlc.reg_list_size =
8323 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8324 		} else {
8325 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8326 			rdev->rlc.reg_list_size =
8327 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8328 		}
8329 	}
8330 	rdev->rlc.cs_data = ci_cs_data;
8331 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8332 	r = sumo_rlc_init(rdev);
8333 	if (r) {
8334 		DRM_ERROR("Failed to init rlc BOs!\n");
8335 		return r;
8336 	}
8337 
8338 	/* allocate wb buffer */
8339 	r = radeon_wb_init(rdev);
8340 	if (r)
8341 		return r;
8342 
8343 	/* allocate mec buffers */
8344 	r = cik_mec_init(rdev);
8345 	if (r) {
8346 		DRM_ERROR("Failed to init MEC BOs!\n");
8347 		return r;
8348 	}
8349 
8350 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8351 	if (r) {
8352 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8353 		return r;
8354 	}
8355 
8356 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8357 	if (r) {
8358 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8359 		return r;
8360 	}
8361 
8362 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8363 	if (r) {
8364 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8365 		return r;
8366 	}
8367 
8368 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8369 	if (r) {
8370 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8371 		return r;
8372 	}
8373 
8374 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8375 	if (r) {
8376 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8377 		return r;
8378 	}
8379 
8380 	r = radeon_uvd_resume(rdev);
8381 	if (!r) {
8382 		r = uvd_v4_2_resume(rdev);
8383 		if (!r) {
8384 			r = radeon_fence_driver_start_ring(rdev,
8385 							   R600_RING_TYPE_UVD_INDEX);
8386 			if (r)
8387 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8388 		}
8389 	}
8390 	if (r)
8391 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8392 
8393 	r = radeon_vce_resume(rdev);
8394 	if (!r) {
8395 		r = vce_v2_0_resume(rdev);
8396 		if (!r)
8397 			r = radeon_fence_driver_start_ring(rdev,
8398 							   TN_RING_TYPE_VCE1_INDEX);
8399 		if (!r)
8400 			r = radeon_fence_driver_start_ring(rdev,
8401 							   TN_RING_TYPE_VCE2_INDEX);
8402 	}
8403 	if (r) {
8404 		dev_err(rdev->dev, "VCE init error (%d).\n", r);
8405 		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8406 		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8407 	}
8408 
8409 	/* Enable IRQ */
8410 	if (!rdev->irq.installed) {
8411 		r = radeon_irq_kms_init(rdev);
8412 		if (r)
8413 			return r;
8414 	}
8415 
8416 	r = cik_irq_init(rdev);
8417 	if (r) {
8418 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8419 		radeon_irq_kms_fini(rdev);
8420 		return r;
8421 	}
8422 	cik_irq_set(rdev);
8423 
8424 	if (rdev->family == CHIP_HAWAII) {
8425 		if (rdev->new_fw)
8426 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8427 		else
8428 			nop = RADEON_CP_PACKET2;
8429 	} else {
8430 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8431 	}
8432 
8433 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8434 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8435 			     nop);
8436 	if (r)
8437 		return r;
8438 
8439 	/* set up the compute queues */
8440 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8441 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8442 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8443 			     nop);
8444 	if (r)
8445 		return r;
8446 	ring->me = 1; /* first MEC */
8447 	ring->pipe = 0; /* first pipe */
8448 	ring->queue = 0; /* first queue */
8449 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8450 
8451 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8452 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8453 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8454 			     nop);
8455 	if (r)
8456 		return r;
8457 	/* dGPU only have 1 MEC */
8458 	ring->me = 1; /* first MEC */
8459 	ring->pipe = 0; /* first pipe */
8460 	ring->queue = 1; /* second queue */
8461 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8462 
8463 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8464 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8465 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8466 	if (r)
8467 		return r;
8468 
8469 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8470 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8471 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8472 	if (r)
8473 		return r;
8474 
8475 	r = cik_cp_resume(rdev);
8476 	if (r)
8477 		return r;
8478 
8479 	r = cik_sdma_resume(rdev);
8480 	if (r)
8481 		return r;
8482 
8483 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8484 	if (ring->ring_size) {
8485 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8486 				     RADEON_CP_PACKET2);
8487 		if (!r)
8488 			r = uvd_v1_0_init(rdev);
8489 		if (r)
8490 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8491 	}
8492 
8493 	r = -ENOENT;
8494 
8495 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8496 	if (ring->ring_size)
8497 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8498 				     VCE_CMD_NO_OP);
8499 
8500 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8501 	if (ring->ring_size)
8502 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8503 				     VCE_CMD_NO_OP);
8504 
8505 	if (!r)
8506 		r = vce_v1_0_init(rdev);
8507 	else if (r != -ENOENT)
8508 		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8509 
8510 	r = radeon_ib_pool_init(rdev);
8511 	if (r) {
8512 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8513 		return r;
8514 	}
8515 
8516 	r = radeon_vm_manager_init(rdev);
8517 	if (r) {
8518 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8519 		return r;
8520 	}
8521 
8522 	r = radeon_audio_init(rdev);
8523 	if (r)
8524 		return r;
8525 
8526 	r = radeon_kfd_resume(rdev);
8527 	if (r)
8528 		return r;
8529 
8530 	return 0;
8531 }
8532 
8533 /**
8534  * cik_resume - resume the asic to a functional state
8535  *
8536  * @rdev: radeon_device pointer
8537  *
8538  * Programs the asic to a functional state (CIK).
8539  * Called at resume.
8540  * Returns 0 for success, error for failure.
8541  */
8542 int cik_resume(struct radeon_device *rdev)
8543 {
8544 	int r;
8545 
8546 	/* post card */
8547 	atom_asic_init(rdev->mode_info.atom_context);
8548 
8549 	/* init golden registers */
8550 	cik_init_golden_registers(rdev);
8551 
8552 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8553 		radeon_pm_resume(rdev);
8554 
8555 	rdev->accel_working = true;
8556 	r = cik_startup(rdev);
8557 	if (r) {
8558 		DRM_ERROR("cik startup failed on resume\n");
8559 		rdev->accel_working = false;
8560 		return r;
8561 	}
8562 
8563 	return r;
8564 
8565 }
8566 
8567 /**
8568  * cik_suspend - suspend the asic
8569  *
8570  * @rdev: radeon_device pointer
8571  *
8572  * Bring the chip into a state suitable for suspend (CIK).
8573  * Called at suspend.
8574  * Returns 0 for success.
8575  */
8576 int cik_suspend(struct radeon_device *rdev)
8577 {
8578 	radeon_kfd_suspend(rdev);
8579 	radeon_pm_suspend(rdev);
8580 	radeon_audio_fini(rdev);
8581 	radeon_vm_manager_fini(rdev);
8582 	cik_cp_enable(rdev, false);
8583 	cik_sdma_enable(rdev, false);
8584 	uvd_v1_0_fini(rdev);
8585 	radeon_uvd_suspend(rdev);
8586 	radeon_vce_suspend(rdev);
8587 	cik_fini_pg(rdev);
8588 	cik_fini_cg(rdev);
8589 	cik_irq_suspend(rdev);
8590 	radeon_wb_disable(rdev);
8591 	cik_pcie_gart_disable(rdev);
8592 	return 0;
8593 }
8594 
/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does pretty much
 * nothing more than calling asic-specific functions. This
 * should also allow us to remove a bunch of callback functions
 * like vram_info.
 */
8601 /**
8602  * cik_init - asic specific driver and hw init
8603  *
8604  * @rdev: radeon_device pointer
8605  *
8606  * Setup asic specific driver variables and program the hw
8607  * to a functional state (CIK).
8608  * Called at driver startup.
8609  * Returns 0 for success, errors for failure.
8610  */
8611 int cik_init(struct radeon_device *rdev)
8612 {
8613 	struct radeon_ring *ring;
8614 	int r;
8615 
8616 	/* Read BIOS */
8617 	if (!radeon_get_bios(rdev)) {
8618 		if (ASIC_IS_AVIVO(rdev))
8619 			return -EINVAL;
8620 	}
8621 	/* Must be an ATOMBIOS */
8622 	if (!rdev->is_atom_bios) {
8623 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8624 		return -EINVAL;
8625 	}
8626 	r = radeon_atombios_init(rdev);
8627 	if (r)
8628 		return r;
8629 
8630 	/* Post card if necessary */
8631 	if (!radeon_card_posted(rdev)) {
8632 		if (!rdev->bios) {
8633 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8634 			return -EINVAL;
8635 		}
8636 		DRM_INFO("GPU not posted. posting now...\n");
8637 		atom_asic_init(rdev->mode_info.atom_context);
8638 	}
8639 	/* init golden registers */
8640 	cik_init_golden_registers(rdev);
8641 	/* Initialize scratch registers */
8642 	cik_scratch_init(rdev);
8643 	/* Initialize surface registers */
8644 	radeon_surface_init(rdev);
8645 	/* Initialize clocks */
8646 	radeon_get_clock_info(rdev->ddev);
8647 
8648 	/* Fence driver */
8649 	r = radeon_fence_driver_init(rdev);
8650 	if (r)
8651 		return r;
8652 
8653 	/* initialize memory controller */
8654 	r = cik_mc_init(rdev);
8655 	if (r)
8656 		return r;
8657 	/* Memory manager */
8658 	r = radeon_bo_init(rdev);
8659 	if (r)
8660 		return r;
8661 
8662 	if (rdev->flags & RADEON_IS_IGP) {
8663 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8664 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8665 			r = cik_init_microcode(rdev);
8666 			if (r) {
8667 				DRM_ERROR("Failed to load firmware!\n");
8668 				return r;
8669 			}
8670 		}
8671 	} else {
8672 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8673 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8674 		    !rdev->mc_fw) {
8675 			r = cik_init_microcode(rdev);
8676 			if (r) {
8677 				DRM_ERROR("Failed to load firmware!\n");
8678 				return r;
8679 			}
8680 		}
8681 	}
8682 
8683 	/* Initialize power management */
8684 	radeon_pm_init(rdev);
8685 
8686 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8687 	ring->ring_obj = NULL;
8688 	r600_ring_init(rdev, ring, 1024 * 1024);
8689 
8690 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8691 	ring->ring_obj = NULL;
8692 	r600_ring_init(rdev, ring, 1024 * 1024);
8693 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8694 	if (r)
8695 		return r;
8696 
8697 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8698 	ring->ring_obj = NULL;
8699 	r600_ring_init(rdev, ring, 1024 * 1024);
8700 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8701 	if (r)
8702 		return r;
8703 
8704 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8705 	ring->ring_obj = NULL;
8706 	r600_ring_init(rdev, ring, 256 * 1024);
8707 
8708 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8709 	ring->ring_obj = NULL;
8710 	r600_ring_init(rdev, ring, 256 * 1024);
8711 
8712 	r = radeon_uvd_init(rdev);
8713 	if (!r) {
8714 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8715 		ring->ring_obj = NULL;
8716 		r600_ring_init(rdev, ring, 4096);
8717 	}
8718 
8719 	r = radeon_vce_init(rdev);
8720 	if (!r) {
8721 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8722 		ring->ring_obj = NULL;
8723 		r600_ring_init(rdev, ring, 4096);
8724 
8725 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8726 		ring->ring_obj = NULL;
8727 		r600_ring_init(rdev, ring, 4096);
8728 	}
8729 
8730 	rdev->ih.ring_obj = NULL;
8731 	r600_ih_ring_init(rdev, 64 * 1024);
8732 
8733 	r = r600_pcie_gart_init(rdev);
8734 	if (r)
8735 		return r;
8736 
8737 	rdev->accel_working = true;
8738 	r = cik_startup(rdev);
8739 	if (r) {
8740 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8741 		cik_cp_fini(rdev);
8742 		cik_sdma_fini(rdev);
8743 		cik_irq_fini(rdev);
8744 		sumo_rlc_fini(rdev);
8745 		cik_mec_fini(rdev);
8746 		radeon_wb_fini(rdev);
8747 		radeon_ib_pool_fini(rdev);
8748 		radeon_vm_manager_fini(rdev);
8749 		radeon_irq_kms_fini(rdev);
8750 		cik_pcie_gart_fini(rdev);
8751 		rdev->accel_working = false;
8752 	}
8753 
8754 	/* Don't start up if the MC ucode is missing.
8755 	 * The default clocks and voltages before the MC ucode
8756 	 * is loaded are not suffient for advanced operations.
8757 	 */
8758 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8759 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8760 		return -EINVAL;
8761 	}
8762 
8763 	return 0;
8764 }
8765 
8766 /**
8767  * cik_fini - asic specific driver and hw fini
8768  *
8769  * @rdev: radeon_device pointer
8770  *
8771  * Tear down the asic specific driver variables and program the hw
8772  * to an idle state (CIK).
8773  * Called at driver unload.
8774  */
8775 void cik_fini(struct radeon_device *rdev)
8776 {
8777 	radeon_pm_fini(rdev);
8778 	cik_cp_fini(rdev);
8779 	cik_sdma_fini(rdev);
8780 	cik_fini_pg(rdev);
8781 	cik_fini_cg(rdev);
8782 	cik_irq_fini(rdev);
8783 	sumo_rlc_fini(rdev);
8784 	cik_mec_fini(rdev);
8785 	radeon_wb_fini(rdev);
8786 	radeon_vm_manager_fini(rdev);
8787 	radeon_ib_pool_fini(rdev);
8788 	radeon_irq_kms_fini(rdev);
8789 	uvd_v1_0_fini(rdev);
8790 	radeon_uvd_fini(rdev);
8791 	radeon_vce_fini(rdev);
8792 	cik_pcie_gart_fini(rdev);
8793 	r600_vram_scratch_fini(rdev);
8794 	radeon_gem_fini(rdev);
8795 	radeon_fence_driver_fini(rdev);
8796 	radeon_bo_fini(rdev);
8797 	radeon_atombios_fini(rdev);
8798 	kfree(rdev->bios);
8799 	rdev->bios = NULL;
8800 }
8801 
8802 void dce8_program_fmt(struct drm_encoder *encoder)
8803 {
8804 	struct drm_device *dev = encoder->dev;
8805 	struct radeon_device *rdev = dev->dev_private;
8806 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8807 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8808 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8809 	int bpc = 0;
8810 	u32 tmp = 0;
8811 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8812 
8813 	if (connector) {
8814 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8815 		bpc = radeon_get_monitor_bpc(connector);
8816 		dither = radeon_connector->dither;
8817 	}
8818 
8819 	/* LVDS/eDP FMT is set up by atom */
8820 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8821 		return;
8822 
8823 	/* not needed for analog */
8824 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8825 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8826 		return;
8827 
8828 	if (bpc == 0)
8829 		return;
8830 
8831 	switch (bpc) {
8832 	case 6:
8833 		if (dither == RADEON_FMT_DITHER_ENABLE)
8834 			/* XXX sort out optimal dither settings */
8835 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8836 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8837 		else
8838 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8839 		break;
8840 	case 8:
8841 		if (dither == RADEON_FMT_DITHER_ENABLE)
8842 			/* XXX sort out optimal dither settings */
8843 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8844 				FMT_RGB_RANDOM_ENABLE |
8845 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8846 		else
8847 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8848 		break;
8849 	case 10:
8850 		if (dither == RADEON_FMT_DITHER_ENABLE)
8851 			/* XXX sort out optimal dither settings */
8852 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8853 				FMT_RGB_RANDOM_ENABLE |
8854 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8855 		else
8856 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8857 		break;
8858 	default:
8859 		/* not needed */
8860 		break;
8861 	}
8862 
8863 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8864 }
8865 
8866 /* display watermark setup */
8867 /**
8868  * dce8_line_buffer_adjust - Set up the line buffer
8869  *
8870  * @rdev: radeon_device pointer
8871  * @radeon_crtc: the selected display controller
8872  * @mode: the current display mode on the selected display
8873  * controller
8874  *
8875  * Setup up the line buffer allocation for
8876  * the selected display controller (CIK).
8877  * Returns the line buffer size in pixels.
8878  */
8879 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8880 				   struct radeon_crtc *radeon_crtc,
8881 				   struct drm_display_mode *mode)
8882 {
8883 	u32 tmp, buffer_alloc, i;
8884 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8885 	/*
8886 	 * Line Buffer Setup
8887 	 * There are 6 line buffers, one for each display controllers.
8888 	 * There are 3 partitions per LB. Select the number of partitions
8889 	 * to enable based on the display width.  For display widths larger
8890 	 * than 4096, you need use to use 2 display controllers and combine
8891 	 * them using the stereo blender.
8892 	 */
8893 	if (radeon_crtc->base.enabled && mode) {
8894 		if (mode->crtc_hdisplay < 1920) {
8895 			tmp = 1;
8896 			buffer_alloc = 2;
8897 		} else if (mode->crtc_hdisplay < 2560) {
8898 			tmp = 2;
8899 			buffer_alloc = 2;
8900 		} else if (mode->crtc_hdisplay < 4096) {
8901 			tmp = 0;
8902 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8903 		} else {
8904 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8905 			tmp = 0;
8906 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8907 		}
8908 	} else {
8909 		tmp = 1;
8910 		buffer_alloc = 0;
8911 	}
8912 
8913 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8914 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8915 
8916 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8917 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8918 	for (i = 0; i < rdev->usec_timeout; i++) {
8919 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8920 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8921 			break;
8922 		udelay(1);
8923 	}
8924 
8925 	if (radeon_crtc->base.enabled && mode) {
8926 		switch (tmp) {
8927 		case 0:
8928 		default:
8929 			return 4096 * 2;
8930 		case 1:
8931 			return 1920 * 2;
8932 		case 2:
8933 			return 2560 * 2;
8934 		}
8935 	}
8936 
8937 	/* controller not enabled, so no lb used */
8938 	return 0;
8939 }
8940 
8941 /**
8942  * cik_get_number_of_dram_channels - get the number of dram channels
8943  *
8944  * @rdev: radeon_device pointer
8945  *
8946  * Look up the number of video ram channels (CIK).
8947  * Used for display watermark bandwidth calculations
8948  * Returns the number of dram channels
8949  */
8950 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8951 {
8952 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8953 
8954 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8955 	case 0:
8956 	default:
8957 		return 1;
8958 	case 1:
8959 		return 2;
8960 	case 2:
8961 		return 4;
8962 	case 3:
8963 		return 8;
8964 	case 4:
8965 		return 3;
8966 	case 5:
8967 		return 6;
8968 	case 6:
8969 		return 10;
8970 	case 7:
8971 		return 12;
8972 	case 8:
8973 		return 16;
8974 	}
8975 }
8976 
/* input parameters for the DCE8 display watermark calculations */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8992 
8993 /**
8994  * dce8_dram_bandwidth - get the dram bandwidth
8995  *
8996  * @wm: watermark calculation data
8997  *
8998  * Calculate the raw dram bandwidth (CIK).
8999  * Used for display watermark bandwidth calculations
9000  * Returns the dram bandwidth in MBytes/s
9001  */
9002 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
9003 {
9004 	/* Calculate raw DRAM Bandwidth */
9005 	fixed20_12 dram_efficiency; /* 0.7 */
9006 	fixed20_12 yclk, dram_channels, bandwidth;
9007 	fixed20_12 a;
9008 
9009 	a.full = dfixed_const(1000);
9010 	yclk.full = dfixed_const(wm->yclk);
9011 	yclk.full = dfixed_div(yclk, a);
9012 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9013 	a.full = dfixed_const(10);
9014 	dram_efficiency.full = dfixed_const(7);
9015 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
9016 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9017 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9018 
9019 	return dfixed_trunc(bandwidth);
9020 }
9021 
9022 /**
9023  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9024  *
9025  * @wm: watermark calculation data
9026  *
9027  * Calculate the dram bandwidth used for display (CIK).
9028  * Used for display watermark bandwidth calculations
9029  * Returns the dram bandwidth for display in MBytes/s
9030  */
9031 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9032 {
9033 	/* Calculate DRAM Bandwidth and the part allocated to display. */
9034 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9035 	fixed20_12 yclk, dram_channels, bandwidth;
9036 	fixed20_12 a;
9037 
9038 	a.full = dfixed_const(1000);
9039 	yclk.full = dfixed_const(wm->yclk);
9040 	yclk.full = dfixed_div(yclk, a);
9041 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9042 	a.full = dfixed_const(10);
9043 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
9044 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9045 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9046 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9047 
9048 	return dfixed_trunc(bandwidth);
9049 }
9050 
9051 /**
9052  * dce8_data_return_bandwidth - get the data return bandwidth
9053  *
9054  * @wm: watermark calculation data
9055  *
9056  * Calculate the data return bandwidth used for display (CIK).
9057  * Used for display watermark bandwidth calculations
9058  * Returns the data return bandwidth in MBytes/s
9059  */
9060 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9061 {
9062 	/* Calculate the display Data return Bandwidth */
9063 	fixed20_12 return_efficiency; /* 0.8 */
9064 	fixed20_12 sclk, bandwidth;
9065 	fixed20_12 a;
9066 
9067 	a.full = dfixed_const(1000);
9068 	sclk.full = dfixed_const(wm->sclk);
9069 	sclk.full = dfixed_div(sclk, a);
9070 	a.full = dfixed_const(10);
9071 	return_efficiency.full = dfixed_const(8);
9072 	return_efficiency.full = dfixed_div(return_efficiency, a);
9073 	a.full = dfixed_const(32);
9074 	bandwidth.full = dfixed_mul(a, sclk);
9075 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9076 
9077 	return dfixed_trunc(bandwidth);
9078 }
9079 
9080 /**
9081  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9082  *
9083  * @wm: watermark calculation data
9084  *
9085  * Calculate the dmif bandwidth used for display (CIK).
9086  * Used for display watermark bandwidth calculations
9087  * Returns the dmif bandwidth in MBytes/s
9088  */
9089 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9090 {
9091 	/* Calculate the DMIF Request Bandwidth */
9092 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9093 	fixed20_12 disp_clk, bandwidth;
9094 	fixed20_12 a, b;
9095 
9096 	a.full = dfixed_const(1000);
9097 	disp_clk.full = dfixed_const(wm->disp_clk);
9098 	disp_clk.full = dfixed_div(disp_clk, a);
9099 	a.full = dfixed_const(32);
9100 	b.full = dfixed_mul(a, disp_clk);
9101 
9102 	a.full = dfixed_const(10);
9103 	disp_clk_request_efficiency.full = dfixed_const(8);
9104 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9105 
9106 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9107 
9108 	return dfixed_trunc(bandwidth);
9109 }
9110 
9111 /**
9112  * dce8_available_bandwidth - get the min available bandwidth
9113  *
9114  * @wm: watermark calculation data
9115  *
9116  * Calculate the min available bandwidth used for display (CIK).
9117  * Used for display watermark bandwidth calculations
9118  * Returns the min available bandwidth in MBytes/s
9119  */
9120 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9121 {
9122 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9123 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9124 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9125 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9126 
9127 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9128 }
9129 
9130 /**
9131  * dce8_average_bandwidth - get the average available bandwidth
9132  *
9133  * @wm: watermark calculation data
9134  *
9135  * Calculate the average available bandwidth used for display (CIK).
9136  * Used for display watermark bandwidth calculations
9137  * Returns the average available bandwidth in MBytes/s
9138  */
9139 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9140 {
9141 	/* Calculate the display mode Average Bandwidth
9142 	 * DisplayMode should contain the source and destination dimensions,
9143 	 * timing, etc.
9144 	 */
9145 	fixed20_12 bpp;
9146 	fixed20_12 line_time;
9147 	fixed20_12 src_width;
9148 	fixed20_12 bandwidth;
9149 	fixed20_12 a;
9150 
9151 	a.full = dfixed_const(1000);
9152 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9153 	line_time.full = dfixed_div(line_time, a);
9154 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9155 	src_width.full = dfixed_const(wm->src_width);
9156 	bandwidth.full = dfixed_mul(src_width, bpp);
9157 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9158 	bandwidth.full = dfixed_div(bandwidth, line_time);
9159 
9160 	return dfixed_trunc(bandwidth);
9161 }
9162 
9163 /**
9164  * dce8_latency_watermark - get the latency watermark
9165  *
9166  * @wm: watermark calculation data
9167  *
9168  * Calculate the latency watermark (CIK).
9169  * Used for display watermark bandwidth calculations
9170  * Returns the latency watermark in ns
9171  */
9172 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9173 {
9174 	/* First calculate the latency in ns */
9175 	u32 mc_latency = 2000; /* 2000 ns. */
9176 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9177 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9178 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9179 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9180 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9181 		(wm->num_heads * cursor_line_pair_return_time);
9182 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9183 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9184 	u32 tmp, dmif_size = 12288;
9185 	fixed20_12 a, b, c;
9186 
9187 	if (wm->num_heads == 0)
9188 		return 0;
9189 
9190 	a.full = dfixed_const(2);
9191 	b.full = dfixed_const(1);
9192 	if ((wm->vsc.full > a.full) ||
9193 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9194 	    (wm->vtaps >= 5) ||
9195 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9196 		max_src_lines_per_dst_line = 4;
9197 	else
9198 		max_src_lines_per_dst_line = 2;
9199 
9200 	a.full = dfixed_const(available_bandwidth);
9201 	b.full = dfixed_const(wm->num_heads);
9202 	a.full = dfixed_div(a, b);
9203 
9204 	b.full = dfixed_const(mc_latency + 512);
9205 	c.full = dfixed_const(wm->disp_clk);
9206 	b.full = dfixed_div(b, c);
9207 
9208 	c.full = dfixed_const(dmif_size);
9209 	b.full = dfixed_div(c, b);
9210 
9211 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9212 
9213 	b.full = dfixed_const(1000);
9214 	c.full = dfixed_const(wm->disp_clk);
9215 	b.full = dfixed_div(c, b);
9216 	c.full = dfixed_const(wm->bytes_per_pixel);
9217 	b.full = dfixed_mul(b, c);
9218 
9219 	lb_fill_bw = min(tmp, dfixed_trunc(b));
9220 
9221 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9222 	b.full = dfixed_const(1000);
9223 	c.full = dfixed_const(lb_fill_bw);
9224 	b.full = dfixed_div(c, b);
9225 	a.full = dfixed_div(a, b);
9226 	line_fill_time = dfixed_trunc(a);
9227 
9228 	if (line_fill_time < wm->active_time)
9229 		return latency;
9230 	else
9231 		return latency + (line_fill_time - wm->active_time);
9232 
9233 }
9234 
9235 /**
9236  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9237  * average and available dram bandwidth
9238  *
9239  * @wm: watermark calculation data
9240  *
9241  * Check if the display average bandwidth fits in the display
9242  * dram bandwidth (CIK).
9243  * Used for display watermark bandwidth calculations
9244  * Returns true if the display fits, false if not.
9245  */
9246 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9247 {
9248 	if (dce8_average_bandwidth(wm) <=
9249 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9250 		return true;
9251 	else
9252 		return false;
9253 }
9254 
9255 /**
9256  * dce8_average_bandwidth_vs_available_bandwidth - check
9257  * average and available bandwidth
9258  *
9259  * @wm: watermark calculation data
9260  *
9261  * Check if the display average bandwidth fits in the display
9262  * available bandwidth (CIK).
9263  * Used for display watermark bandwidth calculations
9264  * Returns true if the display fits, false if not.
9265  */
9266 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9267 {
9268 	if (dce8_average_bandwidth(wm) <=
9269 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9270 		return true;
9271 	else
9272 		return false;
9273 }
9274 
9275 /**
9276  * dce8_check_latency_hiding - check latency hiding
9277  *
9278  * @wm: watermark calculation data
9279  *
9280  * Check latency hiding (CIK).
9281  * Used for display watermark bandwidth calculations
9282  * Returns true if the display fits, false if not.
9283  */
9284 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9285 {
9286 	u32 lb_partitions = wm->lb_size / wm->src_width;
9287 	u32 line_time = wm->active_time + wm->blank_time;
9288 	u32 latency_tolerant_lines;
9289 	u32 latency_hiding;
9290 	fixed20_12 a;
9291 
9292 	a.full = dfixed_const(1);
9293 	if (wm->vsc.full > a.full)
9294 		latency_tolerant_lines = 1;
9295 	else {
9296 		if (lb_partitions <= (wm->vtaps + 1))
9297 			latency_tolerant_lines = 1;
9298 		else
9299 			latency_tolerant_lines = 2;
9300 	}
9301 
9302 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9303 
9304 	if (dce8_latency_watermark(wm) <= latency_hiding)
9305 		return true;
9306 	else
9307 		return false;
9308 }
9309 
9310 /**
9311  * dce8_program_watermarks - program display watermarks
9312  *
9313  * @rdev: radeon_device pointer
9314  * @radeon_crtc: the selected display controller
9315  * @lb_size: line buffer size
9316  * @num_heads: number of display controllers in use
9317  *
9318  * Calculate and program the display watermarks for the
9319  * selected display controller (CIK).
9320  */
9321 static void dce8_program_watermarks(struct radeon_device *rdev,
9322 				    struct radeon_crtc *radeon_crtc,
9323 				    u32 lb_size, u32 num_heads)
9324 {
9325 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9326 	struct dce8_wm_params wm_low, wm_high;
9327 	u32 pixel_period;
9328 	u32 line_time = 0;
9329 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9330 	u32 tmp, wm_mask;
9331 
9332 	if (radeon_crtc->base.enabled && num_heads && mode) {
9333 		pixel_period = 1000000 / (u32)mode->clock;
9334 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9335 
9336 		/* watermark for high clocks */
9337 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9338 		    rdev->pm.dpm_enabled) {
9339 			wm_high.yclk =
9340 				radeon_dpm_get_mclk(rdev, false) * 10;
9341 			wm_high.sclk =
9342 				radeon_dpm_get_sclk(rdev, false) * 10;
9343 		} else {
9344 			wm_high.yclk = rdev->pm.current_mclk * 10;
9345 			wm_high.sclk = rdev->pm.current_sclk * 10;
9346 		}
9347 
9348 		wm_high.disp_clk = mode->clock;
9349 		wm_high.src_width = mode->crtc_hdisplay;
9350 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9351 		wm_high.blank_time = line_time - wm_high.active_time;
9352 		wm_high.interlaced = false;
9353 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9354 			wm_high.interlaced = true;
9355 		wm_high.vsc = radeon_crtc->vsc;
9356 		wm_high.vtaps = 1;
9357 		if (radeon_crtc->rmx_type != RMX_OFF)
9358 			wm_high.vtaps = 2;
9359 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9360 		wm_high.lb_size = lb_size;
9361 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9362 		wm_high.num_heads = num_heads;
9363 
9364 		/* set for high clocks */
9365 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9366 
9367 		/* possibly force display priority to high */
9368 		/* should really do this at mode validation time... */
9369 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9370 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9371 		    !dce8_check_latency_hiding(&wm_high) ||
9372 		    (rdev->disp_priority == 2)) {
9373 			DRM_DEBUG_KMS("force priority to high\n");
9374 		}
9375 
9376 		/* watermark for low clocks */
9377 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9378 		    rdev->pm.dpm_enabled) {
9379 			wm_low.yclk =
9380 				radeon_dpm_get_mclk(rdev, true) * 10;
9381 			wm_low.sclk =
9382 				radeon_dpm_get_sclk(rdev, true) * 10;
9383 		} else {
9384 			wm_low.yclk = rdev->pm.current_mclk * 10;
9385 			wm_low.sclk = rdev->pm.current_sclk * 10;
9386 		}
9387 
9388 		wm_low.disp_clk = mode->clock;
9389 		wm_low.src_width = mode->crtc_hdisplay;
9390 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9391 		wm_low.blank_time = line_time - wm_low.active_time;
9392 		wm_low.interlaced = false;
9393 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9394 			wm_low.interlaced = true;
9395 		wm_low.vsc = radeon_crtc->vsc;
9396 		wm_low.vtaps = 1;
9397 		if (radeon_crtc->rmx_type != RMX_OFF)
9398 			wm_low.vtaps = 2;
9399 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9400 		wm_low.lb_size = lb_size;
9401 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9402 		wm_low.num_heads = num_heads;
9403 
9404 		/* set for low clocks */
9405 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9406 
9407 		/* possibly force display priority to high */
9408 		/* should really do this at mode validation time... */
9409 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9410 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9411 		    !dce8_check_latency_hiding(&wm_low) ||
9412 		    (rdev->disp_priority == 2)) {
9413 			DRM_DEBUG_KMS("force priority to high\n");
9414 		}
9415 	}
9416 
9417 	/* select wm A */
9418 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9419 	tmp = wm_mask;
9420 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9421 	tmp |= LATENCY_WATERMARK_MASK(1);
9422 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9423 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9424 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9425 		LATENCY_HIGH_WATERMARK(line_time)));
9426 	/* select wm B */
9427 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9428 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9429 	tmp |= LATENCY_WATERMARK_MASK(2);
9430 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9431 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9432 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9433 		LATENCY_HIGH_WATERMARK(line_time)));
9434 	/* restore original selection */
9435 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9436 
9437 	/* save values for DPM */
9438 	radeon_crtc->line_time = line_time;
9439 	radeon_crtc->wm_high = latency_watermark_a;
9440 	radeon_crtc->wm_low = latency_watermark_b;
9441 }
9442 
9443 /**
9444  * dce8_bandwidth_update - program display watermarks
9445  *
9446  * @rdev: radeon_device pointer
9447  *
9448  * Calculate and program the display watermarks and line
9449  * buffer allocation (CIK).
9450  */
9451 void dce8_bandwidth_update(struct radeon_device *rdev)
9452 {
9453 	struct drm_display_mode *mode = NULL;
9454 	u32 num_heads = 0, lb_size;
9455 	int i;
9456 
9457 	if (!rdev->mode_info.mode_config_initialized)
9458 		return;
9459 
9460 	radeon_update_display_priority(rdev);
9461 
9462 	for (i = 0; i < rdev->num_crtc; i++) {
9463 		if (rdev->mode_info.crtcs[i]->base.enabled)
9464 			num_heads++;
9465 	}
9466 	for (i = 0; i < rdev->num_crtc; i++) {
9467 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9468 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9469 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9470 	}
9471 }
9472 
9473 /**
9474  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9475  *
9476  * @rdev: radeon_device pointer
9477  *
9478  * Fetches a GPU clock counter snapshot (SI).
9479  * Returns the 64 bit clock counter snapshot.
9480  */
9481 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9482 {
9483 	uint64_t clock;
9484 
9485 	mutex_lock(&rdev->gpu_clock_mutex);
9486 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9487 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9488 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9489 	mutex_unlock(&rdev->gpu_clock_mutex);
9490 	return clock;
9491 }
9492 
9493 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9494                               u32 cntl_reg, u32 status_reg)
9495 {
9496 	int r, i;
9497 	struct atom_clock_dividers dividers;
9498 	uint32_t tmp;
9499 
9500 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9501 					   clock, false, &dividers);
9502 	if (r)
9503 		return r;
9504 
9505 	tmp = RREG32_SMC(cntl_reg);
9506 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9507 	tmp |= dividers.post_divider;
9508 	WREG32_SMC(cntl_reg, tmp);
9509 
9510 	for (i = 0; i < 100; i++) {
9511 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9512 			break;
9513 		mdelay(10);
9514 	}
9515 	if (i == 100)
9516 		return -ETIMEDOUT;
9517 
9518 	return 0;
9519 }
9520 
9521 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9522 {
9523 	int r = 0;
9524 
9525 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9526 	if (r)
9527 		return r;
9528 
9529 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9530 	return r;
9531 }
9532 
9533 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9534 {
9535 	int r, i;
9536 	struct atom_clock_dividers dividers;
9537 	u32 tmp;
9538 
9539 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9540 					   ecclk, false, &dividers);
9541 	if (r)
9542 		return r;
9543 
9544 	for (i = 0; i < 100; i++) {
9545 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9546 			break;
9547 		mdelay(10);
9548 	}
9549 	if (i == 100)
9550 		return -ETIMEDOUT;
9551 
9552 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9553 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9554 	tmp |= dividers.post_divider;
9555 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9556 
9557 	for (i = 0; i < 100; i++) {
9558 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9559 			break;
9560 		mdelay(10);
9561 	}
9562 	if (i == 100)
9563 		return -ETIMEDOUT;
9564 
9565 	return 0;
9566 }
9567 
/**
 * cik_pcie_gen3_enable - enable PCIE gen 2/3 link speeds
 *
 * @rdev: radeon_device pointer
 *
 * Retrains the PCIE link at gen 2 or gen 3 speeds when both the GPU
 * and the upstream bridge advertise support.  Skipped for devices on
 * a root bus, IGPs, non-PCIE parts, and when disabled via
 * radeon.pcie_gen2=0.  The register write sequence below is strictly
 * ordered hardware bring-up; do not reorder it.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do if only gen 1 speeds are supported */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* locate the PCIE capability on both ends of the link */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the current link control settings so they can
			 * be restored after the equalization retries */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* widen the link back to the detected maximum if it
			 * negotiated narrower and renegotiation is supported */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then kick off a redo of
				 * equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				/* restore the saved HAWD bit on both ends */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				/* restore the saved compliance/de-emphasis bits
				 * ((1 << 4) | (7 << 9)) on both ends — see the
				 * PCIe spec Link Control 2 register layout */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* set the target link speed (LNKCTL2 bits 3:0) on the GPU */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change and wait for it to complete */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9727 
/**
 * cik_program_aspm - program PCIE ASPM (Active State Power Management)
 *
 * @rdev: radeon_device pointer
 *
 * Configures the L0s/L1 link power states, PLL power-down in L1, and
 * the CLKREQ-based clocking options.  Skipped for IGPs and non-PCIE
 * parts, or when disabled via radeon.aspm=0.  Throughout, registers
 * are only written back when the value actually changed
 * (orig != data) to avoid redundant PCIE port writes.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* policy knobs; all hardcoded false, i.e. every feature below is
	 * enabled unless gated by hardware capability checks */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the N_FTS (fast training sequence) count */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* set up the L0s/L1 inactivity timers */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PCIE PLLs to power down in L1 on both
			 * PIF blocks */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* CLKREQ is usable only if the upstream bridge
			 * advertises clock power management in LNKCAP */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* switch thermal monitor and misc clocks to
				 * alternate sources so the refclk can gate */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: write back only the L0s/PMI settings */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable memory light sleep in the BIF */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* back out the L0s inactivity timer if the transmitted
		 * N_FTS saturated and the link is reversed in both
		 * directions */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
9876