/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/**
 * ci_get_temp - get GPU temperature
 *
 * @rdev: radeon_device pointer
 *
 * Returns the current GPU temperature in millidegrees Celsius (CI).
 */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/**
 * kv_get_temp - get GPU temperature
 *
 * @rdev: radeon_device pointer
 *
 * Returns the current GPU temperature in millidegrees Celsius (KV).
 */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/*
 * Indirect register accessors
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}

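/*
 * RLC save/restore register lists for the CIK variants.  Each entry appears
 * to encode ((instance/broadcast select) << 16) | (register dword offset),
 * normally followed by a 0x00000000 placeholder for the saved value; a bare
 * count (e.g. 0x3, 0x5) seems to introduce that many register entries with
 * no value words.  The authoritative format lives in the RLC ucode, so
 * treat this description as informational.
 */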
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

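/*
 * "Golden" register tables: {offset, and_mask, or_value} triplets applied
 * via radeon_program_register_sequence() at init time to load the
 * vendor-recommended settings for each ASIC.
 */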
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28355, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

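/**
 * cik_init_golden_registers - program the golden registers
 *
 * @rdev: radeon_device pointer
 *
 * Programs the per-ASIC golden register sequences defined above (CIK).
 */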
static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
{
	if (index < rdev->doorbell.num_doorbells) {
		return readl(rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
{
	if (index < rdev->doorbell.num_doorbells) {
		writel(v, rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

#define BONAIRE_IO_MC_REGS_SIZE 36

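/*
 * MC sequencer I/O debug {index, data} pairs, written through
 * MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA before the MC ucode is loaded
 * (see ci_mc_load_microcode() below).
 */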
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

#define HAWAII_IO_MC_REGS_SIZE 22

static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances.  Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.  Callers are expected to serialize
 * against each other (e.g. via rdev->srbm_mutex) while a
 * non-default instance is selected.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}

/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	case CHIP_HAWAII:
		io_mc_regs = (u32 *)&hawaii_io_mc_regs;
		ucode_size = HAWAII_MC_UCODE_SIZE;
		regs_size = HAWAII_IO_MC_REGS_SIZE;
		break;
	default:
		return -EINVAL;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	/* only load the ucode when the MC engine is idle; since it is
	 * not running here, no memory blackout is required.
	 */
	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}
1770 
1771 /**
1772  * cik_init_microcode - load ucode images from disk
1773  *
1774  * @rdev: radeon_device pointer
1775  *
1776  * Use the firmware interface to load the ucode images into
1777  * the driver (not loaded into hw).
1778  * Returns 0 on success, error on failure.
1779  */
1780 static int cik_init_microcode(struct radeon_device *rdev)
1781 {
1782 	const char *chip_name;
1783 	size_t pfp_req_size, me_req_size, ce_req_size,
1784 		mec_req_size, rlc_req_size, mc_req_size = 0,
1785 		sdma_req_size, smc_req_size = 0;
1786 	char fw_name[30];
1787 	int err;
1788 
1789 	DRM_DEBUG("\n");
1790 
1791 	switch (rdev->family) {
1792 	case CHIP_BONAIRE:
1793 		chip_name = "BONAIRE";
1794 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1795 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1796 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1797 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1798 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1799 		mc_req_size = CIK_MC_UCODE_SIZE * 4;
1800 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1801 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1802 		break;
1803 	case CHIP_HAWAII:
1804 		chip_name = "HAWAII";
1805 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1806 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1807 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1808 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1809 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1810 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1811 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1812 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1813 		break;
1814 	case CHIP_KAVERI:
1815 		chip_name = "KAVERI";
1816 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1817 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1818 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1819 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1820 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1821 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1822 		break;
1823 	case CHIP_KABINI:
1824 		chip_name = "KABINI";
1825 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1826 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1827 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1828 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1829 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1830 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1831 		break;
1832 	default:
		BUG();
1833 	}
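
	/*
	 * Note: the CP/RLC/SDMA/MC *_UCODE_SIZE constants above count 32-bit
	 * dwords (hence the "* 4" to get a byte count), while the SMC image
	 * size is presumed to be in bytes already and is only rounded up to
	 * dword alignment.
	 */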
1834 
1835 	DRM_INFO("Loading %s Microcode\n", chip_name);
1836 
1837 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1838 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1839 	if (err)
1840 		goto out;
1841 	if (rdev->pfp_fw->size != pfp_req_size) {
1842 		printk(KERN_ERR
1843 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1844 		       rdev->pfp_fw->size, fw_name);
1845 		err = -EINVAL;
1846 		goto out;
1847 	}
1848 
1849 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1850 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1851 	if (err)
1852 		goto out;
1853 	if (rdev->me_fw->size != me_req_size) {
1854 		printk(KERN_ERR
1855 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1856 		       rdev->me_fw->size, fw_name);
1857 		err = -EINVAL;
		goto out;
1858 	}
1859 
1860 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1861 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1862 	if (err)
1863 		goto out;
1864 	if (rdev->ce_fw->size != ce_req_size) {
1865 		printk(KERN_ERR
1866 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1867 		       rdev->ce_fw->size, fw_name);
1868 		err = -EINVAL;
		goto out;
1869 	}
1870 
1871 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1872 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1873 	if (err)
1874 		goto out;
1875 	if (rdev->mec_fw->size != mec_req_size) {
1876 		printk(KERN_ERR
1877 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1878 		       rdev->mec_fw->size, fw_name);
1879 		err = -EINVAL;
		goto out;
1880 	}
1881 
1882 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1883 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1884 	if (err)
1885 		goto out;
1886 	if (rdev->rlc_fw->size != rlc_req_size) {
1887 		printk(KERN_ERR
1888 		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1889 		       rdev->rlc_fw->size, fw_name);
1890 		err = -EINVAL;
		goto out;
1891 	}
1892 
1893 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1894 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1895 	if (err)
1896 		goto out;
1897 	if (rdev->sdma_fw->size != sdma_req_size) {
1898 		printk(KERN_ERR
1899 		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1900 		       rdev->sdma_fw->size, fw_name);
1901 		err = -EINVAL;
		goto out;
1902 	}
1903 
1904 	/* No SMC, MC ucode on APUs */
1905 	if (!(rdev->flags & RADEON_IS_IGP)) {
1906 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1907 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1908 		if (err)
1909 			goto out;
1910 		if (rdev->mc_fw->size != mc_req_size) {
1911 			printk(KERN_ERR
1912 			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1913 			       rdev->mc_fw->size, fw_name);
1914 			err = -EINVAL;
			goto out;
1915 		}
1916 
1917 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1918 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1919 		if (err) {
1920 			printk(KERN_ERR
1921 			       "smc: error loading firmware \"%s\"\n",
1922 			       fw_name);
1923 			release_firmware(rdev->smc_fw);
1924 			rdev->smc_fw = NULL;
1925 			err = 0;
1926 		} else if (rdev->smc_fw->size != smc_req_size) {
1927 			printk(KERN_ERR
1928 			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1929 			       rdev->smc_fw->size, fw_name);
1930 			err = -EINVAL;
1931 		}
1932 	}
1933 
1934 out:
1935 	if (err) {
1936 		if (err != -EINVAL)
1937 			printk(KERN_ERR
1938 			       "cik_cp: Failed to load firmware \"%s\"\n",
1939 			       fw_name);
1940 		release_firmware(rdev->pfp_fw);
1941 		rdev->pfp_fw = NULL;
1942 		release_firmware(rdev->me_fw);
1943 		rdev->me_fw = NULL;
1944 		release_firmware(rdev->ce_fw);
1945 		rdev->ce_fw = NULL;
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
1946 		release_firmware(rdev->rlc_fw);
1947 		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
1948 		release_firmware(rdev->mc_fw);
1949 		rdev->mc_fw = NULL;
1950 		release_firmware(rdev->smc_fw);
1951 		rdev->smc_fw = NULL;
1952 	}
1953 	return err;
1954 }
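
/*
 * For example, a CHIP_BONAIRE dGPU ends up requesting radeon/BONAIRE_pfp.bin,
 * _me.bin, _ce.bin, _mec.bin, _rlc.bin and _sdma.bin plus _mc.bin and
 * _smc.bin, while APUs (RADEON_IS_IGP) skip the MC and SMC images; a missing
 * SMC image on a dGPU is logged but tolerated rather than treated as fatal.
 */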
1955 
1956 /*
1957  * Core functions
1958  */
1959 /**
1960  * cik_tiling_mode_table_init - init the hw tiling table
1961  *
1962  * @rdev: radeon_device pointer
1963  *
1964  * Starting with SI, the tiling setup is done globally in a
1965  * set of 32 tiling modes.  Rather than selecting each set of
1966  * parameters per surface as on older asics, we just select
1967  * which index in the tiling table we want to use, and the
1968  * surface uses those parameters (CIK).
1969  */
1970 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1971 {
1972 	const u32 num_tile_mode_states = 32;
1973 	const u32 num_secondary_tile_mode_states = 16;
1974 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1975 	u32 num_pipe_configs;
1976 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
1977 		rdev->config.cik.max_shader_engines;
1978 
1979 	switch (rdev->config.cik.mem_row_size_in_kb) {
1980 	case 1:
1981 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1982 		break;
1983 	case 2:
1984 	default:
1985 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1986 		break;
1987 	case 4:
1988 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1989 		break;
1990 	}
1991 
1992 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
1993 	if (num_pipe_configs > 8)
1994 		num_pipe_configs = 16;
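	/*
	 * More than 8 pipes means a Hawaii-class part (16 tile pipes per the
	 * family setup in cik_gpu_init()); it shares the 16-pipe table below.
	 */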
1995 
1996 	if (num_pipe_configs == 16) {
1997 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1998 			switch (reg_offset) {
1999 			case 0:
2000 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2001 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2002 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2003 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2004 				break;
2005 			case 1:
2006 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2007 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2008 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2009 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2010 				break;
2011 			case 2:
2012 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2013 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2014 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2015 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2016 				break;
2017 			case 3:
2018 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2019 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2020 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2021 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2022 				break;
2023 			case 4:
2024 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2025 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2026 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2027 						 TILE_SPLIT(split_equal_to_row_size));
2028 				break;
2029 			case 5:
2030 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2031 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2032 				break;
2033 			case 6:
2034 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2035 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2036 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2037 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2038 				break;
2039 			case 7:
2040 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2041 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2042 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2043 						 TILE_SPLIT(split_equal_to_row_size));
2044 				break;
2045 			case 8:
2046 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2047 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2048 				break;
2049 			case 9:
2050 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2051 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2052 				break;
2053 			case 10:
2054 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2055 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2056 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2057 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2058 				break;
2059 			case 11:
2060 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2061 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2062 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2063 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2064 				break;
2065 			case 12:
2066 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2067 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2068 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2069 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2070 				break;
2071 			case 13:
2072 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2073 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2074 				break;
2075 			case 14:
2076 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2077 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2078 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2079 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2080 				break;
2081 			case 16:
2082 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2083 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2084 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2085 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2086 				break;
2087 			case 17:
2088 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2089 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2090 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2091 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2092 				break;
2093 			case 27:
2094 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2095 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2096 				break;
2097 			case 28:
2098 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2099 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2100 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2101 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2102 				break;
2103 			case 29:
2104 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2105 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2106 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2107 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2108 				break;
2109 			case 30:
2110 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2111 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2112 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2113 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2114 				break;
2115 			default:
2116 				gb_tile_moden = 0;
2117 				break;
2118 			}
2119 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2120 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2121 		}
2122 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2123 			switch (reg_offset) {
2124 			case 0:
2125 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2126 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2127 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2128 						 NUM_BANKS(ADDR_SURF_16_BANK));
2129 				break;
2130 			case 1:
2131 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2132 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2133 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2134 						 NUM_BANKS(ADDR_SURF_16_BANK));
2135 				break;
2136 			case 2:
2137 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2138 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2139 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2140 						 NUM_BANKS(ADDR_SURF_16_BANK));
2141 				break;
2142 			case 3:
2143 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2144 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2145 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2146 						 NUM_BANKS(ADDR_SURF_16_BANK));
2147 				break;
2148 			case 4:
2149 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2150 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2151 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2152 						 NUM_BANKS(ADDR_SURF_8_BANK));
2153 				break;
2154 			case 5:
2155 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2156 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2157 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2158 						 NUM_BANKS(ADDR_SURF_4_BANK));
2159 				break;
2160 			case 6:
2161 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2162 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2163 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2164 						 NUM_BANKS(ADDR_SURF_2_BANK));
2165 				break;
2166 			case 8:
2167 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2168 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2169 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2170 						 NUM_BANKS(ADDR_SURF_16_BANK));
2171 				break;
2172 			case 9:
2173 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2174 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2175 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2176 						 NUM_BANKS(ADDR_SURF_16_BANK));
2177 				break;
2178 			case 10:
2179 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2180 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2181 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2182 						 NUM_BANKS(ADDR_SURF_16_BANK));
2183 				break;
2184 			case 11:
2185 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2186 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2187 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2188 						 NUM_BANKS(ADDR_SURF_8_BANK));
2189 				break;
2190 			case 12:
2191 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2192 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2193 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2194 						 NUM_BANKS(ADDR_SURF_4_BANK));
2195 				break;
2196 			case 13:
2197 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2198 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2199 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2200 						 NUM_BANKS(ADDR_SURF_2_BANK));
2201 				break;
2202 			case 14:
2203 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2204 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2205 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2206 						 NUM_BANKS(ADDR_SURF_2_BANK));
2207 				break;
2208 			default:
2209 				gb_tile_moden = 0;
2210 				break;
2211 			}
2212 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2213 		}
2214 	} else if (num_pipe_configs == 8) {
2215 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2216 			switch (reg_offset) {
2217 			case 0:
2218 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2219 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2220 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2221 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2222 				break;
2223 			case 1:
2224 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2225 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2226 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2227 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2228 				break;
2229 			case 2:
2230 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2231 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2232 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2233 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2234 				break;
2235 			case 3:
2236 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2237 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2238 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2239 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2240 				break;
2241 			case 4:
2242 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2243 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2244 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2245 						 TILE_SPLIT(split_equal_to_row_size));
2246 				break;
2247 			case 5:
2248 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2249 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2250 				break;
2251 			case 6:
2252 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2253 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2254 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2255 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2256 				break;
2257 			case 7:
2258 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2259 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2260 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2261 						 TILE_SPLIT(split_equal_to_row_size));
2262 				break;
2263 			case 8:
2264 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2265 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2266 				break;
2267 			case 9:
2268 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2269 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2270 				break;
2271 			case 10:
2272 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2273 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2274 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2275 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2276 				break;
2277 			case 11:
2278 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2279 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2280 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2281 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2282 				break;
2283 			case 12:
2284 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2285 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2286 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2287 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2288 				break;
2289 			case 13:
2290 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2291 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2292 				break;
2293 			case 14:
2294 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2295 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2296 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2297 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2298 				break;
2299 			case 16:
2300 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2301 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2302 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2303 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2304 				break;
2305 			case 17:
2306 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2307 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2308 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2309 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2310 				break;
2311 			case 27:
2312 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2313 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2314 				break;
2315 			case 28:
2316 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2317 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2318 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2319 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2320 				break;
2321 			case 29:
2322 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2323 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2324 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2325 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2326 				break;
2327 			case 30:
2328 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2329 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2330 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2331 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2332 				break;
2333 			default:
2334 				gb_tile_moden = 0;
2335 				break;
2336 			}
2337 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2338 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2339 		}
2340 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2341 			switch (reg_offset) {
2342 			case 0:
2343 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2345 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2346 						 NUM_BANKS(ADDR_SURF_16_BANK));
2347 				break;
2348 			case 1:
2349 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2350 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2351 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2352 						 NUM_BANKS(ADDR_SURF_16_BANK));
2353 				break;
2354 			case 2:
2355 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2357 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358 						 NUM_BANKS(ADDR_SURF_16_BANK));
2359 				break;
2360 			case 3:
2361 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2362 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2363 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2364 						 NUM_BANKS(ADDR_SURF_16_BANK));
2365 				break;
2366 			case 4:
2367 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2369 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2370 						 NUM_BANKS(ADDR_SURF_8_BANK));
2371 				break;
2372 			case 5:
2373 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2374 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2375 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2376 						 NUM_BANKS(ADDR_SURF_4_BANK));
2377 				break;
2378 			case 6:
2379 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2382 						 NUM_BANKS(ADDR_SURF_2_BANK));
2383 				break;
2384 			case 8:
2385 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2386 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2387 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2388 						 NUM_BANKS(ADDR_SURF_16_BANK));
2389 				break;
2390 			case 9:
2391 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2392 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2393 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2394 						 NUM_BANKS(ADDR_SURF_16_BANK));
2395 				break;
2396 			case 10:
2397 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2398 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2399 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2400 						 NUM_BANKS(ADDR_SURF_16_BANK));
2401 				break;
2402 			case 11:
2403 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2404 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2405 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2406 						 NUM_BANKS(ADDR_SURF_16_BANK));
2407 				break;
2408 			case 12:
2409 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2410 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2411 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2412 						 NUM_BANKS(ADDR_SURF_8_BANK));
2413 				break;
2414 			case 13:
2415 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2416 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2417 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2418 						 NUM_BANKS(ADDR_SURF_4_BANK));
2419 				break;
2420 			case 14:
2421 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2422 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2423 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2424 						 NUM_BANKS(ADDR_SURF_2_BANK));
2425 				break;
2426 			default:
2427 				gb_tile_moden = 0;
2428 				break;
2429 			}
2430 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2431 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2432 		}
2433 	} else if (num_pipe_configs == 4) {
2434 		if (num_rbs == 4) {
2435 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2436 				switch (reg_offset) {
2437 				case 0:
2438 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2439 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2440 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2441 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2442 					break;
2443 				case 1:
2444 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2445 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2446 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2447 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2448 					break;
2449 				case 2:
2450 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2451 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2452 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2453 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2454 					break;
2455 				case 3:
2456 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2457 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2458 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2459 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2460 					break;
2461 				case 4:
2462 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2463 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2464 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2465 							 TILE_SPLIT(split_equal_to_row_size));
2466 					break;
2467 				case 5:
2468 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2469 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2470 					break;
2471 				case 6:
2472 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2473 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2474 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2475 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2476 					break;
2477 				case 7:
2478 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2479 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2480 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2481 							 TILE_SPLIT(split_equal_to_row_size));
2482 					break;
2483 				case 8:
2484 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2485 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2486 					break;
2487 				case 9:
2488 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2489 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2490 					break;
2491 				case 10:
2492 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2493 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2494 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2495 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2496 					break;
2497 				case 11:
2498 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2499 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2500 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2501 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2502 					break;
2503 				case 12:
2504 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2505 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2506 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2507 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2508 					break;
2509 				case 13:
2510 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2511 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2512 					break;
2513 				case 14:
2514 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2515 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2516 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2517 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2518 					break;
2519 				case 16:
2520 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2521 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2522 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2523 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2524 					break;
2525 				case 17:
2526 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2527 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2528 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2529 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2530 					break;
2531 				case 27:
2532 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2533 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2534 					break;
2535 				case 28:
2536 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2537 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2538 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2539 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2540 					break;
2541 				case 29:
2542 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2543 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2544 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2545 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2546 					break;
2547 				case 30:
2548 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2549 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2550 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2551 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2552 					break;
2553 				default:
2554 					gb_tile_moden = 0;
2555 					break;
2556 				}
2557 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2558 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2559 			}
2560 		} else if (num_rbs < 4) {
2561 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2562 				switch (reg_offset) {
2563 				case 0:
2564 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2566 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2567 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2568 					break;
2569 				case 1:
2570 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2571 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2572 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2573 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2574 					break;
2575 				case 2:
2576 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2577 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2578 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2579 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2580 					break;
2581 				case 3:
2582 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2583 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2584 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2585 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2586 					break;
2587 				case 4:
2588 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2589 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2590 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2591 							 TILE_SPLIT(split_equal_to_row_size));
2592 					break;
2593 				case 5:
2594 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2595 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2596 					break;
2597 				case 6:
2598 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2599 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2600 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2601 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2602 					break;
2603 				case 7:
2604 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2605 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2606 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2607 							 TILE_SPLIT(split_equal_to_row_size));
2608 					break;
2609 				case 8:
2610 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2611 							 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2612 					break;
2613 				case 9:
2614 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2615 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2616 					break;
2617 				case 10:
2618 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2619 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2620 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2621 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2622 					break;
2623 				case 11:
2624 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2625 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2626 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2627 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2628 					break;
2629 				case 12:
2630 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2631 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2632 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2633 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2634 					break;
2635 				case 13:
2636 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2637 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2638 					break;
2639 				case 14:
2640 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2641 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2642 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2643 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2644 					break;
2645 				case 16:
2646 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2647 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2648 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2649 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2650 					break;
2651 				case 17:
2652 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2653 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2654 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2655 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2656 					break;
2657 				case 27:
2658 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2659 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2660 					break;
2661 				case 28:
2662 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2663 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2664 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2665 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2666 					break;
2667 				case 29:
2668 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2669 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2670 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2671 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2672 					break;
2673 				case 30:
2674 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2675 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2676 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2677 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2678 					break;
2679 				default:
2680 					gb_tile_moden = 0;
2681 					break;
2682 				}
2683 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2684 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2685 			}
2686 		}
2687 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2688 			switch (reg_offset) {
2689 			case 0:
2690 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2692 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2693 						 NUM_BANKS(ADDR_SURF_16_BANK));
2694 				break;
2695 			case 1:
2696 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2698 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2699 						 NUM_BANKS(ADDR_SURF_16_BANK));
2700 				break;
2701 			case 2:
2702 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2703 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2704 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2705 						 NUM_BANKS(ADDR_SURF_16_BANK));
2706 				break;
2707 			case 3:
2708 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2709 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2710 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2711 						 NUM_BANKS(ADDR_SURF_16_BANK));
2712 				break;
2713 			case 4:
2714 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2715 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2716 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2717 						 NUM_BANKS(ADDR_SURF_16_BANK));
2718 				break;
2719 			case 5:
2720 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2721 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2722 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2723 						 NUM_BANKS(ADDR_SURF_8_BANK));
2724 				break;
2725 			case 6:
2726 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2728 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2729 						 NUM_BANKS(ADDR_SURF_4_BANK));
2730 				break;
2731 			case 8:
2732 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2733 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2734 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2735 						 NUM_BANKS(ADDR_SURF_16_BANK));
2736 				break;
2737 			case 9:
2738 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2739 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2740 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2741 						 NUM_BANKS(ADDR_SURF_16_BANK));
2742 				break;
2743 			case 10:
2744 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2746 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2747 						 NUM_BANKS(ADDR_SURF_16_BANK));
2748 				break;
2749 			case 11:
2750 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2752 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2753 						 NUM_BANKS(ADDR_SURF_16_BANK));
2754 				break;
2755 			case 12:
2756 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2758 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2759 						 NUM_BANKS(ADDR_SURF_16_BANK));
2760 				break;
2761 			case 13:
2762 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2764 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2765 						 NUM_BANKS(ADDR_SURF_8_BANK));
2766 				break;
2767 			case 14:
2768 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2770 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2771 						 NUM_BANKS(ADDR_SURF_4_BANK));
2772 				break;
2773 			default:
2774 				gb_tile_moden = 0;
2775 				break;
2776 			}
2777 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2778 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2779 		}
2780 	} else if (num_pipe_configs == 2) {
2781 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2782 			switch (reg_offset) {
2783 			case 0:
2784 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2786 						 PIPE_CONFIG(ADDR_SURF_P2) |
2787 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2788 				break;
2789 			case 1:
2790 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2791 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2792 						 PIPE_CONFIG(ADDR_SURF_P2) |
2793 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2794 				break;
2795 			case 2:
2796 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2797 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2798 						 PIPE_CONFIG(ADDR_SURF_P2) |
2799 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2800 				break;
2801 			case 3:
2802 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2803 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2804 						 PIPE_CONFIG(ADDR_SURF_P2) |
2805 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2806 				break;
2807 			case 4:
2808 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2809 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2810 						 PIPE_CONFIG(ADDR_SURF_P2) |
2811 						 TILE_SPLIT(split_equal_to_row_size));
2812 				break;
2813 			case 5:
2814 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2815 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2816 				break;
2817 			case 6:
2818 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2819 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2820 						 PIPE_CONFIG(ADDR_SURF_P2) |
2821 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2822 				break;
2823 			case 7:
2824 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2825 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2826 						 PIPE_CONFIG(ADDR_SURF_P2) |
2827 						 TILE_SPLIT(split_equal_to_row_size));
2828 				break;
2829 			case 8:
2830 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2831 				break;
2832 			case 9:
2833 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2834 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2835 				break;
2836 			case 10:
2837 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2838 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2839 						 PIPE_CONFIG(ADDR_SURF_P2) |
2840 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2841 				break;
2842 			case 11:
2843 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2844 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2845 						 PIPE_CONFIG(ADDR_SURF_P2) |
2846 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2847 				break;
2848 			case 12:
2849 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2850 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2851 						 PIPE_CONFIG(ADDR_SURF_P2) |
2852 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2853 				break;
2854 			case 13:
2855 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2856 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2857 				break;
2858 			case 14:
2859 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2860 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2861 						 PIPE_CONFIG(ADDR_SURF_P2) |
2862 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2863 				break;
2864 			case 16:
2865 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2866 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2867 						 PIPE_CONFIG(ADDR_SURF_P2) |
2868 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2869 				break;
2870 			case 17:
2871 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2872 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2873 						 PIPE_CONFIG(ADDR_SURF_P2) |
2874 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2875 				break;
2876 			case 27:
2877 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2878 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2879 				break;
2880 			case 28:
2881 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2882 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2883 						 PIPE_CONFIG(ADDR_SURF_P2) |
2884 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2885 				break;
2886 			case 29:
2887 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2888 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2889 						 PIPE_CONFIG(ADDR_SURF_P2) |
2890 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2891 				break;
2892 			case 30:
2893 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2894 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2895 						 PIPE_CONFIG(ADDR_SURF_P2) |
2896 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2897 				break;
2898 			default:
2899 				gb_tile_moden = 0;
2900 				break;
2901 			}
2902 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2903 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2904 		}
2905 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2906 			switch (reg_offset) {
2907 			case 0:
2908 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2909 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2910 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2911 						 NUM_BANKS(ADDR_SURF_16_BANK));
2912 				break;
2913 			case 1:
2914 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2915 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2916 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2917 						 NUM_BANKS(ADDR_SURF_16_BANK));
2918 				break;
2919 			case 2:
2920 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2921 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2922 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2923 						 NUM_BANKS(ADDR_SURF_16_BANK));
2924 				break;
2925 			case 3:
2926 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2927 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2928 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2929 						 NUM_BANKS(ADDR_SURF_16_BANK));
2930 				break;
2931 			case 4:
2932 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2933 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2934 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2935 						 NUM_BANKS(ADDR_SURF_16_BANK));
2936 				break;
2937 			case 5:
2938 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2939 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2940 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2941 						 NUM_BANKS(ADDR_SURF_16_BANK));
2942 				break;
2943 			case 6:
2944 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2945 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2946 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2947 						 NUM_BANKS(ADDR_SURF_8_BANK));
2948 				break;
2949 			case 8:
2950 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2951 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2952 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2953 						 NUM_BANKS(ADDR_SURF_16_BANK));
2954 				break;
2955 			case 9:
2956 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2957 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2958 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2959 						 NUM_BANKS(ADDR_SURF_16_BANK));
2960 				break;
2961 			case 10:
2962 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2963 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2964 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2965 						 NUM_BANKS(ADDR_SURF_16_BANK));
2966 				break;
2967 			case 11:
2968 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2969 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2970 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971 						 NUM_BANKS(ADDR_SURF_16_BANK));
2972 				break;
2973 			case 12:
2974 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2975 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2976 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2977 						 NUM_BANKS(ADDR_SURF_16_BANK));
2978 				break;
2979 			case 13:
2980 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2982 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2983 						 NUM_BANKS(ADDR_SURF_16_BANK));
2984 				break;
2985 			case 14:
2986 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2987 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2988 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2989 						 NUM_BANKS(ADDR_SURF_8_BANK));
2990 				break;
2991 			default:
2992 				gb_tile_moden = 0;
2993 				break;
2994 			}
2995 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2996 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2997 		}
2998 	} else
2999 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3000 }
3001 
3002 /**
3003  * cik_select_se_sh - select which SE, SH to address
3004  *
3005  * @rdev: radeon_device pointer
3006  * @se_num: shader engine to address
3007  * @sh_num: sh block to address
3008  *
3009  * Select which SE, SH combinations to address. Certain
3010  * registers are instanced per SE or SH.  0xffffffff means
3011  * broadcast to all SEs or SHs (CIK).
3012  */
3013 static void cik_select_se_sh(struct radeon_device *rdev,
3014 			     u32 se_num, u32 sh_num)
3015 {
3016 	u32 data = INSTANCE_BROADCAST_WRITES;
3017 
3018 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3019 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3020 	else if (se_num == 0xffffffff)
3021 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3022 	else if (sh_num == 0xffffffff)
3023 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3024 	else
3025 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3026 	WREG32(GRBM_GFX_INDEX, data);
3027 }
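
/*
 * Example (illustrative): cik_select_se_sh(rdev, 0xffffffff, 0xffffffff)
 * sets both broadcast bits so a single register access reaches every SE/SH,
 * while cik_select_se_sh(rdev, 1, 0) steers accesses at SE1/SH0 only.
 * Callers are expected to restore broadcast mode when finished, as
 * cik_setup_rb() below does.
 */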
3028 
3029 /**
3030  * cik_create_bitmask - create a bitmask
3031  *
3032  * @bit_width: length of the mask
3033  *
3034  * Create a variable-length bit mask (CIK).
3035  * Returns the bitmask.
3036  */
3037 static u32 cik_create_bitmask(u32 bit_width)
3038 {
3039 	u32 i, mask = 0;
3040 
3041 	for (i = 0; i < bit_width; i++) {
3042 		mask <<= 1;
3043 		mask |= 1;
3044 	}
3045 	return mask;
3046 }
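
/*
 * Worked example: cik_create_bitmask(4) iterates mask = 1, 3, 7, 0xf and
 * returns 0xf; in general the function computes (1 << bit_width) - 1 for
 * bit_width < 32, i.e. a run of bit_width consecutive low bits.
 */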
3047 
3048 /**
3049  * cik_get_rb_disabled - compute the bitmask of disabled RBs
3050  *
3051  * @rdev: radeon_device pointer
3052  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3054  * @sh_per_se: number of SH blocks per SE for the asic
3055  *
3056  * Calculates the bitmask of disabled RBs (CIK).
3057  * Returns the disabled RB bitmask.
3058  */
3059 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3060 			      u32 max_rb_num_per_se,
3061 			      u32 sh_per_se)
3062 {
3063 	u32 data, mask;
3064 
3065 	data = RREG32(CC_RB_BACKEND_DISABLE);
3066 	if (data & 1)
3067 		data &= BACKEND_DISABLE_MASK;
3068 	else
3069 		data = 0;
3070 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3071 
3072 	data >>= BACKEND_DISABLE_SHIFT;
3073 
3074 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3075 
3076 	return data & mask;
3077 }
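
/*
 * Decode sketch: the fuse field in CC_RB_BACKEND_DISABLE is only honored
 * when bit 0 is set, is OR'd with the driver-writable override in
 * GC_USER_RB_BACKEND_DISABLE, shifted down to bit 0, and finally clipped
 * to the bits one SH actually owns (max_rb_num_per_se / sh_per_se).  A set
 * bit marks a disabled (harvested) RB.
 */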
3078 
3079 /**
3080  * cik_setup_rb - setup the RBs on the asic
3081  *
3082  * @rdev: radeon_device pointer
3083  * @se_num: number of SEs (shader engines) for the asic
3084  * @sh_per_se: number of SH blocks per SE for the asic
3085  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3086  *
3087  * Configures per-SE/SH RB registers (CIK).
3088  */
3089 static void cik_setup_rb(struct radeon_device *rdev,
3090 			 u32 se_num, u32 sh_per_se,
3091 			 u32 max_rb_num_per_se)
3092 {
3093 	int i, j;
3094 	u32 data, mask;
3095 	u32 disabled_rbs = 0;
3096 	u32 enabled_rbs = 0;
3097 
3098 	for (i = 0; i < se_num; i++) {
3099 		for (j = 0; j < sh_per_se; j++) {
3100 			cik_select_se_sh(rdev, i, j);
3101 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3102 			if (rdev->family == CHIP_HAWAII)
3103 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3104 			else
3105 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3106 		}
3107 	}
3108 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3109 
3110 	mask = 1;
3111 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3112 		if (!(disabled_rbs & mask))
3113 			enabled_rbs |= mask;
3114 		mask <<= 1;
3115 	}
3116 
3117 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3118 
3119 	for (i = 0; i < se_num; i++) {
3120 		cik_select_se_sh(rdev, i, 0xffffffff);
3121 		data = 0;
3122 		for (j = 0; j < sh_per_se; j++) {
3123 			switch (enabled_rbs & 3) {
3124 			case 0:
3125 				if (j == 0)
3126 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3127 				else
3128 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3129 				break;
3130 			case 1:
3131 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3132 				break;
3133 			case 2:
3134 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3135 				break;
3136 			case 3:
3137 			default:
3138 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3139 				break;
3140 			}
3141 			enabled_rbs >>= 2;
3142 		}
3143 		WREG32(PA_SC_RASTER_CONFIG, data);
3144 	}
3145 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3146 }
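
/*
 * Example (illustrative, assuming the 2-bits-per-SH CIK bitmap width): on a
 * 2-SE part with 2 RBs per SE and RB2 fused off, disabled_rbs = 0x4 and the
 * scan above yields enabled_rbs = 0xb; PA_SC_RASTER_CONFIG is then packed
 * per SE, with the switch consuming two bits of enabled_rbs per SH to pick
 * a RASTER_CONFIG_RB_MAP_* value.
 */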
3147 
3148 /**
3149  * cik_gpu_init - setup the 3D engine
3150  *
3151  * @rdev: radeon_device pointer
3152  *
3153  * Configures the 3D engine and tiling configuration
3154  * registers so that the 3D engine is usable.
3155  */
3156 static void cik_gpu_init(struct radeon_device *rdev)
3157 {
3158 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3159 	u32 mc_shared_chmap, mc_arb_ramcfg;
3160 	u32 hdp_host_path_cntl;
3161 	u32 tmp;
3162 	int i, j;
3163 
3164 	switch (rdev->family) {
3165 	case CHIP_BONAIRE:
3166 		rdev->config.cik.max_shader_engines = 2;
3167 		rdev->config.cik.max_tile_pipes = 4;
3168 		rdev->config.cik.max_cu_per_sh = 7;
3169 		rdev->config.cik.max_sh_per_se = 1;
3170 		rdev->config.cik.max_backends_per_se = 2;
3171 		rdev->config.cik.max_texture_channel_caches = 4;
3172 		rdev->config.cik.max_gprs = 256;
3173 		rdev->config.cik.max_gs_threads = 32;
3174 		rdev->config.cik.max_hw_contexts = 8;
3175 
3176 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3177 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3178 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3179 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3180 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3181 		break;
3182 	case CHIP_HAWAII:
3183 		rdev->config.cik.max_shader_engines = 4;
3184 		rdev->config.cik.max_tile_pipes = 16;
3185 		rdev->config.cik.max_cu_per_sh = 11;
3186 		rdev->config.cik.max_sh_per_se = 1;
3187 		rdev->config.cik.max_backends_per_se = 4;
3188 		rdev->config.cik.max_texture_channel_caches = 16;
3189 		rdev->config.cik.max_gprs = 256;
3190 		rdev->config.cik.max_gs_threads = 32;
3191 		rdev->config.cik.max_hw_contexts = 8;
3192 
3193 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3194 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3195 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3196 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3197 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3198 		break;
3199 	case CHIP_KAVERI:
3200 		rdev->config.cik.max_shader_engines = 1;
3201 		rdev->config.cik.max_tile_pipes = 4;
3202 		if ((rdev->pdev->device == 0x1304) ||
3203 		    (rdev->pdev->device == 0x1305) ||
3204 		    (rdev->pdev->device == 0x130C) ||
3205 		    (rdev->pdev->device == 0x130F) ||
3206 		    (rdev->pdev->device == 0x1310) ||
3207 		    (rdev->pdev->device == 0x1311) ||
3208 		    (rdev->pdev->device == 0x131C)) {
3209 			rdev->config.cik.max_cu_per_sh = 8;
3210 			rdev->config.cik.max_backends_per_se = 2;
3211 		} else if ((rdev->pdev->device == 0x1309) ||
3212 			   (rdev->pdev->device == 0x130A) ||
3213 			   (rdev->pdev->device == 0x130D) ||
3214 			   (rdev->pdev->device == 0x1313) ||
3215 			   (rdev->pdev->device == 0x131D)) {
3216 			rdev->config.cik.max_cu_per_sh = 6;
3217 			rdev->config.cik.max_backends_per_se = 2;
3218 		} else if ((rdev->pdev->device == 0x1306) ||
3219 			   (rdev->pdev->device == 0x1307) ||
3220 			   (rdev->pdev->device == 0x130B) ||
3221 			   (rdev->pdev->device == 0x130E) ||
3222 			   (rdev->pdev->device == 0x1315) ||
3223 			   (rdev->pdev->device == 0x131B)) {
3224 			rdev->config.cik.max_cu_per_sh = 4;
3225 			rdev->config.cik.max_backends_per_se = 1;
3226 		} else {
3227 			rdev->config.cik.max_cu_per_sh = 3;
3228 			rdev->config.cik.max_backends_per_se = 1;
3229 		}
3230 		rdev->config.cik.max_sh_per_se = 1;
3231 		rdev->config.cik.max_texture_channel_caches = 4;
3232 		rdev->config.cik.max_gprs = 256;
3233 		rdev->config.cik.max_gs_threads = 16;
3234 		rdev->config.cik.max_hw_contexts = 8;
3235 
3236 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3237 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3238 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3239 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3240 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3241 		break;
3242 	case CHIP_KABINI:
3243 	default:
3244 		rdev->config.cik.max_shader_engines = 1;
3245 		rdev->config.cik.max_tile_pipes = 2;
3246 		rdev->config.cik.max_cu_per_sh = 2;
3247 		rdev->config.cik.max_sh_per_se = 1;
3248 		rdev->config.cik.max_backends_per_se = 1;
3249 		rdev->config.cik.max_texture_channel_caches = 2;
3250 		rdev->config.cik.max_gprs = 256;
3251 		rdev->config.cik.max_gs_threads = 16;
3252 		rdev->config.cik.max_hw_contexts = 8;
3253 
3254 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3255 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3256 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3257 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3258 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3259 		break;
3260 	}
3261 
3262 	/* Initialize HDP */
3263 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3264 		WREG32((0x2c14 + j), 0x00000000);
3265 		WREG32((0x2c18 + j), 0x00000000);
3266 		WREG32((0x2c1c + j), 0x00000000);
3267 		WREG32((0x2c20 + j), 0x00000000);
3268 		WREG32((0x2c24 + j), 0x00000000);
3269 	}
3270 
3271 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3272 
3273 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3274 
3275 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3276 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3277 
3278 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3279 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3280 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3281 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3282 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3283 		rdev->config.cik.mem_row_size_in_kb = 4;
3284 	/* XXX use MC settings? */
3285 	rdev->config.cik.shader_engine_tile_size = 32;
3286 	rdev->config.cik.num_gpus = 1;
3287 	rdev->config.cik.multi_gpu_tile_size = 64;
3288 
3289 	/* fix up row size */
3290 	gb_addr_config &= ~ROW_SIZE_MASK;
3291 	switch (rdev->config.cik.mem_row_size_in_kb) {
3292 	case 1:
3293 	default:
3294 		gb_addr_config |= ROW_SIZE(0);
3295 		break;
3296 	case 2:
3297 		gb_addr_config |= ROW_SIZE(1);
3298 		break;
3299 	case 4:
3300 		gb_addr_config |= ROW_SIZE(2);
3301 		break;
3302 	}
3303 
3304 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3305 	 * not have bank info, so create a custom tiling dword.
3306 	 * bits 3:0   num_pipes
3307 	 * bits 7:4   num_banks
3308 	 * bits 11:8  group_size
3309 	 * bits 15:12 row_size
3310 	 */
3311 	rdev->config.cik.tile_config = 0;
3312 	switch (rdev->config.cik.num_tile_pipes) {
3313 	case 1:
3314 		rdev->config.cik.tile_config |= (0 << 0);
3315 		break;
3316 	case 2:
3317 		rdev->config.cik.tile_config |= (1 << 0);
3318 		break;
3319 	case 4:
3320 		rdev->config.cik.tile_config |= (2 << 0);
3321 		break;
3322 	case 8:
3323 	default:
3324 		/* XXX what about 12? */
3325 		rdev->config.cik.tile_config |= (3 << 0);
3326 		break;
3327 	}
3328 	rdev->config.cik.tile_config |=
3329 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3330 	rdev->config.cik.tile_config |=
3331 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3332 	rdev->config.cik.tile_config |=
3333 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3334 
3335 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3336 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3337 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
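	/* the SDMA engines only consume the pipe-interleave field of the
	 * address config, hence the 0x70 (bits 6:4) mask; assumed field layout
	 */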
3338 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3339 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3340 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3341 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3342 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3343 
3344 	cik_tiling_mode_table_init(rdev);
3345 
3346 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3347 		     rdev->config.cik.max_sh_per_se,
3348 		     rdev->config.cik.max_backends_per_se);
3349 
3350 	/* set HW defaults for 3D engine */
3351 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3352 
3353 	WREG32(SX_DEBUG_1, 0x20);
3354 
3355 	WREG32(TA_CNTL_AUX, 0x00010000);
3356 
3357 	tmp = RREG32(SPI_CONFIG_CNTL);
3358 	tmp |= 0x03000000;
3359 	WREG32(SPI_CONFIG_CNTL, tmp);
3360 
3361 	WREG32(SQ_CONFIG, 1);
3362 
3363 	WREG32(DB_DEBUG, 0);
3364 
3365 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3366 	tmp |= 0x00000400;
3367 	WREG32(DB_DEBUG2, tmp);
3368 
3369 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3370 	tmp |= 0x00020200;
3371 	WREG32(DB_DEBUG3, tmp);
3372 
3373 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3374 	tmp |= 0x00018208;
3375 	WREG32(CB_HW_CONTROL, tmp);
3376 
3377 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3378 
3379 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3380 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3381 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3382 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3383 
3384 	WREG32(VGT_NUM_INSTANCES, 1);
3385 
3386 	WREG32(CP_PERFMON_CNTL, 0);
3387 
3388 	WREG32(SQ_CONFIG, 0);
3389 
3390 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3391 					  FORCE_EOV_MAX_REZ_CNT(255)));
3392 
3393 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3394 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3395 
3396 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3397 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3398 
3399 	tmp = RREG32(HDP_MISC_CNTL);
3400 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3401 	WREG32(HDP_MISC_CNTL, tmp);
3402 
3403 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3404 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3405 
3406 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3407 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3408 
3409 	udelay(50);
3410 }
3411 
3412 /*
3413  * GPU scratch registers helpers function.
3414  */
3415 /**
3416  * cik_scratch_init - setup driver info for CP scratch regs
3417  *
3418  * @rdev: radeon_device pointer
3419  *
3420  * Set up the number and offset of the CP scratch registers.
3421  * NOTE: use of CP scratch registers is a legacy interface and
3422  * is not used by default on newer asics (r6xx+).  On newer asics,
3423  * memory buffers are used for fences rather than scratch regs.
3424  */
3425 static void cik_scratch_init(struct radeon_device *rdev)
3426 {
3427 	int i;
3428 
3429 	rdev->scratch.num_reg = 7;
3430 	rdev->scratch.reg_base = SCRATCH_REG0;
3431 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3432 		rdev->scratch.free[i] = true;
3433 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3434 	}
3435 }
3436 
3437 /**
3438  * cik_ring_test - basic gfx ring test
3439  *
3440  * @rdev: radeon_device pointer
3441  * @ring: radeon_ring structure holding ring information
3442  *
3443  * Allocate a scratch register and write to it using the gfx ring (CIK).
3444  * Provides a basic gfx ring test to verify that the ring is working.
3445  * Used by cik_cp_gfx_resume().
3446  * Returns 0 on success, error on failure.
3447  */
3448 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3449 {
3450 	uint32_t scratch;
3451 	uint32_t tmp = 0;
3452 	unsigned i;
3453 	int r;
3454 
3455 	r = radeon_scratch_get(rdev, &scratch);
3456 	if (r) {
3457 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3458 		return r;
3459 	}
3460 	WREG32(scratch, 0xCAFEDEAD);
3461 	r = radeon_ring_lock(rdev, ring, 3);
3462 	if (r) {
3463 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3464 		radeon_scratch_free(rdev, scratch);
3465 		return r;
3466 	}
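	/* write 0xDEADBEEF to the scratch reg via a SET_UCONFIG_REG packet */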
3467 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3468 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3469 	radeon_ring_write(ring, 0xDEADBEEF);
3470 	radeon_ring_unlock_commit(rdev, ring);
3471 
3472 	for (i = 0; i < rdev->usec_timeout; i++) {
3473 		tmp = RREG32(scratch);
3474 		if (tmp == 0xDEADBEEF)
3475 			break;
3476 		DRM_UDELAY(1);
3477 	}
3478 	if (i < rdev->usec_timeout) {
3479 		DRM_INFO("ring test on %d succeeded in %u usecs\n", ring->idx, i);
3480 	} else {
3481 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3482 			  ring->idx, scratch, tmp);
3483 		r = -EINVAL;
3484 	}
3485 	radeon_scratch_free(rdev, scratch);
3486 	return r;
3487 }
3488 
3489 /**
3490  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3491  *
3492  * @rdev: radeon_device pointer
3493  * @fence: radeon fence object
3494  *
3495  * Emits a fence sequence number on the gfx ring and flushes
3496  * GPU caches.
3497  */
3498 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3499 			     struct radeon_fence *fence)
3500 {
3501 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3502 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3503 
3504 	/* EVENT_WRITE_EOP - flush caches, send int */
3505 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3506 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3507 				 EOP_TC_ACTION_EN |
3508 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3509 				 EVENT_INDEX(5)));
3510 	radeon_ring_write(ring, addr & 0xfffffffc);
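	/* DATA_SEL(1) = send 32-bit fence value, INT_SEL(2) = send interrupt
	 * on write confirm (assumed packet field encoding)
	 */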
3511 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3512 	radeon_ring_write(ring, fence->seq);
3513 	radeon_ring_write(ring, 0);
3514 	/* HDP flush */
3515 	/* We should be using the new WAIT_REG_MEM special op packet here
3516 	 * but it causes the CP to hang
3517 	 */
3518 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3519 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3520 				 WRITE_DATA_DST_SEL(0)));
3521 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3522 	radeon_ring_write(ring, 0);
3523 	radeon_ring_write(ring, 0);
3524 }
3525 
3526 /**
3527  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3528  *
3529  * @rdev: radeon_device pointer
3530  * @fence: radeon fence object
3531  *
3532  * Emits a fence sequence number on the compute ring and flushes
3533  * GPU caches.
3534  */
3535 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3536 				 struct radeon_fence *fence)
3537 {
3538 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3539 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3540 
3541 	/* RELEASE_MEM - flush caches, send int */
3542 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3543 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3544 				 EOP_TC_ACTION_EN |
3545 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3546 				 EVENT_INDEX(5)));
3547 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3548 	radeon_ring_write(ring, addr & 0xfffffffc);
3549 	radeon_ring_write(ring, upper_32_bits(addr));
3550 	radeon_ring_write(ring, fence->seq);
3551 	radeon_ring_write(ring, 0);
3552 	/* HDP flush */
3553 	/* We should be using the new WAIT_REG_MEM special op packet here
3554 	 * but it causes the CP to hang
3555 	 */
3556 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3557 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3558 				 WRITE_DATA_DST_SEL(0)));
3559 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3560 	radeon_ring_write(ring, 0);
3561 	radeon_ring_write(ring, 0);
3562 }
3563 
3564 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3565 			     struct radeon_ring *ring,
3566 			     struct radeon_semaphore *semaphore,
3567 			     bool emit_wait)
3568 {
3569 /* TODO: figure out why semaphores cause lockups */
3570 #if 0
3571 	uint64_t addr = semaphore->gpu_addr;
3572 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3573 
3574 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3575 	radeon_ring_write(ring, addr & 0xffffffff);
3576 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3577 
3578 	return true;
3579 #else
3580 	return false;
3581 #endif
3582 }
3583 
3584 /**
3585  * cik_copy_cpdma - copy pages using the CP DMA engine
3586  *
3587  * @rdev: radeon_device pointer
3588  * @src_offset: src GPU address
3589  * @dst_offset: dst GPU address
3590  * @num_gpu_pages: number of GPU pages to xfer
3591  * @fence: radeon fence object
3592  *
3593  * Copy GPU pages using the CP DMA engine (CIK+).
3594  * Used by the radeon ttm implementation to move pages if
3595  * registered as the asic copy callback.
3596  */
3597 int cik_copy_cpdma(struct radeon_device *rdev,
3598 		   uint64_t src_offset, uint64_t dst_offset,
3599 		   unsigned num_gpu_pages,
3600 		   struct radeon_fence **fence)
3601 {
3602 	struct radeon_semaphore *sem = NULL;
3603 	int ring_index = rdev->asic->copy.blit_ring_index;
3604 	struct radeon_ring *ring = &rdev->ring[ring_index];
3605 	u32 size_in_bytes, cur_size_in_bytes, control;
3606 	int i, num_loops;
3607 	int r = 0;
3608 
3609 	r = radeon_semaphore_create(rdev, &sem);
3610 	if (r) {
3611 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3612 		return r;
3613 	}
3614 
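	/* a single DMA_DATA packet can copy at most 0x1fffff bytes (21-bit
	 * byte count), so split the transfer into chunks
	 */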
3615 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3616 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3617 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3618 	if (r) {
3619 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3620 		radeon_semaphore_free(rdev, &sem, NULL);
3621 		return r;
3622 	}
3623 
3624 	radeon_semaphore_sync_to(sem, *fence);
3625 	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
3626 
3627 	for (i = 0; i < num_loops; i++) {
3628 		cur_size_in_bytes = size_in_bytes;
3629 		if (cur_size_in_bytes > 0x1fffff)
3630 			cur_size_in_bytes = 0x1fffff;
3631 		size_in_bytes -= cur_size_in_bytes;
3632 		control = 0;
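		/* on the final chunk, have the CP wait for the DMA to complete */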
3633 		if (size_in_bytes == 0)
3634 			control |= PACKET3_DMA_DATA_CP_SYNC;
3635 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3636 		radeon_ring_write(ring, control);
3637 		radeon_ring_write(ring, lower_32_bits(src_offset));
3638 		radeon_ring_write(ring, upper_32_bits(src_offset));
3639 		radeon_ring_write(ring, lower_32_bits(dst_offset));
3640 		radeon_ring_write(ring, upper_32_bits(dst_offset));
3641 		radeon_ring_write(ring, cur_size_in_bytes);
3642 		src_offset += cur_size_in_bytes;
3643 		dst_offset += cur_size_in_bytes;
3644 	}
3645 
3646 	r = radeon_fence_emit(rdev, fence, ring->idx);
3647 	if (r) {
3648 		radeon_ring_unlock_undo(rdev, ring);
3649 		return r;
3650 	}
3651 
3652 	radeon_ring_unlock_commit(rdev, ring);
3653 	radeon_semaphore_free(rdev, &sem, *fence);
3654 
3655 	return r;
3656 }
3657 
3658 /*
3659  * IB stuff
3660  */
3661 /**
3662  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3663  *
3664  * @rdev: radeon_device pointer
3665  * @ib: radeon indirect buffer object
3666  *
3667  * Emits a DE (drawing engine) or CE (constant engine) IB
3668  * on the gfx ring.  IBs are usually generated by userspace
3669  * acceleration drivers and submitted to the kernel for
3670  * scheduling on the ring.  This function schedules the IB
3671  * on the gfx ring for execution by the GPU.
3672  */
3673 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3674 {
3675 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3676 	u32 header, control = INDIRECT_BUFFER_VALID;
3677 
3678 	if (ib->is_const_ib) {
3679 		/* set switch buffer packet before const IB */
3680 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3681 		radeon_ring_write(ring, 0);
3682 
3683 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3684 	} else {
3685 		u32 next_rptr;
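		/* record where the rptr will be once this IB packet is consumed;
		 * the +3/+5 covers the write packet emitted below and the +4 the
		 * INDIRECT_BUFFER packet itself
		 */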
3686 		if (ring->rptr_save_reg) {
3687 			next_rptr = ring->wptr + 3 + 4;
3688 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3689 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3690 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3691 			radeon_ring_write(ring, next_rptr);
3692 		} else if (rdev->wb.enabled) {
3693 			next_rptr = ring->wptr + 5 + 4;
3694 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3695 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3696 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3697 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3698 			radeon_ring_write(ring, next_rptr);
3699 		}
3700 
3701 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3702 	}
3703 
3704 	control |= ib->length_dw |
3705 		(ib->vm ? (ib->vm->id << 24) : 0);
3706 
3707 	radeon_ring_write(ring, header);
3708 	radeon_ring_write(ring,
3709 #ifdef __BIG_ENDIAN
3710 			  (2 << 0) |
3711 #endif
3712 			  (ib->gpu_addr & 0xFFFFFFFC));
3713 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3714 	radeon_ring_write(ring, control);
3715 }
3716 
3717 /**
3718  * cik_ib_test - basic gfx ring IB test
3719  *
3720  * @rdev: radeon_device pointer
3721  * @ring: radeon_ring structure holding ring information
3722  *
3723  * Allocate an IB and execute it on the gfx ring (CIK).
3724  * Provides a basic gfx ring test to verify that IBs are working.
3725  * Returns 0 on success, error on failure.
3726  */
3727 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3728 {
3729 	struct radeon_ib ib;
3730 	uint32_t scratch;
3731 	uint32_t tmp = 0;
3732 	unsigned i;
3733 	int r;
3734 
3735 	r = radeon_scratch_get(rdev, &scratch);
3736 	if (r) {
3737 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3738 		return r;
3739 	}
3740 	WREG32(scratch, 0xCAFEDEAD);
3741 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3742 	if (r) {
3743 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3744 		radeon_scratch_free(rdev, scratch);
3745 		return r;
3746 	}
3747 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3748 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3749 	ib.ptr[2] = 0xDEADBEEF;
3750 	ib.length_dw = 3;
3751 	r = radeon_ib_schedule(rdev, &ib, NULL);
3752 	if (r) {
3753 		radeon_scratch_free(rdev, scratch);
3754 		radeon_ib_free(rdev, &ib);
3755 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3756 		return r;
3757 	}
3758 	r = radeon_fence_wait(ib.fence, false);
3759 	if (r) {
3760 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3761 		radeon_scratch_free(rdev, scratch);
3762 		radeon_ib_free(rdev, &ib);
3763 		return r;
3764 	}
3765 	for (i = 0; i < rdev->usec_timeout; i++) {
3766 		tmp = RREG32(scratch);
3767 		if (tmp == 0xDEADBEEF)
3768 			break;
3769 		DRM_UDELAY(1);
3770 	}
3771 	if (i < rdev->usec_timeout) {
3772 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3773 	} else {
3774 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3775 			  scratch, tmp);
3776 		r = -EINVAL;
3777 	}
3778 	radeon_scratch_free(rdev, scratch);
3779 	radeon_ib_free(rdev, &ib);
3780 	return r;
3781 }
3782 
3783 /*
3784  * CP.
3785  * On CIK, gfx and compute now have independent command processors.
3786  *
3787  * GFX
3788  * Gfx consists of a single ring and can process both gfx jobs and
3789  * compute jobs.  The gfx CP consists of three microengines (ME):
3790  * PFP - Pre-Fetch Parser
3791  * ME - Micro Engine
3792  * CE - Constant Engine
3793  * The PFP and ME make up what is considered the Drawing Engine (DE).
3794  * The CE is an asynchronous engine used for updating buffer descriptors
3795  * used by the DE so that they can be loaded into cache in parallel
3796  * while the DE is processing state update packets.
3797  *
3798  * Compute
3799  * The compute CP consists of two microengines (ME):
3800  * MEC1 - Compute MicroEngine 1
3801  * MEC2 - Compute MicroEngine 2
3802  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3803  * The queues are exposed to userspace and are programmed directly
3804  * by the compute runtime.
3805  */
3806 /**
3807  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3808  *
3809  * @rdev: radeon_device pointer
3810  * @enable: enable or disable the MEs
3811  *
3812  * Halts or unhalts the gfx MEs.
3813  */
3814 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3815 {
3816 	if (enable)
3817 		WREG32(CP_ME_CNTL, 0);
3818 	else {
3819 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3820 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3821 	}
3822 	udelay(50);
3823 }
3824 
3825 /**
3826  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3827  *
3828  * @rdev: radeon_device pointer
3829  *
3830  * Loads the gfx PFP, ME, and CE ucode.
3831  * Returns 0 for success, -EINVAL if the ucode is not available.
3832  */
3833 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3834 {
3835 	const __be32 *fw_data;
3836 	int i;
3837 
3838 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3839 		return -EINVAL;
3840 
3841 	cik_cp_gfx_enable(rdev, false);
3842 
3843 	/* PFP */
3844 	fw_data = (const __be32 *)rdev->pfp_fw->data;
3845 	WREG32(CP_PFP_UCODE_ADDR, 0);
3846 	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3847 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3848 	WREG32(CP_PFP_UCODE_ADDR, 0);
3849 
3850 	/* CE */
3851 	fw_data = (const __be32 *)rdev->ce_fw->data;
3852 	WREG32(CP_CE_UCODE_ADDR, 0);
3853 	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3854 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3855 	WREG32(CP_CE_UCODE_ADDR, 0);
3856 
3857 	/* ME */
3858 	fw_data = (const __be32 *)rdev->me_fw->data;
3859 	WREG32(CP_ME_RAM_WADDR, 0);
3860 	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3861 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3862 	WREG32(CP_ME_RAM_WADDR, 0);
3863 
3864 	WREG32(CP_PFP_UCODE_ADDR, 0);
3865 	WREG32(CP_CE_UCODE_ADDR, 0);
3866 	WREG32(CP_ME_RAM_WADDR, 0);
3867 	WREG32(CP_ME_RAM_RADDR, 0);
3868 	return 0;
3869 }
3870 
3871 /**
3872  * cik_cp_gfx_start - start the gfx ring
3873  *
3874  * @rdev: radeon_device pointer
3875  *
3876  * Enables the ring and loads the clear state context and other
3877  * packets required to init the ring.
3878  * Returns 0 for success, error for failure.
3879  */
3880 static int cik_cp_gfx_start(struct radeon_device *rdev)
3881 {
3882 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3883 	int r, i;
3884 
3885 	/* init the CP */
3886 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3887 	WREG32(CP_ENDIAN_SWAP, 0);
3888 	WREG32(CP_DEVICE_ID, 1);
3889 
3890 	cik_cp_gfx_enable(rdev, true);
3891 
3892 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3893 	if (r) {
3894 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3895 		return r;
3896 	}
3897 
3898 	/* init the CE partitions.  CE only used for gfx on CIK */
3899 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3900 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3901 	radeon_ring_write(ring, 0xc000);
3902 	radeon_ring_write(ring, 0xc000);
3903 
3904 	/* setup clear context state */
3905 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3906 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3907 
3908 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3909 	radeon_ring_write(ring, 0x80000000);
3910 	radeon_ring_write(ring, 0x80000000);
3911 
3912 	for (i = 0; i < cik_default_size; i++)
3913 		radeon_ring_write(ring, cik_default_state[i]);
3914 
3915 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3916 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3917 
3918 	/* set clear context state */
3919 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3920 	radeon_ring_write(ring, 0);
3921 
3922 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3923 	radeon_ring_write(ring, 0x00000316);
3924 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3925 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3926 
3927 	radeon_ring_unlock_commit(rdev, ring);
3928 
3929 	return 0;
3930 }
3931 
3932 /**
3933  * cik_cp_gfx_fini - stop the gfx ring
3934  *
3935  * @rdev: radeon_device pointer
3936  *
3937  * Stop the gfx ring and tear down the driver ring
3938  * info.
3939  */
3940 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3941 {
3942 	cik_cp_gfx_enable(rdev, false);
3943 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3944 }
3945 
3946 /**
3947  * cik_cp_gfx_resume - setup the gfx ring buffer registers
3948  *
3949  * @rdev: radeon_device pointer
3950  *
3951  * Program the location and size of the gfx ring buffer
3952  * and test it to make sure it's working.
3953  * Returns 0 for success, error for failure.
3954  */
3955 static int cik_cp_gfx_resume(struct radeon_device *rdev)
3956 {
3957 	struct radeon_ring *ring;
3958 	u32 tmp;
3959 	u32 rb_bufsz;
3960 	u64 rb_addr;
3961 	int r;
3962 
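	/* clear the CP semaphore wait timers; CP_SEM_INCOMPLETE_TIMER_CNTL is
	 * skipped on Hawaii (presumably not present there)
	 */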
3963 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3964 	if (rdev->family != CHIP_HAWAII)
3965 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3966 
3967 	/* Set the write pointer delay */
3968 	WREG32(CP_RB_WPTR_DELAY, 0);
3969 
3970 	/* set the RB to use vmid 0 */
3971 	WREG32(CP_RB_VMID, 0);
3972 
3973 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3974 
3975 	/* ring 0 - compute and gfx */
3976 	/* Set ring buffer size */
3977 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3978 	rb_bufsz = order_base_2(ring->ring_size / 8);
3979 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3980 #ifdef __BIG_ENDIAN
3981 	tmp |= BUF_SWAP_32BIT;
3982 #endif
3983 	WREG32(CP_RB0_CNTL, tmp);
3984 
3985 	/* Initialize the ring buffer's read and write pointers */
3986 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3987 	ring->wptr = 0;
3988 	WREG32(CP_RB0_WPTR, ring->wptr);
3989 
3990 	/* set the wb address whether it's enabled or not */
3991 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3992 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3993 
3994 	/* scratch register shadowing is no longer supported */
3995 	WREG32(SCRATCH_UMSK, 0);
3996 
3997 	if (!rdev->wb.enabled)
3998 		tmp |= RB_NO_UPDATE;
3999 
4000 	mdelay(1);
4001 	WREG32(CP_RB0_CNTL, tmp);
4002 
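	/* the ring base address is programmed in units of 256 bytes */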
4003 	rb_addr = ring->gpu_addr >> 8;
4004 	WREG32(CP_RB0_BASE, rb_addr);
4005 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4006 
4007 	ring->rptr = RREG32(CP_RB0_RPTR);
4008 
4009 	/* start the ring */
4010 	cik_cp_gfx_start(rdev);
4011 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4012 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4013 	if (r) {
4014 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4015 		return r;
4016 	}
4017 	return 0;
4018 }
4019 
4020 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
4021 			      struct radeon_ring *ring)
4022 {
4023 	u32 rptr;
4024 
4027 	if (rdev->wb.enabled) {
4028 		rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
4029 	} else {
4030 		mutex_lock(&rdev->srbm_mutex);
4031 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4032 		rptr = RREG32(CP_HQD_PQ_RPTR);
4033 		cik_srbm_select(rdev, 0, 0, 0, 0);
4034 		mutex_unlock(&rdev->srbm_mutex);
4035 	}
4036 
4037 	return rptr;
4038 }
4039 
4040 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
4041 			      struct radeon_ring *ring)
4042 {
4043 	u32 wptr;
4044 
4045 	if (rdev->wb.enabled) {
4046 		wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
4047 	} else {
4048 		mutex_lock(&rdev->srbm_mutex);
4049 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4050 		wptr = RREG32(CP_HQD_PQ_WPTR);
4051 		cik_srbm_select(rdev, 0, 0, 0, 0);
4052 		mutex_unlock(&rdev->srbm_mutex);
4053 	}
4054 
4055 	return wptr;
4056 }
4057 
4058 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
4059 			       struct radeon_ring *ring)
4060 {
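	/* mirror the new wptr in the writeback buffer, then ring the doorbell */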
4061 	rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
4062 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4063 }
4064 
4065 /**
4066  * cik_cp_compute_enable - enable/disable the compute CP MEs
4067  *
4068  * @rdev: radeon_device pointer
4069  * @enable: enable or disable the MEs
4070  *
4071  * Halts or unhalts the compute MEs.
4072  */
4073 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4074 {
4075 	if (enable)
4076 		WREG32(CP_MEC_CNTL, 0);
4077 	else
4078 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4079 	udelay(50);
4080 }
4081 
4082 /**
4083  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4084  *
4085  * @rdev: radeon_device pointer
4086  *
4087  * Loads the compute MEC1&2 ucode.
4088  * Returns 0 for success, -EINVAL if the ucode is not available.
4089  */
4090 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4091 {
4092 	const __be32 *fw_data;
4093 	int i;
4094 
4095 	if (!rdev->mec_fw)
4096 		return -EINVAL;
4097 
4098 	cik_cp_compute_enable(rdev, false);
4099 
4100 	/* MEC1 */
4101 	fw_data = (const __be32 *)rdev->mec_fw->data;
4102 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4103 	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4104 		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4105 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4106 
4107 	if (rdev->family == CHIP_KAVERI) {
4108 		/* MEC2 */
4109 		fw_data = (const __be32 *)rdev->mec_fw->data;
4110 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4111 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4112 			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4113 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4114 	}
4115 
4116 	return 0;
4117 }
4118 
4119 /**
4120  * cik_cp_compute_start - start the compute queues
4121  *
4122  * @rdev: radeon_device pointer
4123  *
4124  * Enable the compute queues.
4125  * Returns 0 for success, error for failure.
4126  */
4127 static int cik_cp_compute_start(struct radeon_device *rdev)
4128 {
4129 	cik_cp_compute_enable(rdev, true);
4130 
4131 	return 0;
4132 }
4133 
4134 /**
4135  * cik_cp_compute_fini - stop the compute queues
4136  *
4137  * @rdev: radeon_device pointer
4138  *
4139  * Stop the compute queues and tear down the driver queue
4140  * info.
4141  */
4142 static void cik_cp_compute_fini(struct radeon_device *rdev)
4143 {
4144 	int i, idx, r;
4145 
4146 	cik_cp_compute_enable(rdev, false);
4147 
4148 	for (i = 0; i < 2; i++) {
4149 		if (i == 0)
4150 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4151 		else
4152 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4153 
4154 		if (rdev->ring[idx].mqd_obj) {
4155 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4156 			if (unlikely(r != 0))
4157 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4158 
4159 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4160 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4161 
4162 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4163 			rdev->ring[idx].mqd_obj = NULL;
4164 		}
4165 	}
4166 }
4167 
4168 static void cik_mec_fini(struct radeon_device *rdev)
4169 {
4170 	int r;
4171 
4172 	if (rdev->mec.hpd_eop_obj) {
4173 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4174 		if (unlikely(r != 0))
4175 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4176 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4177 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4178 
4179 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4180 		rdev->mec.hpd_eop_obj = NULL;
4181 	}
4182 }
4183 
4184 #define MEC_HPD_SIZE 2048
4185 
4186 static int cik_mec_init(struct radeon_device *rdev)
4187 {
4188 	int r;
4189 	u32 *hpd;
4190 
4191 	/*
4192 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4193 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4194 	 */
4195 	if (rdev->family == CHIP_KAVERI)
4196 		rdev->mec.num_mec = 2;
4197 	else
4198 		rdev->mec.num_mec = 1;
4199 	rdev->mec.num_pipe = 4;
4200 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4201 
4202 	if (rdev->mec.hpd_eop_obj == NULL) {
4203 		r = radeon_bo_create(rdev,
4204 				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4205 				     PAGE_SIZE, true,
4206 				     RADEON_GEM_DOMAIN_GTT, NULL,
4207 				     &rdev->mec.hpd_eop_obj);
4208 		if (r) {
4209 			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4210 			return r;
4211 		}
4212 	}
4213 
4214 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4215 	if (unlikely(r != 0)) {
4216 		cik_mec_fini(rdev);
4217 		return r;
4218 	}
4219 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4220 			  &rdev->mec.hpd_eop_gpu_addr);
4221 	if (r) {
4222 		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4223 		cik_mec_fini(rdev);
4224 		return r;
4225 	}
4226 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4227 	if (r) {
4228 		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4229 		cik_mec_fini(rdev);
4230 		return r;
4231 	}
4232 
4233 	/* clear memory.  Not sure if this is required or not */
4234 	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4235 
4236 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4237 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4238 
4239 	return 0;
4240 }
4241 
4242 struct hqd_registers {
4244 	u32 cp_mqd_base_addr;
4245 	u32 cp_mqd_base_addr_hi;
4246 	u32 cp_hqd_active;
4247 	u32 cp_hqd_vmid;
4248 	u32 cp_hqd_persistent_state;
4249 	u32 cp_hqd_pipe_priority;
4250 	u32 cp_hqd_queue_priority;
4251 	u32 cp_hqd_quantum;
4252 	u32 cp_hqd_pq_base;
4253 	u32 cp_hqd_pq_base_hi;
4254 	u32 cp_hqd_pq_rptr;
4255 	u32 cp_hqd_pq_rptr_report_addr;
4256 	u32 cp_hqd_pq_rptr_report_addr_hi;
4257 	u32 cp_hqd_pq_wptr_poll_addr;
4258 	u32 cp_hqd_pq_wptr_poll_addr_hi;
4259 	u32 cp_hqd_pq_doorbell_control;
4260 	u32 cp_hqd_pq_wptr;
4261 	u32 cp_hqd_pq_control;
4262 	u32 cp_hqd_ib_base_addr;
4263 	u32 cp_hqd_ib_base_addr_hi;
4264 	u32 cp_hqd_ib_rptr;
4265 	u32 cp_hqd_ib_control;
4266 	u32 cp_hqd_iq_timer;
4267 	u32 cp_hqd_iq_rptr;
4268 	u32 cp_hqd_dequeue_request;
4269 	u32 cp_hqd_dma_offload;
4270 	u32 cp_hqd_sema_cmd;
4271 	u32 cp_hqd_msg_type;
4272 	u32 cp_hqd_atomic0_preop_lo;
4273 	u32 cp_hqd_atomic0_preop_hi;
4274 	u32 cp_hqd_atomic1_preop_lo;
4275 	u32 cp_hqd_atomic1_preop_hi;
4276 	u32 cp_hqd_hq_scheduler0;
4277 	u32 cp_hqd_hq_scheduler1;
4278 	u32 cp_mqd_control;
4279 };
4280 
4281 struct bonaire_mqd {
4283 	u32 header;
4284 	u32 dispatch_initiator;
4285 	u32 dimensions[3];
4286 	u32 start_idx[3];
4287 	u32 num_threads[3];
4288 	u32 pipeline_stat_enable;
4289 	u32 perf_counter_enable;
4290 	u32 pgm[2];
4291 	u32 tba[2];
4292 	u32 tma[2];
4293 	u32 pgm_rsrc[2];
4294 	u32 vmid;
4295 	u32 resource_limits;
4296 	u32 static_thread_mgmt01[2];
4297 	u32 tmp_ring_size;
4298 	u32 static_thread_mgmt23[2];
4299 	u32 restart[3];
4300 	u32 thread_trace_enable;
4301 	u32 reserved1;
4302 	u32 user_data[16];
4303 	u32 vgtcs_invoke_count[2];
4304 	struct hqd_registers queue_state;
4305 	u32 dequeue_cntr;
4306 	u32 interrupt_queue[64];
4307 };
4308 
4309 /**
4310  * cik_cp_compute_resume - setup the compute queue registers
4311  *
4312  * @rdev: radeon_device pointer
4313  *
4314  * Program the compute queues and test them to make sure they
4315  * are working.
4316  * Returns 0 for success, error for failure.
4317  */
4318 static int cik_cp_compute_resume(struct radeon_device *rdev)
4319 {
4320 	int r, i, j, idx;
4321 	u32 tmp;
4322 	bool use_doorbell = true;
4323 	u64 hqd_gpu_addr;
4324 	u64 mqd_gpu_addr;
4325 	u64 eop_gpu_addr;
4326 	u64 wb_gpu_addr;
4327 	u32 *buf;
4328 	struct bonaire_mqd *mqd;
4329 
4330 	r = cik_cp_compute_start(rdev);
4331 	if (r)
4332 		return r;
4333 
4334 	/* fix up chicken bits */
4335 	tmp = RREG32(CP_CPF_DEBUG);
4336 	tmp |= (1 << 23);
4337 	WREG32(CP_CPF_DEBUG, tmp);
4338 
4339 	/* init the pipes */
4340 	mutex_lock(&rdev->srbm_mutex);
4341 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4342 		int me = (i < 4) ? 1 : 2;
4343 		int pipe = (i < 4) ? i : (i - 4);
4344 
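		/* each pipe gets its own (MEC_HPD_SIZE * 2 byte) EOP region
		 * within the shared bo
		 */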
4345 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4346 
4347 		cik_srbm_select(rdev, me, pipe, 0, 0);
4348 
4349 		/* write the EOP addr */
4350 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4351 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4352 
4353 		/* set the VMID assigned */
4354 		WREG32(CP_HPD_EOP_VMID, 0);
4355 
4356 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4357 		tmp = RREG32(CP_HPD_EOP_CONTROL);
4358 		tmp &= ~EOP_SIZE_MASK;
4359 		tmp |= order_base_2(MEC_HPD_SIZE / 8);
4360 		WREG32(CP_HPD_EOP_CONTROL, tmp);
4361 	}
4362 	cik_srbm_select(rdev, 0, 0, 0, 0);
4363 	mutex_unlock(&rdev->srbm_mutex);
4364 
4365 	/* init the queues.  Just two for now. */
4366 	for (i = 0; i < 2; i++) {
4367 		if (i == 0)
4368 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4369 		else
4370 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4371 
4372 		if (rdev->ring[idx].mqd_obj == NULL) {
4373 			r = radeon_bo_create(rdev,
4374 					     sizeof(struct bonaire_mqd),
4375 					     PAGE_SIZE, true,
4376 					     RADEON_GEM_DOMAIN_GTT, NULL,
4377 					     &rdev->ring[idx].mqd_obj);
4378 			if (r) {
4379 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4380 				return r;
4381 			}
4382 		}
4383 
4384 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4385 		if (unlikely(r != 0)) {
4386 			cik_cp_compute_fini(rdev);
4387 			return r;
4388 		}
4389 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4390 				  &mqd_gpu_addr);
4391 		if (r) {
4392 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4393 			cik_cp_compute_fini(rdev);
4394 			return r;
4395 		}
4396 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4397 		if (r) {
4398 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4399 			cik_cp_compute_fini(rdev);
4400 			return r;
4401 		}
4402 
4403 		/* init the mqd struct */
4404 		memset(buf, 0, sizeof(struct bonaire_mqd));
4405 
4406 		mqd = (struct bonaire_mqd *)buf;
4407 		mqd->header = 0xC0310800;
4408 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4409 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4410 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4411 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4412 
4413 		mutex_lock(&rdev->srbm_mutex);
4414 		cik_srbm_select(rdev, rdev->ring[idx].me,
4415 				rdev->ring[idx].pipe,
4416 				rdev->ring[idx].queue, 0);
4417 
4418 		/* disable wptr polling */
4419 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4420 		tmp &= ~WPTR_POLL_EN;
4421 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4422 
4423 		/* enable doorbell? */
4424 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4425 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4426 		if (use_doorbell)
4427 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4428 		else
4429 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4430 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4431 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4432 
4433 		/* disable the queue if it's active */
4434 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4435 		mqd->queue_state.cp_hqd_pq_rptr = 0;
4436 		mqd->queue_state.cp_hqd_pq_wptr = 0;
4437 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4438 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4439 			for (j = 0; j < rdev->usec_timeout; j++) {
4440 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4441 					break;
4442 				udelay(1);
4443 			}
4444 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4445 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4446 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4447 		}
4448 
4449 		/* set the pointer to the MQD */
4450 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4451 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4452 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4453 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4454 		/* set MQD vmid to 0 */
4455 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4456 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4457 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4458 
4459 		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4460 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4461 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4462 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4463 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4464 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4465 
4466 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4467 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4468 		mqd->queue_state.cp_hqd_pq_control &=
4469 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4470 
4471 		mqd->queue_state.cp_hqd_pq_control |=
4472 			order_base_2(rdev->ring[idx].ring_size / 8);
4473 		mqd->queue_state.cp_hqd_pq_control |=
4474 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4475 #ifdef __BIG_ENDIAN
4476 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4477 #endif
4478 		mqd->queue_state.cp_hqd_pq_control &=
4479 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4480 		mqd->queue_state.cp_hqd_pq_control |=
4481 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4482 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4483 
4484 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4485 		if (i == 0)
4486 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4487 		else
4488 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4489 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4490 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4491 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4492 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4493 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4494 
4495 		/* set the wb address whether it's enabled or not */
4496 		if (i == 0)
4497 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4498 		else
4499 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4500 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4501 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4502 			upper_32_bits(wb_gpu_addr) & 0xffff;
4503 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4504 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4505 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4506 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4507 
4508 		/* enable the doorbell if requested */
4509 		if (use_doorbell) {
4510 			mqd->queue_state.cp_hqd_pq_doorbell_control =
4511 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4512 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4513 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4514 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4515 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4516 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4517 				~(DOORBELL_SOURCE | DOORBELL_HIT);
4518 
4519 		} else {
4520 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4521 		}
4522 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4523 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4524 
4525 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4526 		rdev->ring[idx].wptr = 0;
4527 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4528 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4529 		rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
4530 		mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
4531 
4532 		/* set the vmid for the queue */
4533 		mqd->queue_state.cp_hqd_vmid = 0;
4534 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4535 
4536 		/* activate the queue */
4537 		mqd->queue_state.cp_hqd_active = 1;
4538 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4539 
4540 		cik_srbm_select(rdev, 0, 0, 0, 0);
4541 		mutex_unlock(&rdev->srbm_mutex);
4542 
4543 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4544 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4545 
4546 		rdev->ring[idx].ready = true;
4547 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4548 		if (r)
4549 			rdev->ring[idx].ready = false;
4550 	}
4551 
4552 	return 0;
4553 }
4554 
4555 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4556 {
4557 	cik_cp_gfx_enable(rdev, enable);
4558 	cik_cp_compute_enable(rdev, enable);
4559 }
4560 
4561 static int cik_cp_load_microcode(struct radeon_device *rdev)
4562 {
4563 	int r;
4564 
4565 	r = cik_cp_gfx_load_microcode(rdev);
4566 	if (r)
4567 		return r;
4568 	r = cik_cp_compute_load_microcode(rdev);
4569 	if (r)
4570 		return r;
4571 
4572 	return 0;
4573 }
4574 
4575 static void cik_cp_fini(struct radeon_device *rdev)
4576 {
4577 	cik_cp_gfx_fini(rdev);
4578 	cik_cp_compute_fini(rdev);
4579 }
4580 
4581 static int cik_cp_resume(struct radeon_device *rdev)
4582 {
4583 	int r;
4584 
4585 	cik_enable_gui_idle_interrupt(rdev, false);
4586 
4587 	r = cik_cp_load_microcode(rdev);
4588 	if (r)
4589 		return r;
4590 
4591 	r = cik_cp_gfx_resume(rdev);
4592 	if (r)
4593 		return r;
4594 	r = cik_cp_compute_resume(rdev);
4595 	if (r)
4596 		return r;
4597 
4598 	cik_enable_gui_idle_interrupt(rdev, true);
4599 
4600 	return 0;
4601 }
4602 
4603 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4604 {
4605 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4606 		RREG32(GRBM_STATUS));
4607 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4608 		RREG32(GRBM_STATUS2));
4609 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4610 		RREG32(GRBM_STATUS_SE0));
4611 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4612 		RREG32(GRBM_STATUS_SE1));
4613 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4614 		RREG32(GRBM_STATUS_SE2));
4615 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4616 		RREG32(GRBM_STATUS_SE3));
4617 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4618 		RREG32(SRBM_STATUS));
4619 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4620 		RREG32(SRBM_STATUS2));
4621 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4622 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4623 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4624 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4625 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4626 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4627 		 RREG32(CP_STALLED_STAT1));
4628 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4629 		 RREG32(CP_STALLED_STAT2));
4630 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4631 		 RREG32(CP_STALLED_STAT3));
4632 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4633 		 RREG32(CP_CPF_BUSY_STAT));
4634 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4635 		 RREG32(CP_CPF_STALLED_STAT1));
4636 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4637 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4638 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4639 		 RREG32(CP_CPC_STALLED_STAT1));
4640 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4641 }
4642 
4643 /**
4644  * cik_gpu_check_soft_reset - check which blocks are busy
4645  *
4646  * @rdev: radeon_device pointer
4647  *
4648  * Check which blocks are busy and return the relevant reset
4649  * mask to be used by cik_gpu_soft_reset().
4650  * Returns a mask of the blocks to be reset.
4651  */
4652 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4653 {
4654 	u32 reset_mask = 0;
4655 	u32 tmp;
4656 
4657 	/* GRBM_STATUS */
4658 	tmp = RREG32(GRBM_STATUS);
4659 	if (tmp & (PA_BUSY | SC_BUSY |
4660 		   BCI_BUSY | SX_BUSY |
4661 		   TA_BUSY | VGT_BUSY |
4662 		   DB_BUSY | CB_BUSY |
4663 		   GDS_BUSY | SPI_BUSY |
4664 		   IA_BUSY | IA_BUSY_NO_DMA))
4665 		reset_mask |= RADEON_RESET_GFX;
4666 
4667 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4668 		reset_mask |= RADEON_RESET_CP;
4669 
4670 	/* GRBM_STATUS2 */
4671 	tmp = RREG32(GRBM_STATUS2);
4672 	if (tmp & RLC_BUSY)
4673 		reset_mask |= RADEON_RESET_RLC;
4674 
4675 	/* SDMA0_STATUS_REG */
4676 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4677 	if (!(tmp & SDMA_IDLE))
4678 		reset_mask |= RADEON_RESET_DMA;
4679 
4680 	/* SDMA1_STATUS_REG */
4681 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4682 	if (!(tmp & SDMA_IDLE))
4683 		reset_mask |= RADEON_RESET_DMA1;
4684 
4685 	/* SRBM_STATUS2 */
4686 	tmp = RREG32(SRBM_STATUS2);
4687 	if (tmp & SDMA_BUSY)
4688 		reset_mask |= RADEON_RESET_DMA;
4689 
4690 	if (tmp & SDMA1_BUSY)
4691 		reset_mask |= RADEON_RESET_DMA1;
4692 
4693 	/* SRBM_STATUS */
4694 	tmp = RREG32(SRBM_STATUS);
4695 
4696 	if (tmp & IH_BUSY)
4697 		reset_mask |= RADEON_RESET_IH;
4698 
4699 	if (tmp & SEM_BUSY)
4700 		reset_mask |= RADEON_RESET_SEM;
4701 
4702 	if (tmp & GRBM_RQ_PENDING)
4703 		reset_mask |= RADEON_RESET_GRBM;
4704 
4705 	if (tmp & VMC_BUSY)
4706 		reset_mask |= RADEON_RESET_VMC;
4707 
4708 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4709 		   MCC_BUSY | MCD_BUSY))
4710 		reset_mask |= RADEON_RESET_MC;
4711 
4712 	if (evergreen_is_display_hung(rdev))
4713 		reset_mask |= RADEON_RESET_DISPLAY;
4714 
4715 	/* Skip MC reset as it's most likely not hung, just busy */
4716 	if (reset_mask & RADEON_RESET_MC) {
4717 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4718 		reset_mask &= ~RADEON_RESET_MC;
4719 	}
4720 
4721 	return reset_mask;
4722 }
4723 
4724 /**
4725  * cik_gpu_soft_reset - soft reset GPU
4726  *
4727  * @rdev: radeon_device pointer
4728  * @reset_mask: mask of which blocks to reset
4729  *
4730  * Soft reset the blocks specified in @reset_mask.
4731  */
4732 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4733 {
4734 	struct evergreen_mc_save save;
4735 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4736 	u32 tmp;
4737 
4738 	if (reset_mask == 0)
4739 		return;
4740 
4741 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4742 
4743 	cik_print_gpu_status_regs(rdev);
4744 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4745 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4746 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4747 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4748 
4749 	/* disable CG/PG */
4750 	cik_fini_pg(rdev);
4751 	cik_fini_cg(rdev);
4752 
4753 	/* stop the rlc */
4754 	cik_rlc_stop(rdev);
4755 
4756 	/* Disable GFX parsing/prefetching */
4757 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4758 
4759 	/* Disable MEC parsing/prefetching */
4760 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4761 
4762 	if (reset_mask & RADEON_RESET_DMA) {
4763 		/* sdma0 */
4764 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4765 		tmp |= SDMA_HALT;
4766 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4767 	}
4768 	if (reset_mask & RADEON_RESET_DMA1) {
4769 		/* sdma1 */
4770 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4771 		tmp |= SDMA_HALT;
4772 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4773 	}
4774 
4775 	evergreen_mc_stop(rdev, &save);
4776 	if (evergreen_mc_wait_for_idle(rdev)) {
4777 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4778 	}
4779 
4780 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4781 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4782 
4783 	if (reset_mask & RADEON_RESET_CP) {
4784 		grbm_soft_reset |= SOFT_RESET_CP;
4785 
4786 		srbm_soft_reset |= SOFT_RESET_GRBM;
4787 	}
4788 
4789 	if (reset_mask & RADEON_RESET_DMA)
4790 		srbm_soft_reset |= SOFT_RESET_SDMA;
4791 
4792 	if (reset_mask & RADEON_RESET_DMA1)
4793 		srbm_soft_reset |= SOFT_RESET_SDMA1;
4794 
4795 	if (reset_mask & RADEON_RESET_DISPLAY)
4796 		srbm_soft_reset |= SOFT_RESET_DC;
4797 
4798 	if (reset_mask & RADEON_RESET_RLC)
4799 		grbm_soft_reset |= SOFT_RESET_RLC;
4800 
4801 	if (reset_mask & RADEON_RESET_SEM)
4802 		srbm_soft_reset |= SOFT_RESET_SEM;
4803 
4804 	if (reset_mask & RADEON_RESET_IH)
4805 		srbm_soft_reset |= SOFT_RESET_IH;
4806 
4807 	if (reset_mask & RADEON_RESET_GRBM)
4808 		srbm_soft_reset |= SOFT_RESET_GRBM;
4809 
4810 	if (reset_mask & RADEON_RESET_VMC)
4811 		srbm_soft_reset |= SOFT_RESET_VMC;
4812 
4813 	if (!(rdev->flags & RADEON_IS_IGP)) {
4814 		if (reset_mask & RADEON_RESET_MC)
4815 			srbm_soft_reset |= SOFT_RESET_MC;
4816 	}
4817 
4818 	if (grbm_soft_reset) {
4819 		tmp = RREG32(GRBM_SOFT_RESET);
4820 		tmp |= grbm_soft_reset;
4821 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4822 		WREG32(GRBM_SOFT_RESET, tmp);
4823 		tmp = RREG32(GRBM_SOFT_RESET);
4824 
4825 		udelay(50);
4826 
4827 		tmp &= ~grbm_soft_reset;
4828 		WREG32(GRBM_SOFT_RESET, tmp);
4829 		tmp = RREG32(GRBM_SOFT_RESET);
4830 	}
4831 
4832 	if (srbm_soft_reset) {
4833 		tmp = RREG32(SRBM_SOFT_RESET);
4834 		tmp |= srbm_soft_reset;
4835 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4836 		WREG32(SRBM_SOFT_RESET, tmp);
4837 		tmp = RREG32(SRBM_SOFT_RESET);
4838 
4839 		udelay(50);
4840 
4841 		tmp &= ~srbm_soft_reset;
4842 		WREG32(SRBM_SOFT_RESET, tmp);
4843 		tmp = RREG32(SRBM_SOFT_RESET);
4844 	}
4845 
4846 	/* Wait a little for things to settle down */
4847 	udelay(50);
4848 
4849 	evergreen_mc_resume(rdev, &save);
4850 	udelay(50);
4851 
4852 	cik_print_gpu_status_regs(rdev);
4853 }
4854 
4855 /**
4856  * cik_asic_reset - soft reset GPU
4857  *
4858  * @rdev: radeon_device pointer
4859  *
4860  * Look up which blocks are hung and attempt
4861  * to reset them.
4862  * Returns 0 for success.
4863  */
4864 int cik_asic_reset(struct radeon_device *rdev)
4865 {
4866 	u32 reset_mask;
4867 
4868 	reset_mask = cik_gpu_check_soft_reset(rdev);
4869 
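	/* flag the engine as hung in the BIOS scratch regs while we reset;
	 * cleared below if the reset brought all blocks back to idle
	 */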
4870 	if (reset_mask)
4871 		r600_set_bios_scratch_engine_hung(rdev, true);
4872 
4873 	cik_gpu_soft_reset(rdev, reset_mask);
4874 
4875 	reset_mask = cik_gpu_check_soft_reset(rdev);
4876 
4877 	if (!reset_mask)
4878 		r600_set_bios_scratch_engine_hung(rdev, false);
4879 
4880 	return 0;
4881 }
4882 
4883 /**
4884  * cik_gfx_is_lockup - check if the 3D engine is locked up
4885  *
4886  * @rdev: radeon_device pointer
4887  * @ring: radeon_ring structure holding ring information
4888  *
4889  * Check if the 3D engine is locked up (CIK).
4890  * Returns true if the engine is locked, false if not.
4891  */
4892 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4893 {
4894 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4895 
4896 	if (!(reset_mask & (RADEON_RESET_GFX |
4897 			    RADEON_RESET_COMPUTE |
4898 			    RADEON_RESET_CP))) {
4899 		radeon_ring_lockup_update(ring);
4900 		return false;
4901 	}
4902 	/* force CP activities */
4903 	radeon_ring_force_activity(rdev, ring);
4904 	return radeon_ring_test_lockup(rdev, ring);
4905 }
4906 
4907 /* MC */
4908 /**
4909  * cik_mc_program - program the GPU memory controller
4910  *
4911  * @rdev: radeon_device pointer
4912  *
4913  * Set the location of vram, gart, and AGP in the GPU's
4914  * physical address space (CIK).
4915  */
4916 static void cik_mc_program(struct radeon_device *rdev)
4917 {
4918 	struct evergreen_mc_save save;
4919 	u32 tmp;
4920 	int i, j;
4921 
4922 	/* Initialize HDP */
4923 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4924 		WREG32((0x2c14 + j), 0x00000000);
4925 		WREG32((0x2c18 + j), 0x00000000);
4926 		WREG32((0x2c1c + j), 0x00000000);
4927 		WREG32((0x2c20 + j), 0x00000000);
4928 		WREG32((0x2c24 + j), 0x00000000);
4929 	}
4930 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4931 
4932 	evergreen_mc_stop(rdev, &save);
4933 	if (radeon_mc_wait_for_idle(rdev)) {
4934 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4935 	}
4936 	/* Lock out access through the VGA aperture */
4937 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4938 	/* Update configuration */
4939 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4940 	       rdev->mc.vram_start >> 12);
4941 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4942 	       rdev->mc.vram_end >> 12);
4943 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4944 	       rdev->vram_scratch.gpu_addr >> 12);
4945 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4946 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4947 	WREG32(MC_VM_FB_LOCATION, tmp);
4948 	/* XXX double check these! */
4949 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4950 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4951 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4952 	WREG32(MC_VM_AGP_BASE, 0);
4953 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4954 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4955 	if (radeon_mc_wait_for_idle(rdev)) {
4956 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4957 	}
4958 	evergreen_mc_resume(rdev, &save);
4959 	/* we need to own VRAM, so turn off the VGA renderer here
4960 	 * to stop it overwriting our objects */
4961 	rv515_vga_render_disable(rdev);
4962 }
4963 
4964 /**
4965  * cik_mc_init - initialize the memory controller driver params
4966  *
4967  * @rdev: radeon_device pointer
4968  *
4969  * Look up the amount of vram, vram width, and decide how to place
4970  * vram and gart within the GPU's physical address space (CIK).
4971  * Returns 0 for success.
4972  */
4973 static int cik_mc_init(struct radeon_device *rdev)
4974 {
4975 	u32 tmp;
4976 	int chansize, numchan;
4977 
4978 	/* Get VRAM information */
4979 	rdev->mc.vram_is_ddr = true;
4980 	tmp = RREG32(MC_ARB_RAMCFG);
4981 	if (tmp & CHANSIZE_MASK) {
4982 		chansize = 64;
4983 	} else {
4984 		chansize = 32;
4985 	}
4986 	tmp = RREG32(MC_SHARED_CHMAP);
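	/* NOOFCHAN is an encoded value, not a literal channel count; the
	 * switch below maps it to the actual number of DRAM channels.
	 */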
4987 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4988 	case 0:
4989 	default:
4990 		numchan = 1;
4991 		break;
4992 	case 1:
4993 		numchan = 2;
4994 		break;
4995 	case 2:
4996 		numchan = 4;
4997 		break;
4998 	case 3:
4999 		numchan = 8;
5000 		break;
5001 	case 4:
5002 		numchan = 3;
5003 		break;
5004 	case 5:
5005 		numchan = 6;
5006 		break;
5007 	case 6:
5008 		numchan = 10;
5009 		break;
5010 	case 7:
5011 		numchan = 12;
5012 		break;
5013 	case 8:
5014 		numchan = 16;
5015 		break;
5016 	}
5017 	rdev->mc.vram_width = numchan * chansize;
5018 	/* Could the aperture size report 0? */
5019 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5020 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5021 	/* CONFIG_MEMSIZE reports the size in MB on cik, as on si */
5022 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5023 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5024 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5025 	si_vram_gtt_location(rdev, &rdev->mc);
5026 	radeon_update_bandwidth_info(rdev);
5027 
5028 	return 0;
5029 }
5030 
5031 /*
5032  * GART
5033  * VMID 0 is the physical GPU addresses as used by the kernel.
5034  * VMIDs 1-15 are used for userspace clients and are handled
5035  * by the radeon vm/hsa code.
5036  */
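/*
 * The per-VMID page table base registers come in two banks: VMIDs 0-7
 * use VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vmid * 4), while VMIDs 8-15
 * use VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vmid - 8) * 4), as seen in
 * cik_pcie_gart_enable() and cik_vm_flush() below.
 */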
5037 /**
5038  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5039  *
5040  * @rdev: radeon_device pointer
5041  *
5042  * Flush the TLB for the VMID 0 page table (CIK).
5043  */
5044 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5045 {
5046 	/* flush hdp cache */
5047 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5048 
5049 	/* bits 0-15 are the VM contexts 0-15 */
5050 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5051 }
5052 
5053 /**
5054  * cik_pcie_gart_enable - gart enable
5055  *
5056  * @rdev: radeon_device pointer
5057  *
5058  * This sets up the TLBs, programs the page tables for VMID0,
5059  * sets up the hw for VMIDs 1-15 which are allocated on
5060  * demand, and sets up the global locations for the LDS, GDS,
5061  * and GPUVM for FSA64 clients (CIK).
5062  * Returns 0 for success, errors for failure.
5063  */
5064 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5065 {
5066 	int r, i;
5067 
5068 	if (rdev->gart.robj == NULL) {
5069 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5070 		return -EINVAL;
5071 	}
5072 	r = radeon_gart_table_vram_pin(rdev);
5073 	if (r)
5074 		return r;
5075 	radeon_gart_restore(rdev);
5076 	/* Setup TLB control */
5077 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5078 	       (0xA << 7) |
5079 	       ENABLE_L1_TLB |
5080 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5081 	       ENABLE_ADVANCED_DRIVER_MODEL |
5082 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5083 	/* Setup L2 cache */
5084 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5085 	       ENABLE_L2_FRAGMENT_PROCESSING |
5086 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5087 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5088 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5089 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5090 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5091 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5092 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5093 	/* setup context0 */
5094 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5095 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5096 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5097 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5098 			(u32)(rdev->dummy_page.addr >> 12));
5099 	WREG32(VM_CONTEXT0_CNTL2, 0);
5100 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5101 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5102 
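	/* these three registers have no named define in this driver; they
	 * are cleared here as part of VM init (exact function unknown) */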
5103 	WREG32(0x15D4, 0);
5104 	WREG32(0x15D8, 0);
5105 	WREG32(0x15DC, 0);
5106 
5107 	/* empty context1-15 */
5108 	/* FIXME start with 4G, once using 2 level pt switch to full
5109 	 * vm size space
5110 	 */
5111 	/* set vm size, must be a multiple of 4 */
5112 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5113 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5114 	for (i = 1; i < 16; i++) {
5115 		if (i < 8)
5116 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5117 			       rdev->gart.table_addr >> 12);
5118 		else
5119 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5120 			       rdev->gart.table_addr >> 12);
5121 	}
5122 
5123 	/* enable context1-15 */
5124 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5125 	       (u32)(rdev->dummy_page.addr >> 12));
5126 	WREG32(VM_CONTEXT1_CNTL2, 4);
5127 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5128 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5129 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5130 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5131 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5132 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5133 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5134 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5135 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5136 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5137 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5138 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5139 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5140 
5141 	/* TC cache setup ??? */
5142 	WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
5143 	WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
5144 	WREG32(TC_CFG_L1_STORE_POLICY, 0);
5145 
5146 	WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
5147 	WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
5148 	WREG32(TC_CFG_L2_STORE_POLICY0, 0);
5149 	WREG32(TC_CFG_L2_STORE_POLICY1, 0);
5150 	WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
5151 
5152 	WREG32(TC_CFG_L1_VOLATILE, 0);
5153 	WREG32(TC_CFG_L2_VOLATILE, 0);
5154 
5155 	if (rdev->family == CHIP_KAVERI) {
5156 		u32 tmp = RREG32(CHUB_CONTROL);
5157 		tmp &= ~BYPASS_VM;
5158 		WREG32(CHUB_CONTROL, tmp);
5159 	}
5160 
5161 	/* XXX SH_MEM regs */
5162 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5163 	mutex_lock(&rdev->srbm_mutex);
5164 	for (i = 0; i < 16; i++) {
5165 		cik_srbm_select(rdev, 0, 0, 0, i);
5166 		/* CP and shaders */
5167 		WREG32(SH_MEM_CONFIG, 0);
5168 		WREG32(SH_MEM_APE1_BASE, 1);
5169 		WREG32(SH_MEM_APE1_LIMIT, 0);
5170 		WREG32(SH_MEM_BASES, 0);
5171 		/* SDMA GFX */
5172 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5173 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5174 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5175 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5176 		/* XXX SDMA RLC - todo */
5177 	}
5178 	cik_srbm_select(rdev, 0, 0, 0, 0);
5179 	mutex_unlock(&rdev->srbm_mutex);
5180 
5181 	cik_pcie_gart_tlb_flush(rdev);
5182 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5183 		 (unsigned)(rdev->mc.gtt_size >> 20),
5184 		 (unsigned long long)rdev->gart.table_addr);
5185 	rdev->gart.ready = true;
5186 	return 0;
5187 }
5188 
5189 /**
5190  * cik_pcie_gart_disable - gart disable
5191  *
5192  * @rdev: radeon_device pointer
5193  *
5194  * This disables all VM page tables (CIK).
5195  */
5196 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5197 {
5198 	/* Disable all tables */
5199 	WREG32(VM_CONTEXT0_CNTL, 0);
5200 	WREG32(VM_CONTEXT1_CNTL, 0);
5201 	/* Setup TLB control */
5202 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5203 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5204 	/* Setup L2 cache */
5205 	WREG32(VM_L2_CNTL,
5206 	       ENABLE_L2_FRAGMENT_PROCESSING |
5207 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5208 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5209 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5210 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5211 	WREG32(VM_L2_CNTL2, 0);
5212 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5213 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5214 	radeon_gart_table_vram_unpin(rdev);
5215 }
5216 
5217 /**
5218  * cik_pcie_gart_fini - vm fini callback
5219  *
5220  * @rdev: radeon_device pointer
5221  *
5222  * Tears down the driver GART/VM setup (CIK).
5223  */
5224 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5225 {
5226 	cik_pcie_gart_disable(rdev);
5227 	radeon_gart_table_vram_free(rdev);
5228 	radeon_gart_fini(rdev);
5229 }
5230 
5231 /* vm parser */
5232 /**
5233  * cik_ib_parse - vm ib_parse callback
5234  *
5235  * @rdev: radeon_device pointer
5236  * @ib: indirect buffer pointer
5237  *
5238  * CIK uses hw IB checking so this is a nop (CIK).
5239  */
5240 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5241 {
5242 	return 0;
5243 }
5244 
5245 /*
5246  * vm
5247  * VMID 0 is the physical GPU addresses as used by the kernel.
5248  * VMIDs 1-15 are used for userspace clients and are handled
5249  * by the radeon vm/hsa code.
5250  */
5251 /**
5252  * cik_vm_init - cik vm init callback
5253  *
5254  * @rdev: radeon_device pointer
5255  *
5256  * Inits cik specific vm parameters (number of VMs, base of vram for
5257  * VMIDs 1-15) (CIK).
5258  * Returns 0 for success.
5259  */
5260 int cik_vm_init(struct radeon_device *rdev)
5261 {
5262 	/* number of VMs */
5263 	rdev->vm_manager.nvm = 16;
5264 	/* base offset of vram pages */
5265 	if (rdev->flags & RADEON_IS_IGP) {
5266 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5267 		tmp <<= 22;
5268 		rdev->vm_manager.vram_base_offset = tmp;
5269 	} else
5270 		rdev->vm_manager.vram_base_offset = 0;
5271 
5272 	return 0;
5273 }
5274 
5275 /**
5276  * cik_vm_fini - cik vm fini callback
5277  *
5278  * @rdev: radeon_device pointer
5279  *
5280  * Tear down any asic specific VM setup (CIK).
5281  */
5282 void cik_vm_fini(struct radeon_device *rdev)
5283 {
5284 }
5285 
5286 /**
5287  * cik_vm_decode_fault - print human readable fault info
5288  *
5289  * @rdev: radeon_device pointer
5290  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5291  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5292  *
5293  * Print human readable fault information (CIK).
5294  */
5295 static void cik_vm_decode_fault(struct radeon_device *rdev,
5296 				u32 status, u32 addr, u32 mc_client)
5297 {
5298 	u32 mc_id;
5299 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5300 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5301 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5302 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5303 
5304 	if (rdev->family == CHIP_HAWAII)
5305 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5306 	else
5307 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5308 
5309 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5310 	       protections, vmid, addr,
5311 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5312 	       block, mc_client, mc_id);
5313 }
5314 
5315 /**
5316  * cik_vm_flush - cik vm flush using the CP
5317  *
5318  * @rdev: radeon_device pointer
 * @ridx: index of the ring performing the flush
 * @vm: VM to flush; a NULL vm is a no-op
5319  *
5320  * Update the page table base and flush the VM TLB
5321  * using the CP (CIK).
5322  */
5323 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5324 {
5325 	struct radeon_ring *ring = &rdev->ring[ridx];
5326 
5327 	if (vm == NULL)
5328 		return;
5329 
5330 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5331 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5332 				 WRITE_DATA_DST_SEL(0)));
5333 	if (vm->id < 8) {
5334 		radeon_ring_write(ring,
5335 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5336 	} else {
5337 		radeon_ring_write(ring,
5338 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5339 	}
5340 	radeon_ring_write(ring, 0);
5341 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5342 
5343 	/* update SH_MEM_* regs */
5344 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5345 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5346 				 WRITE_DATA_DST_SEL(0)));
5347 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5348 	radeon_ring_write(ring, 0);
5349 	radeon_ring_write(ring, VMID(vm->id));
5350 
5351 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5352 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5353 				 WRITE_DATA_DST_SEL(0)));
5354 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5355 	radeon_ring_write(ring, 0);
5356 
5357 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5358 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5359 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5360 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5361 
5362 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5363 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5364 				 WRITE_DATA_DST_SEL(0)));
5365 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5366 	radeon_ring_write(ring, 0);
5367 	radeon_ring_write(ring, VMID(0));
5368 
5369 	/* HDP flush */
5370 	/* We should be using the WAIT_REG_MEM packet here like in
5371 	 * cik_fence_ring_emit(), but it causes the CP to hang in this
5372 	 * context...
5373 	 */
5374 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5375 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5376 				 WRITE_DATA_DST_SEL(0)));
5377 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5378 	radeon_ring_write(ring, 0);
5379 	radeon_ring_write(ring, 0);
5380 
5381 	/* bits 0-15 are the VM contexts 0-15 */
5382 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5383 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5384 				 WRITE_DATA_DST_SEL(0)));
5385 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5386 	radeon_ring_write(ring, 0);
5387 	radeon_ring_write(ring, 1 << vm->id);
5388 
5389 	/* compute doesn't have PFP */
5390 	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
5391 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5392 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5393 		radeon_ring_write(ring, 0x0);
5394 	}
5395 }
5396 
5397 /*
5398  * RLC
5399  * The RLC is a multi-purpose microengine that handles a
5400  * variety of functions, the most important of which is
5401  * the interrupt controller.
5402  */
5403 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5404 					  bool enable)
5405 {
5406 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5407 
5408 	if (enable)
5409 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5410 	else
5411 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5412 	WREG32(CP_INT_CNTL_RING0, tmp);
5413 }
5414 
5415 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5416 {
5417 	u32 tmp;
5418 
5419 	tmp = RREG32(RLC_LB_CNTL);
5420 	if (enable)
5421 		tmp |= LOAD_BALANCE_ENABLE;
5422 	else
5423 		tmp &= ~LOAD_BALANCE_ENABLE;
5424 	WREG32(RLC_LB_CNTL, tmp);
5425 }
5426 
5427 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5428 {
5429 	u32 i, j, k;
5430 	u32 mask;
5431 
5432 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5433 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5434 			cik_select_se_sh(rdev, i, j);
5435 			for (k = 0; k < rdev->usec_timeout; k++) {
5436 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5437 					break;
5438 				udelay(1);
5439 			}
5440 		}
5441 	}
5442 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5443 
5444 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5445 	for (k = 0; k < rdev->usec_timeout; k++) {
5446 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5447 			break;
5448 		udelay(1);
5449 	}
5450 }
5451 
5452 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5453 {
5454 	u32 tmp;
5455 
5456 	tmp = RREG32(RLC_CNTL);
5457 	if (tmp != rlc)
5458 		WREG32(RLC_CNTL, rlc);
5459 }
5460 
5461 static u32 cik_halt_rlc(struct radeon_device *rdev)
5462 {
5463 	u32 data, orig;
5464 
5465 	orig = data = RREG32(RLC_CNTL);
5466 
5467 	if (data & RLC_ENABLE) {
5468 		u32 i;
5469 
5470 		data &= ~RLC_ENABLE;
5471 		WREG32(RLC_CNTL, data);
5472 
5473 		for (i = 0; i < rdev->usec_timeout; i++) {
5474 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5475 				break;
5476 			udelay(1);
5477 		}
5478 
5479 		cik_wait_for_rlc_serdes(rdev);
5480 	}
5481 
5482 	return orig;
5483 }
5484 
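/**
 * cik_enter_rlc_safe_mode - put the RLC into a safe mode for register access
 *
 * @rdev: radeon_device pointer
 *
 * Request safe mode through RLC_GPR_REG2, then wait for the gfx power
 * and clock status bits to assert and for the RLC to ack the request
 * by clearing REQ.
 */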
5485 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5486 {
5487 	u32 tmp, i, mask;
5488 
5489 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5490 	WREG32(RLC_GPR_REG2, tmp);
5491 
5492 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5493 	for (i = 0; i < rdev->usec_timeout; i++) {
5494 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5495 			break;
5496 		udelay(1);
5497 	}
5498 
5499 	for (i = 0; i < rdev->usec_timeout; i++) {
5500 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5501 			break;
5502 		udelay(1);
5503 	}
5504 }
5505 
5506 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5507 {
5508 	u32 tmp;
5509 
5510 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5511 	WREG32(RLC_GPR_REG2, tmp);
5512 }
5513 
5514 /**
5515  * cik_rlc_stop - stop the RLC ME
5516  *
5517  * @rdev: radeon_device pointer
5518  *
5519  * Halt the RLC ME (MicroEngine) (CIK).
5520  */
5521 static void cik_rlc_stop(struct radeon_device *rdev)
5522 {
5523 	WREG32(RLC_CNTL, 0);
5524 
5525 	cik_enable_gui_idle_interrupt(rdev, false);
5526 
5527 	cik_wait_for_rlc_serdes(rdev);
5528 }
5529 
5530 /**
5531  * cik_rlc_start - start the RLC ME
5532  *
5533  * @rdev: radeon_device pointer
5534  *
5535  * Unhalt the RLC ME (MicroEngine) (CIK).
5536  */
5537 static void cik_rlc_start(struct radeon_device *rdev)
5538 {
5539 	WREG32(RLC_CNTL, RLC_ENABLE);
5540 
5541 	cik_enable_gui_idle_interrupt(rdev, true);
5542 
5543 	udelay(50);
5544 }
5545 
5546 /**
5547  * cik_rlc_resume - setup the RLC hw
5548  *
5549  * @rdev: radeon_device pointer
5550  *
5551  * Initialize the RLC registers, load the ucode,
5552  * and start the RLC (CIK).
5553  * Returns 0 for success, -EINVAL if the ucode is not available.
5554  */
5555 static int cik_rlc_resume(struct radeon_device *rdev)
5556 {
5557 	u32 i, size, tmp;
5558 	const __be32 *fw_data;
5559 
5560 	if (!rdev->rlc_fw)
5561 		return -EINVAL;
5562 
5563 	switch (rdev->family) {
5564 	case CHIP_BONAIRE:
5565 	case CHIP_HAWAII:
5566 	default:
5567 		size = BONAIRE_RLC_UCODE_SIZE;
5568 		break;
5569 	case CHIP_KAVERI:
5570 		size = KV_RLC_UCODE_SIZE;
5571 		break;
5572 	case CHIP_KABINI:
5573 		size = KB_RLC_UCODE_SIZE;
5574 		break;
5575 	}
5576 
5577 	cik_rlc_stop(rdev);
5578 
5579 	/* disable CG */
5580 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5581 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5582 
5583 	si_rlc_reset(rdev);
5584 
5585 	cik_init_pg(rdev);
5586 
5587 	cik_init_cg(rdev);
5588 
5589 	WREG32(RLC_LB_CNTR_INIT, 0);
5590 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5591 
5592 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5593 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5594 	WREG32(RLC_LB_PARAMS, 0x00600408);
5595 	WREG32(RLC_LB_CNTL, 0x80000004);
5596 
5597 	WREG32(RLC_MC_CNTL, 0);
5598 	WREG32(RLC_UCODE_CNTL, 0);
5599 
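	/* the RLC ucode image is big-endian; load it one dword at a time */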
5600 	fw_data = (const __be32 *)rdev->rlc_fw->data;
5601 	WREG32(RLC_GPM_UCODE_ADDR, 0);
5602 	for (i = 0; i < size; i++)
5603 		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5604 	WREG32(RLC_GPM_UCODE_ADDR, 0);
5605 
5606 	/* XXX - find out what chips support lbpw */
5607 	cik_enable_lbpw(rdev, false);
5608 
5609 	if (rdev->family == CHIP_BONAIRE)
5610 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
5611 
5612 	cik_rlc_start(rdev);
5613 
5614 	return 0;
5615 }
5616 
5617 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5618 {
5619 	u32 data, orig, tmp, tmp2;
5620 
5621 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5622 
5623 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5624 		cik_enable_gui_idle_interrupt(rdev, true);
5625 
5626 		tmp = cik_halt_rlc(rdev);
5627 
5628 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5629 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5630 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5631 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5632 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
5633 
5634 		cik_update_rlc(rdev, tmp);
5635 
5636 		data |= CGCG_EN | CGLS_EN;
5637 	} else {
5638 		cik_enable_gui_idle_interrupt(rdev, false);
5639 
5640 		RREG32(CB_CGTT_SCLK_CTRL);
5641 		RREG32(CB_CGTT_SCLK_CTRL);
5642 		RREG32(CB_CGTT_SCLK_CTRL);
5643 		RREG32(CB_CGTT_SCLK_CTRL);
5644 
5645 		data &= ~(CGCG_EN | CGLS_EN);
5646 	}
5647 
5648 	if (orig != data)
5649 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5650 
5651 }
5652 
5653 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5654 {
5655 	u32 data, orig, tmp = 0;
5656 
5657 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5658 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
5659 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5660 				orig = data = RREG32(CP_MEM_SLP_CNTL);
5661 				data |= CP_MEM_LS_EN;
5662 				if (orig != data)
5663 					WREG32(CP_MEM_SLP_CNTL, data);
5664 			}
5665 		}
5666 
5667 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5668 		data &= 0xfffffffd;
5669 		if (orig != data)
5670 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5671 
5672 		tmp = cik_halt_rlc(rdev);
5673 
5674 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5675 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5676 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5677 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5678 		WREG32(RLC_SERDES_WR_CTRL, data);
5679 
5680 		cik_update_rlc(rdev, tmp);
5681 
5682 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
5683 			orig = data = RREG32(CGTS_SM_CTRL_REG);
5684 			data &= ~SM_MODE_MASK;
5685 			data |= SM_MODE(0x2);
5686 			data |= SM_MODE_ENABLE;
5687 			data &= ~CGTS_OVERRIDE;
5688 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
5689 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
5690 				data &= ~CGTS_LS_OVERRIDE;
5691 			data &= ~ON_MONITOR_ADD_MASK;
5692 			data |= ON_MONITOR_ADD_EN;
5693 			data |= ON_MONITOR_ADD(0x96);
5694 			if (orig != data)
5695 				WREG32(CGTS_SM_CTRL_REG, data);
5696 		}
5697 	} else {
5698 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5699 		data |= 0x00000002;
5700 		if (orig != data)
5701 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5702 
5703 		data = RREG32(RLC_MEM_SLP_CNTL);
5704 		if (data & RLC_MEM_LS_EN) {
5705 			data &= ~RLC_MEM_LS_EN;
5706 			WREG32(RLC_MEM_SLP_CNTL, data);
5707 		}
5708 
5709 		data = RREG32(CP_MEM_SLP_CNTL);
5710 		if (data & CP_MEM_LS_EN) {
5711 			data &= ~CP_MEM_LS_EN;
5712 			WREG32(CP_MEM_SLP_CNTL, data);
5713 		}
5714 
5715 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5716 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
5717 		if (orig != data)
5718 			WREG32(CGTS_SM_CTRL_REG, data);
5719 
5720 		tmp = cik_halt_rlc(rdev);
5721 
5722 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5723 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5724 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5725 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
5726 		WREG32(RLC_SERDES_WR_CTRL, data);
5727 
5728 		cik_update_rlc(rdev, tmp);
5729 	}
5730 }
5731 
5732 static const u32 mc_cg_registers[] =
5733 {
5734 	MC_HUB_MISC_HUB_CG,
5735 	MC_HUB_MISC_SIP_CG,
5736 	MC_HUB_MISC_VM_CG,
5737 	MC_XPB_CLK_GAT,
5738 	ATC_MISC_CG,
5739 	MC_CITF_MISC_WR_CG,
5740 	MC_CITF_MISC_RD_CG,
5741 	MC_CITF_MISC_VM_CG,
5742 	VM_L2_CG,
5743 };
5744 
5745 static void cik_enable_mc_ls(struct radeon_device *rdev,
5746 			     bool enable)
5747 {
5748 	int i;
5749 	u32 orig, data;
5750 
5751 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5752 		orig = data = RREG32(mc_cg_registers[i]);
5753 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5754 			data |= MC_LS_ENABLE;
5755 		else
5756 			data &= ~MC_LS_ENABLE;
5757 		if (data != orig)
5758 			WREG32(mc_cg_registers[i], data);
5759 	}
5760 }
5761 
5762 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5763 			       bool enable)
5764 {
5765 	int i;
5766 	u32 orig, data;
5767 
5768 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5769 		orig = data = RREG32(mc_cg_registers[i]);
5770 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5771 			data |= MC_CG_ENABLE;
5772 		else
5773 			data &= ~MC_CG_ENABLE;
5774 		if (data != orig)
5775 			WREG32(mc_cg_registers[i], data);
5776 	}
5777 }
5778 
5779 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5780 				 bool enable)
5781 {
5782 	u32 orig, data;
5783 
5784 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5785 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5786 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5787 	} else {
5788 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5789 		data |= 0xff000000;
5790 		if (data != orig)
5791 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5792 
5793 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5794 		data |= 0xff000000;
5795 		if (data != orig)
5796 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5797 	}
5798 }
5799 
5800 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5801 				 bool enable)
5802 {
5803 	u32 orig, data;
5804 
5805 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
5806 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5807 		data |= 0x100;
5808 		if (orig != data)
5809 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5810 
5811 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5812 		data |= 0x100;
5813 		if (orig != data)
5814 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5815 	} else {
5816 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5817 		data &= ~0x100;
5818 		if (orig != data)
5819 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5820 
5821 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5822 		data &= ~0x100;
5823 		if (orig != data)
5824 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5825 	}
5826 }
5827 
5828 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
5829 				bool enable)
5830 {
5831 	u32 orig, data;
5832 
5833 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5834 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5835 		data |= 0xfff;
5836 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5837 
5838 		orig = data = RREG32(UVD_CGC_CTRL);
5839 		data |= DCM;
5840 		if (orig != data)
5841 			WREG32(UVD_CGC_CTRL, data);
5842 	} else {
5843 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5844 		data &= ~0xfff;
5845 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5846 
5847 		orig = data = RREG32(UVD_CGC_CTRL);
5848 		data &= ~DCM;
5849 		if (orig != data)
5850 			WREG32(UVD_CGC_CTRL, data);
5851 	}
5852 }
5853 
5854 static void cik_enable_bif_mgls(struct radeon_device *rdev,
5855 			       bool enable)
5856 {
5857 	u32 orig, data;
5858 
5859 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
5860 
5861 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5862 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5863 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5864 	else
5865 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5866 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5867 
5868 	if (orig != data)
5869 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
5870 }
5871 
5872 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5873 				bool enable)
5874 {
5875 	u32 orig, data;
5876 
5877 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5878 
5879 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5880 		data &= ~CLOCK_GATING_DIS;
5881 	else
5882 		data |= CLOCK_GATING_DIS;
5883 
5884 	if (orig != data)
5885 		WREG32(HDP_HOST_PATH_CNTL, data);
5886 }
5887 
5888 static void cik_enable_hdp_ls(struct radeon_device *rdev,
5889 			      bool enable)
5890 {
5891 	u32 orig, data;
5892 
5893 	orig = data = RREG32(HDP_MEM_POWER_LS);
5894 
5895 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5896 		data |= HDP_LS_ENABLE;
5897 	else
5898 		data &= ~HDP_LS_ENABLE;
5899 
5900 	if (orig != data)
5901 		WREG32(HDP_MEM_POWER_LS, data);
5902 }
5903 
5904 void cik_update_cg(struct radeon_device *rdev,
5905 		   u32 block, bool enable)
5906 {
5907 
5908 	if (block & RADEON_CG_BLOCK_GFX) {
5909 		cik_enable_gui_idle_interrupt(rdev, false);
5910 		/* order matters! */
5911 		if (enable) {
5912 			cik_enable_mgcg(rdev, true);
5913 			cik_enable_cgcg(rdev, true);
5914 		} else {
5915 			cik_enable_cgcg(rdev, false);
5916 			cik_enable_mgcg(rdev, false);
5917 		}
5918 		cik_enable_gui_idle_interrupt(rdev, true);
5919 	}
5920 
5921 	if (block & RADEON_CG_BLOCK_MC) {
5922 		if (!(rdev->flags & RADEON_IS_IGP)) {
5923 			cik_enable_mc_mgcg(rdev, enable);
5924 			cik_enable_mc_ls(rdev, enable);
5925 		}
5926 	}
5927 
5928 	if (block & RADEON_CG_BLOCK_SDMA) {
5929 		cik_enable_sdma_mgcg(rdev, enable);
5930 		cik_enable_sdma_mgls(rdev, enable);
5931 	}
5932 
5933 	if (block & RADEON_CG_BLOCK_BIF) {
5934 		cik_enable_bif_mgls(rdev, enable);
5935 	}
5936 
5937 	if (block & RADEON_CG_BLOCK_UVD) {
5938 		if (rdev->has_uvd)
5939 			cik_enable_uvd_mgcg(rdev, enable);
5940 	}
5941 
5942 	if (block & RADEON_CG_BLOCK_HDP) {
5943 		cik_enable_hdp_mgcg(rdev, enable);
5944 		cik_enable_hdp_ls(rdev, enable);
5945 	}
5946 }
5947 
5948 static void cik_init_cg(struct radeon_device *rdev)
5949 {
5950 
5951 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
5952 
5953 	if (rdev->has_uvd)
5954 		si_init_uvd_internal_cg(rdev);
5955 
5956 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5957 			     RADEON_CG_BLOCK_SDMA |
5958 			     RADEON_CG_BLOCK_BIF |
5959 			     RADEON_CG_BLOCK_UVD |
5960 			     RADEON_CG_BLOCK_HDP), true);
5961 }
5962 
5963 static void cik_fini_cg(struct radeon_device *rdev)
5964 {
5965 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5966 			     RADEON_CG_BLOCK_SDMA |
5967 			     RADEON_CG_BLOCK_BIF |
5968 			     RADEON_CG_BLOCK_UVD |
5969 			     RADEON_CG_BLOCK_HDP), false);
5970 
5971 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
5972 }
5973 
5974 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
5975 					  bool enable)
5976 {
5977 	u32 data, orig;
5978 
5979 	orig = data = RREG32(RLC_PG_CNTL);
5980 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5981 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5982 	else
5983 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5984 	if (orig != data)
5985 		WREG32(RLC_PG_CNTL, data);
5986 }
5987 
5988 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
5989 					  bool enable)
5990 {
5991 	u32 data, orig;
5992 
5993 	orig = data = RREG32(RLC_PG_CNTL);
5994 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5995 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5996 	else
5997 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5998 	if (orig != data)
5999 		WREG32(RLC_PG_CNTL, data);
6000 }
6001 
6002 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6003 {
6004 	u32 data, orig;
6005 
6006 	orig = data = RREG32(RLC_PG_CNTL);
6007 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6008 		data &= ~DISABLE_CP_PG;
6009 	else
6010 		data |= DISABLE_CP_PG;
6011 	if (orig != data)
6012 		WREG32(RLC_PG_CNTL, data);
6013 }
6014 
6015 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6016 {
6017 	u32 data, orig;
6018 
6019 	orig = data = RREG32(RLC_PG_CNTL);
6020 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6021 		data &= ~DISABLE_GDS_PG;
6022 	else
6023 		data |= DISABLE_GDS_PG;
6024 	if (orig != data)
6025 		WREG32(RLC_PG_CNTL, data);
6026 }
6027 
6028 #define CP_ME_TABLE_SIZE    96
6029 #define CP_ME_TABLE_OFFSET  2048
6030 #define CP_MEC_TABLE_OFFSET 4096
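/* Each ME contributes CP_ME_TABLE_SIZE dwords to the CP power gating
 * table, copied from a fixed dword offset within its ucode image:
 * CP_ME_TABLE_OFFSET for CE/PFP/ME, CP_MEC_TABLE_OFFSET for the MECs.
 */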
6031 
6032 void cik_init_cp_pg_table(struct radeon_device *rdev)
6033 {
6034 	const __be32 *fw_data;
6035 	volatile u32 *dst_ptr;
6036 	int me, i, max_me = 4;
6037 	u32 bo_offset = 0;
6038 	u32 table_offset;
6039 
6040 	if (rdev->family == CHIP_KAVERI)
6041 		max_me = 5;
6042 
6043 	if (rdev->rlc.cp_table_ptr == NULL)
6044 		return;
6045 
6046 	/* write the cp table buffer */
6047 	dst_ptr = rdev->rlc.cp_table_ptr;
6048 	for (me = 0; me < max_me; me++) {
6049 		if (me == 0) {
6050 			fw_data = (const __be32 *)rdev->ce_fw->data;
6051 			table_offset = CP_ME_TABLE_OFFSET;
6052 		} else if (me == 1) {
6053 			fw_data = (const __be32 *)rdev->pfp_fw->data;
6054 			table_offset = CP_ME_TABLE_OFFSET;
6055 		} else if (me == 2) {
6056 			fw_data = (const __be32 *)rdev->me_fw->data;
6057 			table_offset = CP_ME_TABLE_OFFSET;
6058 		} else {
6059 			fw_data = (const __be32 *)rdev->mec_fw->data;
6060 			table_offset = CP_MEC_TABLE_OFFSET;
6061 		}
6062 
6063 		for (i = 0; i < CP_ME_TABLE_SIZE; i++) {
6064 			dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6065 		}
6066 		bo_offset += CP_ME_TABLE_SIZE;
6067 	}
6068 }
6069 
6070 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6071 				bool enable)
6072 {
6073 	u32 data, orig;
6074 
6075 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6076 		orig = data = RREG32(RLC_PG_CNTL);
6077 		data |= GFX_PG_ENABLE;
6078 		if (orig != data)
6079 			WREG32(RLC_PG_CNTL, data);
6080 
6081 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6082 		data |= AUTO_PG_EN;
6083 		if (orig != data)
6084 			WREG32(RLC_AUTO_PG_CTRL, data);
6085 	} else {
6086 		orig = data = RREG32(RLC_PG_CNTL);
6087 		data &= ~GFX_PG_ENABLE;
6088 		if (orig != data)
6089 			WREG32(RLC_PG_CNTL, data);
6090 
6091 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6092 		data &= ~AUTO_PG_EN;
6093 		if (orig != data)
6094 			WREG32(RLC_AUTO_PG_CTRL, data);
6095 
6096 		data = RREG32(DB_RENDER_CONTROL);
6097 	}
6098 }
6099 
6100 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6101 {
6102 	u32 mask = 0, tmp, tmp1;
6103 	int i;
6104 
6105 	cik_select_se_sh(rdev, se, sh);
6106 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6107 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6108 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6109 
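	/* the upper half of CC_GC_SHADER_ARRAY_CONFIG holds the hw-disabled
	 * CU mask; OR in the user-disabled mask (same field layout assumed)
	 * and shift it down so the active bitmap falls out of the inversion
	 * below.
	 */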
6110 	tmp &= 0xffff0000;
6111 
6112 	tmp |= tmp1;
6113 	tmp >>= 16;
6114 
6115 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6116 		mask <<= 1;
6117 		mask |= 1;
6118 	}
6119 
6120 	return (~tmp) & mask;
6121 }
6122 
6123 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6124 {
6125 	u32 i, j, k, active_cu_number = 0;
6126 	u32 mask, counter, cu_bitmap;
6127 	u32 tmp = 0;
6128 
6129 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6130 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6131 			mask = 1;
6132 			cu_bitmap = 0;
6133 			counter = 0;
6134 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6135 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6136 					if (counter < 2)
6137 						cu_bitmap |= mask;
6138 					counter++;
6139 				}
6140 				mask <<= 1;
6141 			}
6142 
6143 			active_cu_number += counter;
6144 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6145 		}
6146 	}
6147 
6148 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6149 
6150 	tmp = RREG32(RLC_MAX_PG_CU);
6151 	tmp &= ~MAX_PU_CU_MASK;
6152 	tmp |= MAX_PU_CU(active_cu_number);
6153 	WREG32(RLC_MAX_PG_CU, tmp);
6154 }
6155 
6156 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6157 				       bool enable)
6158 {
6159 	u32 data, orig;
6160 
6161 	orig = data = RREG32(RLC_PG_CNTL);
6162 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6163 		data |= STATIC_PER_CU_PG_ENABLE;
6164 	else
6165 		data &= ~STATIC_PER_CU_PG_ENABLE;
6166 	if (orig != data)
6167 		WREG32(RLC_PG_CNTL, data);
6168 }
6169 
6170 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6171 					bool enable)
6172 {
6173 	u32 data, orig;
6174 
6175 	orig = data = RREG32(RLC_PG_CNTL);
6176 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6177 		data |= DYN_PER_CU_PG_ENABLE;
6178 	else
6179 		data &= ~DYN_PER_CU_PG_ENABLE;
6180 	if (orig != data)
6181 		WREG32(RLC_PG_CNTL, data);
6182 }
6183 
6184 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6185 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
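/* The clear state descriptor written by cik_init_gfx_cgpg() below is
 * three dwords at scratch offset RLC_CLEAR_STATE_DESCRIPTOR_OFFSET:
 * address high, address low, then the clear state buffer size.
 */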
6186 
6187 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6188 {
6189 	u32 data, orig;
6190 	u32 i;
6191 
6192 	if (rdev->rlc.cs_data) {
6193 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6194 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6195 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6196 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6197 	} else {
6198 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6199 		for (i = 0; i < 3; i++)
6200 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6201 	}
6202 	if (rdev->rlc.reg_list) {
6203 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6204 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6205 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6206 	}
6207 
6208 	orig = data = RREG32(RLC_PG_CNTL);
6209 	data |= GFX_PG_SRC;
6210 	if (orig != data)
6211 		WREG32(RLC_PG_CNTL, data);
6212 
6213 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6214 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6215 
6216 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6217 	data &= ~IDLE_POLL_COUNT_MASK;
6218 	data |= IDLE_POLL_COUNT(0x60);
6219 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6220 
6221 	data = 0x10101010;
6222 	WREG32(RLC_PG_DELAY, data);
6223 
6224 	data = RREG32(RLC_PG_DELAY_2);
6225 	data &= ~0xff;
6226 	data |= 0x3;
6227 	WREG32(RLC_PG_DELAY_2, data);
6228 
6229 	data = RREG32(RLC_AUTO_PG_CTRL);
6230 	data &= ~GRBM_REG_SGIT_MASK;
6231 	data |= GRBM_REG_SGIT(0x700);
6232 	WREG32(RLC_AUTO_PG_CTRL, data);
6233 
6234 }
6235 
6236 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6237 {
6238 	cik_enable_gfx_cgpg(rdev, enable);
6239 	cik_enable_gfx_static_mgpg(rdev, enable);
6240 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
6241 }
6242 
6243 u32 cik_get_csb_size(struct radeon_device *rdev)
6244 {
6245 	u32 count = 0;
6246 	const struct cs_section_def *sect = NULL;
6247 	const struct cs_extent_def *ext = NULL;
6248 
6249 	if (rdev->rlc.cs_data == NULL)
6250 		return 0;
6251 
6252 	/* begin clear state */
6253 	count += 2;
6254 	/* context control state */
6255 	count += 3;
6256 
6257 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6258 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6259 			if (sect->id == SECT_CONTEXT)
6260 				count += 2 + ext->reg_count;
6261 			else
6262 				return 0;
6263 		}
6264 	}
6265 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6266 	count += 4;
6267 	/* end clear state */
6268 	count += 2;
6269 	/* clear state */
6270 	count += 2;
6271 
6272 	return count;
6273 }
6274 
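/* Note: the dword counts in cik_get_csb_size() above must stay in sync
 * with the packets emitted here, or the clear state buffer will be
 * mis-sized.
 */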
6275 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6276 {
6277 	u32 count = 0, i;
6278 	const struct cs_section_def *sect = NULL;
6279 	const struct cs_extent_def *ext = NULL;
6280 
6281 	if (rdev->rlc.cs_data == NULL)
6282 		return;
6283 	if (buffer == NULL)
6284 		return;
6285 
6286 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6287 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6288 
6289 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6290 	buffer[count++] = cpu_to_le32(0x80000000);
6291 	buffer[count++] = cpu_to_le32(0x80000000);
6292 
6293 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6294 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6295 			if (sect->id == SECT_CONTEXT) {
6296 				buffer[count++] =
6297 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6298 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6299 				for (i = 0; i < ext->reg_count; i++)
6300 					buffer[count++] = cpu_to_le32(ext->extent[i]);
6301 			} else {
6302 				return;
6303 			}
6304 		}
6305 	}
6306 
6307 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6308 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6309 	switch (rdev->family) {
6310 	case CHIP_BONAIRE:
6311 		buffer[count++] = cpu_to_le32(0x16000012);
6312 		buffer[count++] = cpu_to_le32(0x00000000);
6313 		break;
6314 	case CHIP_KAVERI:
6315 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6316 		buffer[count++] = cpu_to_le32(0x00000000);
6317 		break;
6318 	case CHIP_KABINI:
6319 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6320 		buffer[count++] = cpu_to_le32(0x00000000);
6321 		break;
6322 	case CHIP_HAWAII:
6323 		buffer[count++] = cpu_to_le32(0x3a00161a);
6324 		buffer[count++] = cpu_to_le32(0x0000002e);
6325 		break;
6326 	default:
6327 		buffer[count++] = cpu_to_le32(0x00000000);
6328 		buffer[count++] = cpu_to_le32(0x00000000);
6329 		break;
6330 	}
6331 
6332 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6333 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6334 
6335 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6336 	buffer[count++] = cpu_to_le32(0);
6337 }
6338 
6339 static void cik_init_pg(struct radeon_device *rdev)
6340 {
6341 	if (rdev->pg_flags) {
6342 		cik_enable_sck_slowdown_on_pu(rdev, true);
6343 		cik_enable_sck_slowdown_on_pd(rdev, true);
6344 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6345 			cik_init_gfx_cgpg(rdev);
6346 			cik_enable_cp_pg(rdev, true);
6347 			cik_enable_gds_pg(rdev, true);
6348 		}
6349 		cik_init_ao_cu_mask(rdev);
6350 		cik_update_gfx_pg(rdev, true);
6351 	}
6352 }
6353 
6354 static void cik_fini_pg(struct radeon_device *rdev)
6355 {
6356 	if (rdev->pg_flags) {
6357 		cik_update_gfx_pg(rdev, false);
6358 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6359 			cik_enable_cp_pg(rdev, false);
6360 			cik_enable_gds_pg(rdev, false);
6361 		}
6362 	}
6363 }
6364 
6365 /*
6366  * Interrupts
6367  * Starting with r6xx, interrupts are handled via a ring buffer.
6368  * Ring buffers are areas of GPU accessible memory that the GPU
6369  * writes interrupt vectors into and the host reads vectors out of.
6370  * There is a rptr (read pointer) that determines where the
6371  * host is currently reading, and a wptr (write pointer)
6372  * which determines where the GPU has written.  When the
6373  * pointers are equal, the ring is idle.  When the GPU
6374  * writes vectors to the ring buffer, it increments the
6375  * wptr.  When there is an interrupt, the host then starts
6376  * fetching commands and processing them until the pointers are
6377  * equal again at which point it updates the rptr.
6378  */
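
/*
 * Illustrative sketch only (not the actual handler, which lives in
 * cik_irq_process()): a consumer of this ring looks roughly like the
 * following, with 16-byte vectors and the rptr/wptr protocol above:
 *
 *	wptr = cik_get_ih_wptr(rdev);
 *	while (rptr != wptr) {
 *		src_id   = le32_to_cpu(rdev->ih.ring[rptr / 4]) & 0xff;
 *		src_data = le32_to_cpu(rdev->ih.ring[rptr / 4 + 1]) & 0xfffffff;
 *		... dispatch (src_id, src_data) ...
 *		rptr = (rptr + 16) & rdev->ih.ptr_mask;
 *	}
 *	WREG32(IH_RB_RPTR, rptr);
 */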
6379 
6380 /**
6381  * cik_enable_interrupts - Enable the interrupt ring buffer
6382  *
6383  * @rdev: radeon_device pointer
6384  *
6385  * Enable the interrupt ring buffer (CIK).
6386  */
6387 static void cik_enable_interrupts(struct radeon_device *rdev)
6388 {
6389 	u32 ih_cntl = RREG32(IH_CNTL);
6390 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6391 
6392 	ih_cntl |= ENABLE_INTR;
6393 	ih_rb_cntl |= IH_RB_ENABLE;
6394 	WREG32(IH_CNTL, ih_cntl);
6395 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6396 	rdev->ih.enabled = true;
6397 }
6398 
6399 /**
6400  * cik_disable_interrupts - Disable the interrupt ring buffer
6401  *
6402  * @rdev: radeon_device pointer
6403  *
6404  * Disable the interrupt ring buffer (CIK).
6405  */
6406 static void cik_disable_interrupts(struct radeon_device *rdev)
6407 {
6408 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6409 	u32 ih_cntl = RREG32(IH_CNTL);
6410 
6411 	ih_rb_cntl &= ~IH_RB_ENABLE;
6412 	ih_cntl &= ~ENABLE_INTR;
6413 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6414 	WREG32(IH_CNTL, ih_cntl);
6415 	/* set rptr, wptr to 0 */
6416 	WREG32(IH_RB_RPTR, 0);
6417 	WREG32(IH_RB_WPTR, 0);
6418 	rdev->ih.enabled = false;
6419 	rdev->ih.rptr = 0;
6420 }
6421 
6422 /**
6423  * cik_disable_interrupt_state - Disable all interrupt sources
6424  *
6425  * @rdev: radeon_device pointer
6426  *
6427  * Clear all interrupt enable bits used by the driver (CIK).
6428  */
6429 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6430 {
6431 	u32 tmp;
6432 
6433 	/* gfx ring */
6434 	tmp = RREG32(CP_INT_CNTL_RING0) &
6435 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6436 	WREG32(CP_INT_CNTL_RING0, tmp);
6437 	/* sdma */
6438 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6439 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6440 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6441 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6442 	/* compute queues */
6443 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6444 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6445 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6446 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6447 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6448 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6449 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6450 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6451 	/* grbm */
6452 	WREG32(GRBM_INT_CNTL, 0);
6453 	/* vline/vblank, etc. */
6454 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6455 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6456 	if (rdev->num_crtc >= 4) {
6457 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6458 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6459 	}
6460 	if (rdev->num_crtc >= 6) {
6461 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6462 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6463 	}
6464 
6465 	/* dac hotplug */
6466 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6467 
6468 	/* digital hotplug */
6469 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6470 	WREG32(DC_HPD1_INT_CONTROL, tmp);
6471 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6472 	WREG32(DC_HPD2_INT_CONTROL, tmp);
6473 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6474 	WREG32(DC_HPD3_INT_CONTROL, tmp);
6475 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6476 	WREG32(DC_HPD4_INT_CONTROL, tmp);
6477 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6478 	WREG32(DC_HPD5_INT_CONTROL, tmp);
6479 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6480 	WREG32(DC_HPD6_INT_CONTROL, tmp);
6481 
6482 }
6483 
6484 /**
6485  * cik_irq_init - init and enable the interrupt ring
6486  *
6487  * @rdev: radeon_device pointer
6488  *
6489  * Allocate a ring buffer for the interrupt controller,
6490  * enable the RLC, disable interrupts, then set up
6491  * and enable the IH ring buffer (CIK).
6492  * Called at device load and resume.
6493  * Returns 0 for success, errors for failure.
6494  */
6495 static int cik_irq_init(struct radeon_device *rdev)
6496 {
6497 	int ret = 0;
6498 	int rb_bufsz;
6499 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6500 
6501 	/* allocate ring */
6502 	ret = r600_ih_ring_alloc(rdev);
6503 	if (ret)
6504 		return ret;
6505 
6506 	/* disable irqs */
6507 	cik_disable_interrupts(rdev);
6508 
6509 	/* init rlc */
6510 	ret = cik_rlc_resume(rdev);
6511 	if (ret) {
6512 		r600_ih_ring_fini(rdev);
6513 		return ret;
6514 	}
6515 
6516 	/* setup interrupt control */
6517 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
6518 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6519 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6520 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6521 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6522 	 */
6523 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6524 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6525 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6526 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6527 
6528 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6529 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
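	/* rb_bufsz is log2 of the ring size in dwords; IH_RB_CNTL carries
	 * it in its ring size field below */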
6530 
6531 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6532 		      IH_WPTR_OVERFLOW_CLEAR |
6533 		      (rb_bufsz << 1));
6534 
6535 	if (rdev->wb.enabled)
6536 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6537 
6538 	/* set the writeback address whether it's enabled or not */
6539 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6540 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6541 
6542 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6543 
6544 	/* set rptr, wptr to 0 */
6545 	WREG32(IH_RB_RPTR, 0);
6546 	WREG32(IH_RB_WPTR, 0);
6547 
6548 	/* Default settings for IH_CNTL (disabled at first) */
6549 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6550 	/* RPTR_REARM only works if MSIs are enabled */
6551 	if (rdev->msi_enabled)
6552 		ih_cntl |= RPTR_REARM;
6553 	WREG32(IH_CNTL, ih_cntl);
6554 
6555 	/* force the active interrupt state to all disabled */
6556 	cik_disable_interrupt_state(rdev);
6557 
6558 	pci_set_master(rdev->pdev);
6559 
6560 	/* enable irqs */
6561 	cik_enable_interrupts(rdev);
6562 
6563 	return ret;
6564 }
6565 
6566 /**
6567  * cik_irq_set - enable/disable interrupt sources
6568  *
6569  * @rdev: radeon_device pointer
6570  *
6571  * Enable interrupt sources on the GPU (vblanks, hpd,
6572  * etc.) (CIK).
6573  * Returns 0 for success, errors for failure.
6574  */
6575 int cik_irq_set(struct radeon_device *rdev)
6576 {
6577 	u32 cp_int_cntl;
6578 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6579 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6580 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6581 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6582 	u32 grbm_int_cntl = 0;
6583 	u32 dma_cntl, dma_cntl1;
6584 	u32 thermal_int;
6585 
6586 	if (!rdev->irq.installed) {
6587 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6588 		return -EINVAL;
6589 	}
6590 	/* don't enable anything if the ih is disabled */
6591 	if (!rdev->ih.enabled) {
6592 		cik_disable_interrupts(rdev);
6593 		/* force the active interrupt state to all disabled */
6594 		cik_disable_interrupt_state(rdev);
6595 		return 0;
6596 	}
6597 
6598 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6599 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6600 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6601 
6602 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6603 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6604 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6605 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6606 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6607 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6608 
6609 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6610 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6611 
6612 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6613 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6614 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6615 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6616 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6617 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6618 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6619 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6620 
6621 	if (rdev->flags & RADEON_IS_IGP)
6622 		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6623 			~(THERM_INTH_MASK | THERM_INTL_MASK);
6624 	else
6625 		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6626 			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6627 
6628 	/* enable CP interrupts on all rings */
6629 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6630 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
6631 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6632 	}
6633 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6634 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6635 		DRM_DEBUG("si_irq_set: sw int cp1\n");
6636 		if (ring->me == 1) {
6637 			switch (ring->pipe) {
6638 			case 0:
6639 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6640 				break;
6641 			case 1:
6642 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6643 				break;
6644 			case 2:
6645 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6646 				break;
6647 			case 3:
6648 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6649 				break;
6650 			default:
6651 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6652 				break;
6653 			}
6654 		} else if (ring->me == 2) {
6655 			switch (ring->pipe) {
6656 			case 0:
6657 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6658 				break;
6659 			case 1:
6660 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6661 				break;
6662 			case 2:
6663 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6664 				break;
6665 			case 3:
6666 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6667 				break;
6668 			default:
6669 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6670 				break;
6671 			}
6672 		} else {
6673 			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
6674 		}
6675 	}
6676 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6677 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6678 		DRM_DEBUG("si_irq_set: sw int cp2\n");
6679 		if (ring->me == 1) {
6680 			switch (ring->pipe) {
6681 			case 0:
6682 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6683 				break;
6684 			case 1:
6685 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6686 				break;
6687 			case 2:
6688 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6689 				break;
6690 			case 3:
6691 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6692 				break;
6693 			default:
6694 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6695 				break;
6696 			}
6697 		} else if (ring->me == 2) {
6698 			switch (ring->pipe) {
6699 			case 0:
6700 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6701 				break;
6702 			case 1:
6703 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6704 				break;
6705 			case 2:
6706 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6707 				break;
6708 			case 3:
6709 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6710 				break;
6711 			default:
6712 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6713 				break;
6714 			}
6715 		} else {
6716 			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
6717 		}
6718 	}
6719 
6720 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6721 		DRM_DEBUG("cik_irq_set: sw int dma\n");
6722 		dma_cntl |= TRAP_ENABLE;
6723 	}
6724 
6725 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6726 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
6727 		dma_cntl1 |= TRAP_ENABLE;
6728 	}
6729 
6730 	if (rdev->irq.crtc_vblank_int[0] ||
6731 	    atomic_read(&rdev->irq.pflip[0])) {
6732 		DRM_DEBUG("cik_irq_set: vblank 0\n");
6733 		crtc1 |= VBLANK_INTERRUPT_MASK;
6734 	}
6735 	if (rdev->irq.crtc_vblank_int[1] ||
6736 	    atomic_read(&rdev->irq.pflip[1])) {
6737 		DRM_DEBUG("cik_irq_set: vblank 1\n");
6738 		crtc2 |= VBLANK_INTERRUPT_MASK;
6739 	}
6740 	if (rdev->irq.crtc_vblank_int[2] ||
6741 	    atomic_read(&rdev->irq.pflip[2])) {
6742 		DRM_DEBUG("cik_irq_set: vblank 2\n");
6743 		crtc3 |= VBLANK_INTERRUPT_MASK;
6744 	}
6745 	if (rdev->irq.crtc_vblank_int[3] ||
6746 	    atomic_read(&rdev->irq.pflip[3])) {
6747 		DRM_DEBUG("cik_irq_set: vblank 3\n");
6748 		crtc4 |= VBLANK_INTERRUPT_MASK;
6749 	}
6750 	if (rdev->irq.crtc_vblank_int[4] ||
6751 	    atomic_read(&rdev->irq.pflip[4])) {
6752 		DRM_DEBUG("cik_irq_set: vblank 4\n");
6753 		crtc5 |= VBLANK_INTERRUPT_MASK;
6754 	}
6755 	if (rdev->irq.crtc_vblank_int[5] ||
6756 	    atomic_read(&rdev->irq.pflip[5])) {
6757 		DRM_DEBUG("cik_irq_set: vblank 5\n");
6758 		crtc6 |= VBLANK_INTERRUPT_MASK;
6759 	}
6760 	if (rdev->irq.hpd[0]) {
6761 		DRM_DEBUG("cik_irq_set: hpd 1\n");
6762 		hpd1 |= DC_HPDx_INT_EN;
6763 	}
6764 	if (rdev->irq.hpd[1]) {
6765 		DRM_DEBUG("cik_irq_set: hpd 2\n");
6766 		hpd2 |= DC_HPDx_INT_EN;
6767 	}
6768 	if (rdev->irq.hpd[2]) {
6769 		DRM_DEBUG("cik_irq_set: hpd 3\n");
6770 		hpd3 |= DC_HPDx_INT_EN;
6771 	}
6772 	if (rdev->irq.hpd[3]) {
6773 		DRM_DEBUG("cik_irq_set: hpd 4\n");
6774 		hpd4 |= DC_HPDx_INT_EN;
6775 	}
6776 	if (rdev->irq.hpd[4]) {
6777 		DRM_DEBUG("cik_irq_set: hpd 5\n");
6778 		hpd5 |= DC_HPDx_INT_EN;
6779 	}
6780 	if (rdev->irq.hpd[5]) {
6781 		DRM_DEBUG("cik_irq_set: hpd 6\n");
6782 		hpd6 |= DC_HPDx_INT_EN;
6783 	}
6784 
6785 	if (rdev->irq.dpm_thermal) {
6786 		DRM_DEBUG("dpm thermal\n");
6787 		if (rdev->flags & RADEON_IS_IGP)
6788 			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6789 		else
6790 			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6791 	}
6792 
6793 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6794 
6795 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6796 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6797 
6798 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6799 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6800 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6801 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6802 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6803 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6804 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6805 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6806 
6807 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6808 
6809 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6810 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6811 	if (rdev->num_crtc >= 4) {
6812 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6813 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6814 	}
6815 	if (rdev->num_crtc >= 6) {
6816 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6817 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6818 	}
6819 
6820 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
6821 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
6822 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
6823 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
6824 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
6825 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
6826 
6827 	if (rdev->flags & RADEON_IS_IGP)
6828 		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6829 	else
6830 		WREG32_SMC(CG_THERMAL_INT, thermal_int);
6831 
6832 	return 0;
6833 }
6834 
6835 /**
6836  * cik_irq_ack - ack interrupt sources
6837  *
6838  * @rdev: radeon_device pointer
6839  *
6840  * Ack interrupt sources on the GPU (vblanks, hpd,
6841  * etc.) (CIK).  Certain interrupt sources are sw
6842  * generated and do not require an explicit ack.
6843  */
6844 static inline void cik_irq_ack(struct radeon_device *rdev)
6845 {
6846 	u32 tmp;
6847 
6848 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6849 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6850 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6851 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6852 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6853 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6854 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6855 
6856 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6857 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6858 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6859 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6860 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6861 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6862 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6863 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6864 
6865 	if (rdev->num_crtc >= 4) {
6866 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6867 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6868 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6869 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6870 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6871 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6872 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6873 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6874 	}
6875 
6876 	if (rdev->num_crtc >= 6) {
6877 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6878 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6879 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6880 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6881 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6882 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6883 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6884 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6885 	}
6886 
6887 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6888 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6889 		tmp |= DC_HPDx_INT_ACK;
6890 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6891 	}
6892 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6893 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6894 		tmp |= DC_HPDx_INT_ACK;
6895 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6896 	}
6897 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6898 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6899 		tmp |= DC_HPDx_INT_ACK;
6900 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6901 	}
6902 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6903 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6904 		tmp |= DC_HPDx_INT_ACK;
6905 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6906 	}
6907 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6908 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6909 		tmp |= DC_HPDx_INT_ACK;
6910 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6911 	}
6912 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6913 		tmp = RREG32(DC_HPD6_INT_CONTROL);
6914 		tmp |= DC_HPDx_INT_ACK;
6915 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6916 	}
6917 }
6918 
6919 /**
6920  * cik_irq_disable - disable interrupts
6921  *
6922  * @rdev: radeon_device pointer
6923  *
6924  * Disable interrupts on the hw (CIK).
6925  */
6926 static void cik_irq_disable(struct radeon_device *rdev)
6927 {
6928 	cik_disable_interrupts(rdev);
6929 	/* Wait and acknowledge irq */
6930 	mdelay(1);
6931 	cik_irq_ack(rdev);
6932 	cik_disable_interrupt_state(rdev);
6933 }
6934 
6935 /**
6936  * cik_irq_suspend - disable interrupts for suspend
6937  *
6938  * @rdev: radeon_device pointer
6939  *
6940  * Disable interrupts and stop the RLC (CIK).
6941  * Used for suspend.
6942  */
6943 static void cik_irq_suspend(struct radeon_device *rdev)
6944 {
6945 	cik_irq_disable(rdev);
6946 	cik_rlc_stop(rdev);
6947 }
6948 
6949 /**
6950  * cik_irq_fini - tear down interrupt support
6951  *
6952  * @rdev: radeon_device pointer
6953  *
6954  * Disable interrupts on the hw and free the IH ring
6955  * buffer (CIK).
6956  * Used for driver unload.
6957  */
6958 static void cik_irq_fini(struct radeon_device *rdev)
6959 {
6960 	cik_irq_suspend(rdev);
6961 	r600_ih_ring_fini(rdev);
6962 }
6963 
6964 /**
6965  * cik_get_ih_wptr - get the IH ring buffer wptr
6966  *
6967  * @rdev: radeon_device pointer
6968  *
6969  * Get the IH ring buffer wptr from either the register
6970  * or the writeback memory buffer (CIK).  Also check for
6971  * ring buffer overflow and deal with it.
6972  * Used by cik_irq_process().
6973  * Returns the value of the wptr.
6974  */
6975 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
6976 {
6977 	u32 wptr, tmp;
6978 
6979 	if (rdev->wb.enabled)
6980 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6981 	else
6982 		wptr = RREG32(IH_RB_WPTR);
6983 
6984 	if (wptr & RB_OVERFLOW) {
6985 		/* When a ring buffer overflow happens, start parsing interrupts
6986 		 * from the last not-overwritten vector (wptr + 16).  Hopefully
6987 		 * this should allow us to catch up.
6988 		 */
6989 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6990 			wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6991 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6992 		tmp = RREG32(IH_RB_CNTL);
6993 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6994 		WREG32(IH_RB_CNTL, tmp);
6995 	}
6996 	return (wptr & rdev->ih.ptr_mask);
6997 }
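/* For example, with the 64KB IH ring allocated in cik_init()
 * (so ptr_mask = 0xffff): each IV entry is 16 bytes, so on overflow
 * the handler resumes at rptr = (wptr + 16) & 0xffff, one vector past
 * the overwrite point, and every wptr returned here is already masked
 * back into [0, 0xffff].
 */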
6998 
6999 /*        CIK IV Ring
7000  * Each IV ring entry is 128 bits:
7001  * [7:0]    - interrupt source id
7002  * [31:8]   - reserved
7003  * [59:32]  - interrupt source data
7004  * [63:60]  - reserved
7005  * [71:64]  - RINGID
7006  *            CP:
7007  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7008  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7009  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7010  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7011  *            PIPE_ID - ME0 0=3D
7012  *                    - ME1&2 compute dispatcher (4 pipes each)
7013  *            SDMA:
7014  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7015  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7016  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7017  * [79:72]  - VMID
7018  * [95:80]  - PASID
7019  * [127:96] - reserved
7020  */
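/* For reference, the RINGID decode applied by the interrupt handler
 * below.  CP sources (ME_ID in bits [6:5], PIPE_ID in [4:3],
 * QUEUE_ID in [2:0]):
 *	me_id    = (ring_id & 0x60) >> 5;
 *	pipe_id  = (ring_id & 0x18) >> 3;
 *	queue_id = (ring_id & 0x7) >> 0;
 * SDMA sources (INSTANCE_ID in bits [1:0], QUEUE_ID in [3:2]):
 *	me_id    = (ring_id & 0x3) >> 0;
 *	queue_id = (ring_id & 0xc) >> 2;
 */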
7021 /**
7022  * cik_irq_process - interrupt handler
7023  *
7024  * @rdev: radeon_device pointer
7025  *
7026  * Interrupt handler (CIK).  Walk the IH ring,
7027  * ack interrupts and schedule work to handle
7028  * interrupt events.
7029  * Returns irq process return code.
7030  */
7031 int cik_irq_process(struct radeon_device *rdev)
7032 {
7033 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7034 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7035 	u32 wptr;
7036 	u32 rptr;
7037 	u32 src_id, src_data, ring_id;
7038 	u8 me_id, pipe_id, queue_id;
7039 	u32 ring_index;
7040 	bool queue_hotplug = false;
7041 	bool queue_reset = false;
7042 	u32 addr, status, mc_client;
7043 	bool queue_thermal = false;
7044 
7045 	if (!rdev->ih.enabled || rdev->shutdown)
7046 		return IRQ_NONE;
7047 
7048 	wptr = cik_get_ih_wptr(rdev);
7049 
7050 restart_ih:
7051 	/* is somebody else already processing irqs? */
7052 	if (atomic_xchg(&rdev->ih.lock, 1))
7053 		return IRQ_NONE;
7054 
7055 	rptr = rdev->ih.rptr;
7056 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7057 
7058 	/* Order reading of wptr vs. reading of IH ring data */
7059 	rmb();
7060 
7061 	/* display interrupts */
7062 	cik_irq_ack(rdev);
7063 
7064 	while (rptr != wptr) {
7065 		/* wptr/rptr are in bytes! */
7066 		ring_index = rptr / 4;
7067 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7068 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7069 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7070 
7071 		switch (src_id) {
7072 		case 1: /* D1 vblank/vline */
7073 			switch (src_data) {
7074 			case 0: /* D1 vblank */
7075 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7076 					if (rdev->irq.crtc_vblank_int[0]) {
7077 						drm_handle_vblank(rdev->ddev, 0);
7078 						rdev->pm.vblank_sync = true;
7079 						wake_up(&rdev->irq.vblank_queue);
7080 					}
7081 					if (atomic_read(&rdev->irq.pflip[0]))
7082 						radeon_crtc_handle_flip(rdev, 0);
7083 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7084 					DRM_DEBUG("IH: D1 vblank\n");
7085 				}
7086 				break;
7087 			case 1: /* D1 vline */
7088 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7089 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7090 					DRM_DEBUG("IH: D1 vline\n");
7091 				}
7092 				break;
7093 			default:
7094 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7095 				break;
7096 			}
7097 			break;
7098 		case 2: /* D2 vblank/vline */
7099 			switch (src_data) {
7100 			case 0: /* D2 vblank */
7101 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7102 					if (rdev->irq.crtc_vblank_int[1]) {
7103 						drm_handle_vblank(rdev->ddev, 1);
7104 						rdev->pm.vblank_sync = true;
7105 						wake_up(&rdev->irq.vblank_queue);
7106 					}
7107 					if (atomic_read(&rdev->irq.pflip[1]))
7108 						radeon_crtc_handle_flip(rdev, 1);
7109 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7110 					DRM_DEBUG("IH: D2 vblank\n");
7111 				}
7112 				break;
7113 			case 1: /* D2 vline */
7114 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7115 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7116 					DRM_DEBUG("IH: D2 vline\n");
7117 				}
7118 				break;
7119 			default:
7120 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7121 				break;
7122 			}
7123 			break;
7124 		case 3: /* D3 vblank/vline */
7125 			switch (src_data) {
7126 			case 0: /* D3 vblank */
7127 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7128 					if (rdev->irq.crtc_vblank_int[2]) {
7129 						drm_handle_vblank(rdev->ddev, 2);
7130 						rdev->pm.vblank_sync = true;
7131 						wake_up(&rdev->irq.vblank_queue);
7132 					}
7133 					if (atomic_read(&rdev->irq.pflip[2]))
7134 						radeon_crtc_handle_flip(rdev, 2);
7135 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7136 					DRM_DEBUG("IH: D3 vblank\n");
7137 				}
7138 				break;
7139 			case 1: /* D3 vline */
7140 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7141 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7142 					DRM_DEBUG("IH: D3 vline\n");
7143 				}
7144 				break;
7145 			default:
7146 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7147 				break;
7148 			}
7149 			break;
7150 		case 4: /* D4 vblank/vline */
7151 			switch (src_data) {
7152 			case 0: /* D4 vblank */
7153 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7154 					if (rdev->irq.crtc_vblank_int[3]) {
7155 						drm_handle_vblank(rdev->ddev, 3);
7156 						rdev->pm.vblank_sync = true;
7157 						wake_up(&rdev->irq.vblank_queue);
7158 					}
7159 					if (atomic_read(&rdev->irq.pflip[3]))
7160 						radeon_crtc_handle_flip(rdev, 3);
7161 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7162 					DRM_DEBUG("IH: D4 vblank\n");
7163 				}
7164 				break;
7165 			case 1: /* D4 vline */
7166 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7167 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7168 					DRM_DEBUG("IH: D4 vline\n");
7169 				}
7170 				break;
7171 			default:
7172 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7173 				break;
7174 			}
7175 			break;
7176 		case 5: /* D5 vblank/vline */
7177 			switch (src_data) {
7178 			case 0: /* D5 vblank */
7179 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7180 					if (rdev->irq.crtc_vblank_int[4]) {
7181 						drm_handle_vblank(rdev->ddev, 4);
7182 						rdev->pm.vblank_sync = true;
7183 						wake_up(&rdev->irq.vblank_queue);
7184 					}
7185 					if (atomic_read(&rdev->irq.pflip[4]))
7186 						radeon_crtc_handle_flip(rdev, 4);
7187 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7188 					DRM_DEBUG("IH: D5 vblank\n");
7189 				}
7190 				break;
7191 			case 1: /* D5 vline */
7192 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7193 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7194 					DRM_DEBUG("IH: D5 vline\n");
7195 				}
7196 				break;
7197 			default:
7198 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7199 				break;
7200 			}
7201 			break;
7202 		case 6: /* D6 vblank/vline */
7203 			switch (src_data) {
7204 			case 0: /* D6 vblank */
7205 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7206 					if (rdev->irq.crtc_vblank_int[5]) {
7207 						drm_handle_vblank(rdev->ddev, 5);
7208 						rdev->pm.vblank_sync = true;
7209 						wake_up(&rdev->irq.vblank_queue);
7210 					}
7211 					if (atomic_read(&rdev->irq.pflip[5]))
7212 						radeon_crtc_handle_flip(rdev, 5);
7213 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7214 					DRM_DEBUG("IH: D6 vblank\n");
7215 				}
7216 				break;
7217 			case 1: /* D6 vline */
7218 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7219 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7220 					DRM_DEBUG("IH: D6 vline\n");
7221 				}
7222 				break;
7223 			default:
7224 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7225 				break;
7226 			}
7227 			break;
7228 		case 42: /* HPD hotplug */
7229 			switch (src_data) {
7230 			case 0:
7231 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7232 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7233 					queue_hotplug = true;
7234 					DRM_DEBUG("IH: HPD1\n");
7235 				}
7236 				break;
7237 			case 1:
7238 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7239 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7240 					queue_hotplug = true;
7241 					DRM_DEBUG("IH: HPD2\n");
7242 				}
7243 				break;
7244 			case 2:
7245 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7246 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7247 					queue_hotplug = true;
7248 					DRM_DEBUG("IH: HPD3\n");
7249 				}
7250 				break;
7251 			case 3:
7252 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7253 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7254 					queue_hotplug = true;
7255 					DRM_DEBUG("IH: HPD4\n");
7256 				}
7257 				break;
7258 			case 4:
7259 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7260 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7261 					queue_hotplug = true;
7262 					DRM_DEBUG("IH: HPD5\n");
7263 				}
7264 				break;
7265 			case 5:
7266 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7267 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7268 					queue_hotplug = true;
7269 					DRM_DEBUG("IH: HPD6\n");
7270 				}
7271 				break;
7272 			default:
7273 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7274 				break;
7275 			}
7276 			break;
7277 		case 124: /* UVD */
7278 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7279 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7280 			break;
7281 		case 146:
7282 		case 147:
7283 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7284 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7285 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7286 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7287 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7288 				addr);
7289 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7290 				status);
7291 			cik_vm_decode_fault(rdev, status, addr, mc_client);
7292 			/* reset addr and status */
7293 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7294 			break;
7295 		case 176: /* GFX RB CP_INT */
7296 		case 177: /* GFX IB CP_INT */
7297 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7298 			break;
7299 		case 181: /* CP EOP event */
7300 			DRM_DEBUG("IH: CP EOP\n");
7301 			/* XXX check the bitfield order! */
7302 			me_id = (ring_id & 0x60) >> 5;
7303 			pipe_id = (ring_id & 0x18) >> 3;
7304 			queue_id = (ring_id & 0x7) >> 0;
7305 			switch (me_id) {
7306 			case 0:
7307 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7308 				break;
7309 			case 1:
7310 			case 2:
7311 				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
7312 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7313 				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
7314 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7315 				break;
7316 			}
7317 			break;
7318 		case 184: /* CP Privileged reg access */
7319 			DRM_ERROR("Illegal register access in command stream\n");
7320 			/* XXX check the bitfield order! */
7321 			me_id = (ring_id & 0x60) >> 5;
7322 			pipe_id = (ring_id & 0x18) >> 3;
7323 			queue_id = (ring_id & 0x7) >> 0;
7324 			switch (me_id) {
7325 			case 0:
7326 				/* This results in a full GPU reset, but all we need to do is soft
7327 				 * reset the CP for gfx
7328 				 */
7329 				queue_reset = true;
7330 				break;
7331 			case 1:
7332 				/* XXX compute */
7333 				queue_reset = true;
7334 				break;
7335 			case 2:
7336 				/* XXX compute */
7337 				queue_reset = true;
7338 				break;
7339 			}
7340 			break;
7341 		case 185: /* CP Privileged inst */
7342 			DRM_ERROR("Illegal instruction in command stream\n");
7343 			/* XXX check the bitfield order! */
7344 			me_id = (ring_id & 0x60) >> 5;
7345 			pipe_id = (ring_id & 0x18) >> 3;
7346 			queue_id = (ring_id & 0x7) >> 0;
7347 			switch (me_id) {
7348 			case 0:
7349 				/* This results in a full GPU reset, but all we need to do is soft
7350 				 * reset the CP for gfx
7351 				 */
7352 				queue_reset = true;
7353 				break;
7354 			case 1:
7355 				/* XXX compute */
7356 				queue_reset = true;
7357 				break;
7358 			case 2:
7359 				/* XXX compute */
7360 				queue_reset = true;
7361 				break;
7362 			}
7363 			break;
7364 		case 224: /* SDMA trap event */
7365 			/* XXX check the bitfield order! */
7366 			me_id = (ring_id & 0x3) >> 0;
7367 			queue_id = (ring_id & 0xc) >> 2;
7368 			DRM_DEBUG("IH: SDMA trap\n");
7369 			switch (me_id) {
7370 			case 0:
7371 				switch (queue_id) {
7372 				case 0:
7373 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7374 					break;
7375 				case 1:
7376 					/* XXX compute */
7377 					break;
7378 				case 2:
7379 					/* XXX compute */
7380 					break;
7381 				}
7382 				break;
7383 			case 1:
7384 				switch (queue_id) {
7385 				case 0:
7386 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7387 					break;
7388 				case 1:
7389 					/* XXX compute */
7390 					break;
7391 				case 2:
7392 					/* XXX compute */
7393 					break;
7394 				}
7395 				break;
7396 			}
7397 			break;
7398 		case 230: /* thermal low to high */
7399 			DRM_DEBUG("IH: thermal low to high\n");
7400 			rdev->pm.dpm.thermal.high_to_low = false;
7401 			queue_thermal = true;
7402 			break;
7403 		case 231: /* thermal high to low */
7404 			DRM_DEBUG("IH: thermal high to low\n");
7405 			rdev->pm.dpm.thermal.high_to_low = true;
7406 			queue_thermal = true;
7407 			break;
7408 		case 233: /* GUI IDLE */
7409 			DRM_DEBUG("IH: GUI idle\n");
7410 			break;
7411 		case 241: /* SDMA Privileged inst */
7412 		case 247: /* SDMA Privileged inst */
7413 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
7414 			/* XXX check the bitfield order! */
7415 			me_id = (ring_id & 0x3) >> 0;
7416 			queue_id = (ring_id & 0xc) >> 2;
7417 			switch (me_id) {
7418 			case 0:
7419 				switch (queue_id) {
7420 				case 0:
7421 					queue_reset = true;
7422 					break;
7423 				case 1:
7424 					/* XXX compute */
7425 					queue_reset = true;
7426 					break;
7427 				case 2:
7428 					/* XXX compute */
7429 					queue_reset = true;
7430 					break;
7431 				}
7432 				break;
7433 			case 1:
7434 				switch (queue_id) {
7435 				case 0:
7436 					queue_reset = true;
7437 					break;
7438 				case 1:
7439 					/* XXX compute */
7440 					queue_reset = true;
7441 					break;
7442 				case 2:
7443 					/* XXX compute */
7444 					queue_reset = true;
7445 					break;
7446 				}
7447 				break;
7448 			}
7449 			break;
7450 		default:
7451 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7452 			break;
7453 		}
7454 
7455 		/* wptr/rptr are in bytes! */
7456 		rptr += 16;
7457 		rptr &= rdev->ih.ptr_mask;
7458 	}
7459 	if (queue_hotplug)
7460 		schedule_work(&rdev->hotplug_work);
7461 	if (queue_reset)
7462 		schedule_work(&rdev->reset_work);
7463 	if (queue_thermal)
7464 		schedule_work(&rdev->pm.dpm.thermal.work);
7465 	rdev->ih.rptr = rptr;
7466 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
7467 	atomic_set(&rdev->ih.lock, 0);
7468 
7469 	/* make sure wptr hasn't changed while processing */
7470 	wptr = cik_get_ih_wptr(rdev);
7471 	if (wptr != rptr)
7472 		goto restart_ih;
7473 
7474 	return IRQ_HANDLED;
7475 }
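/* Note: the atomic_xchg() on ih.lock above keeps this handler
 * single-threaded, and re-reading the wptr after dropping the lock
 * catches vectors that arrived while the ring was being drained, so
 * no IH entry is silently skipped between invocations.
 */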
7476 
7477 /*
7478  * startup/shutdown callbacks
7479  */
7480 /**
7481  * cik_startup - program the asic to a functional state
7482  *
7483  * @rdev: radeon_device pointer
7484  *
7485  * Programs the asic to a functional state (CIK).
7486  * Called by cik_init() and cik_resume().
7487  * Returns 0 for success, error for failure.
7488  */
7489 static int cik_startup(struct radeon_device *rdev)
7490 {
7491 	struct radeon_ring *ring;
7492 	int r;
7493 
7494 	/* enable pcie gen2/3 link */
7495 	cik_pcie_gen3_enable(rdev);
7496 	/* enable aspm */
7497 	cik_program_aspm(rdev);
7498 
7499 	/* scratch needs to be initialized before MC */
7500 	r = r600_vram_scratch_init(rdev);
7501 	if (r)
7502 		return r;
7503 
7504 	cik_mc_program(rdev);
7505 
7506 	if (rdev->flags & RADEON_IS_IGP) {
7507 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7508 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
7509 			r = cik_init_microcode(rdev);
7510 			if (r) {
7511 				DRM_ERROR("Failed to load firmware!\n");
7512 				return r;
7513 			}
7514 		}
7515 	} else {
7516 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7517 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
7518 		    !rdev->mc_fw) {
7519 			r = cik_init_microcode(rdev);
7520 			if (r) {
7521 				DRM_ERROR("Failed to load firmware!\n");
7522 				return r;
7523 			}
7524 		}
7525 
7526 		r = ci_mc_load_microcode(rdev);
7527 		if (r) {
7528 			DRM_ERROR("Failed to load MC firmware!\n");
7529 			return r;
7530 		}
7531 	}
7532 
7533 	r = cik_pcie_gart_enable(rdev);
7534 	if (r)
7535 		return r;
7536 	cik_gpu_init(rdev);
7537 
7538 	/* allocate rlc buffers */
7539 	if (rdev->flags & RADEON_IS_IGP) {
7540 		if (rdev->family == CHIP_KAVERI) {
7541 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7542 			rdev->rlc.reg_list_size =
7543 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7544 		} else {
7545 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7546 			rdev->rlc.reg_list_size =
7547 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7548 		}
7549 	}
7550 	rdev->rlc.cs_data = ci_cs_data;
7551 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7552 	r = sumo_rlc_init(rdev);
7553 	if (r) {
7554 		DRM_ERROR("Failed to init rlc BOs!\n");
7555 		return r;
7556 	}
7557 
7558 	/* allocate wb buffer */
7559 	r = radeon_wb_init(rdev);
7560 	if (r)
7561 		return r;
7562 
7563 	/* allocate mec buffers */
7564 	r = cik_mec_init(rdev);
7565 	if (r) {
7566 		DRM_ERROR("Failed to init MEC BOs!\n");
7567 		return r;
7568 	}
7569 
7570 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7571 	if (r) {
7572 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7573 		return r;
7574 	}
7575 
7576 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7577 	if (r) {
7578 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7579 		return r;
7580 	}
7581 
7582 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7583 	if (r) {
7584 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7585 		return r;
7586 	}
7587 
7588 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7589 	if (r) {
7590 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7591 		return r;
7592 	}
7593 
7594 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7595 	if (r) {
7596 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7597 		return r;
7598 	}
7599 
7600 	r = radeon_uvd_resume(rdev);
7601 	if (!r) {
7602 		r = uvd_v4_2_resume(rdev);
7603 		if (!r) {
7604 			r = radeon_fence_driver_start_ring(rdev,
7605 							   R600_RING_TYPE_UVD_INDEX);
7606 			if (r)
7607 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7608 		}
7609 	}
7610 	if (r)
7611 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7612 
7613 	/* Enable IRQ */
7614 	if (!rdev->irq.installed) {
7615 		r = radeon_irq_kms_init(rdev);
7616 		if (r)
7617 			return r;
7618 	}
7619 
7620 	r = cik_irq_init(rdev);
7621 	if (r) {
7622 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
7623 		radeon_irq_kms_fini(rdev);
7624 		return r;
7625 	}
7626 	cik_irq_set(rdev);
7627 
7628 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7629 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7630 			     CP_RB0_RPTR, CP_RB0_WPTR,
7631 			     PACKET3(PACKET3_NOP, 0x3FFF));
7632 	if (r)
7633 		return r;
7634 
7635 	/* set up the compute queues */
7636 	/* type-2 packets are deprecated on MEC, use type-3 instead */
7637 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7638 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7639 			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7640 			     PACKET3(PACKET3_NOP, 0x3FFF));
7641 	if (r)
7642 		return r;
7643 	ring->me = 1; /* first MEC */
7644 	ring->pipe = 0; /* first pipe */
7645 	ring->queue = 0; /* first queue */
7646 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7647 
7648 	/* type-2 packets are deprecated on MEC, use type-3 instead */
7649 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7650 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7651 			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7652 			     PACKET3(PACKET3_NOP, 0x3FFF));
7653 	if (r)
7654 		return r;
7655 	/* dGPUs only have 1 MEC */
7656 	ring->me = 1; /* first MEC */
7657 	ring->pipe = 0; /* first pipe */
7658 	ring->queue = 1; /* second queue */
7659 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
7660 
7661 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7662 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7663 			     SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
7664 			     SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
7665 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7666 	if (r)
7667 		return r;
7668 
7669 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7670 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7671 			     SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
7672 			     SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
7673 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7674 	if (r)
7675 		return r;
7676 
7677 	r = cik_cp_resume(rdev);
7678 	if (r)
7679 		return r;
7680 
7681 	r = cik_sdma_resume(rdev);
7682 	if (r)
7683 		return r;
7684 
7685 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7686 	if (ring->ring_size) {
7687 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7688 				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
7689 				     RADEON_CP_PACKET2);
7690 		if (!r)
7691 			r = uvd_v1_0_init(rdev);
7692 		if (r)
7693 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7694 	}
7695 
7696 	r = radeon_ib_pool_init(rdev);
7697 	if (r) {
7698 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7699 		return r;
7700 	}
7701 
7702 	r = radeon_vm_manager_init(rdev);
7703 	if (r) {
7704 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7705 		return r;
7706 	}
7707 
7708 	r = dce6_audio_init(rdev);
7709 	if (r)
7710 		return r;
7711 
7712 	return 0;
7713 }
7714 
7715 /**
7716  * cik_resume - resume the asic to a functional state
7717  *
7718  * @rdev: radeon_device pointer
7719  *
7720  * Programs the asic to a functional state (CIK).
7721  * Called at resume.
7722  * Returns 0 for success, error for failure.
7723  */
7724 int cik_resume(struct radeon_device *rdev)
7725 {
7726 	int r;
7727 
7728 	/* post card */
7729 	atom_asic_init(rdev->mode_info.atom_context);
7730 
7731 	/* init golden registers */
7732 	cik_init_golden_registers(rdev);
7733 
7734 	rdev->accel_working = true;
7735 	r = cik_startup(rdev);
7736 	if (r) {
7737 		DRM_ERROR("cik startup failed on resume\n");
7738 		rdev->accel_working = false;
7739 		return r;
7740 	}
7741 
7742 	return r;
7743 }
7745 
7746 /**
7747  * cik_suspend - suspend the asic
7748  *
7749  * @rdev: radeon_device pointer
7750  *
7751  * Bring the chip into a state suitable for suspend (CIK).
7752  * Called at suspend.
7753  * Returns 0 for success.
7754  */
7755 int cik_suspend(struct radeon_device *rdev)
7756 {
7757 	dce6_audio_fini(rdev);
7758 	radeon_vm_manager_fini(rdev);
7759 	cik_cp_enable(rdev, false);
7760 	cik_sdma_enable(rdev, false);
7761 	uvd_v1_0_fini(rdev);
7762 	radeon_uvd_suspend(rdev);
7763 	cik_fini_pg(rdev);
7764 	cik_fini_cg(rdev);
7765 	cik_irq_suspend(rdev);
7766 	radeon_wb_disable(rdev);
7767 	cik_pcie_gart_disable(rdev);
7768 	return 0;
7769 }
7770 
7771 /* The plan is to move initialization into this function and use
7772  * helper functions so that radeon_device_init does pretty much
7773  * nothing more than call asic-specific functions. This should
7774  * also allow us to remove a bunch of callback functions
7775  * like vram_info.
7776  */
7777 /**
7778  * cik_init - asic specific driver and hw init
7779  *
7780  * @rdev: radeon_device pointer
7781  *
7782  * Setup asic specific driver variables and program the hw
7783  * to a functional state (CIK).
7784  * Called at driver startup.
7785  * Returns 0 for success, errors for failure.
7786  */
7787 int cik_init(struct radeon_device *rdev)
7788 {
7789 	struct radeon_ring *ring;
7790 	int r;
7791 
7792 	/* Read BIOS */
7793 	if (!radeon_get_bios(rdev)) {
7794 		if (ASIC_IS_AVIVO(rdev))
7795 			return -EINVAL;
7796 	}
7797 	/* Must be an ATOMBIOS */
7798 	if (!rdev->is_atom_bios) {
7799 		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
7800 		return -EINVAL;
7801 	}
7802 	r = radeon_atombios_init(rdev);
7803 	if (r)
7804 		return r;
7805 
7806 	/* Post card if necessary */
7807 	if (!radeon_card_posted(rdev)) {
7808 		if (!rdev->bios) {
7809 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7810 			return -EINVAL;
7811 		}
7812 		DRM_INFO("GPU not posted. posting now...\n");
7813 		atom_asic_init(rdev->mode_info.atom_context);
7814 	}
7815 	/* init golden registers */
7816 	cik_init_golden_registers(rdev);
7817 	/* Initialize scratch registers */
7818 	cik_scratch_init(rdev);
7819 	/* Initialize surface registers */
7820 	radeon_surface_init(rdev);
7821 	/* Initialize clocks */
7822 	radeon_get_clock_info(rdev->ddev);
7823 
7824 	/* Fence driver */
7825 	r = radeon_fence_driver_init(rdev);
7826 	if (r)
7827 		return r;
7828 
7829 	/* initialize memory controller */
7830 	r = cik_mc_init(rdev);
7831 	if (r)
7832 		return r;
7833 	/* Memory manager */
7834 	r = radeon_bo_init(rdev);
7835 	if (r)
7836 		return r;
7837 
7838 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7839 	ring->ring_obj = NULL;
7840 	r600_ring_init(rdev, ring, 1024 * 1024);
7841 
7842 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7843 	ring->ring_obj = NULL;
7844 	r600_ring_init(rdev, ring, 1024 * 1024);
7845 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
7846 	if (r)
7847 		return r;
7848 
7849 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7850 	ring->ring_obj = NULL;
7851 	r600_ring_init(rdev, ring, 1024 * 1024);
7852 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
7853 	if (r)
7854 		return r;
7855 
7856 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7857 	ring->ring_obj = NULL;
7858 	r600_ring_init(rdev, ring, 256 * 1024);
7859 
7860 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7861 	ring->ring_obj = NULL;
7862 	r600_ring_init(rdev, ring, 256 * 1024);
7863 
7864 	r = radeon_uvd_init(rdev);
7865 	if (!r) {
7866 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7867 		ring->ring_obj = NULL;
7868 		r600_ring_init(rdev, ring, 4096);
7869 	}
7870 
7871 	rdev->ih.ring_obj = NULL;
7872 	r600_ih_ring_init(rdev, 64 * 1024);
7873 
7874 	r = r600_pcie_gart_init(rdev);
7875 	if (r)
7876 		return r;
7877 
7878 	rdev->accel_working = true;
7879 	r = cik_startup(rdev);
7880 	if (r) {
7881 		dev_err(rdev->dev, "disabling GPU acceleration\n");
7882 		cik_cp_fini(rdev);
7883 		cik_sdma_fini(rdev);
7884 		cik_irq_fini(rdev);
7885 		sumo_rlc_fini(rdev);
7886 		cik_mec_fini(rdev);
7887 		radeon_wb_fini(rdev);
7888 		radeon_ib_pool_fini(rdev);
7889 		radeon_vm_manager_fini(rdev);
7890 		radeon_irq_kms_fini(rdev);
7891 		cik_pcie_gart_fini(rdev);
7892 		rdev->accel_working = false;
7893 	}
7894 
7895 	/* Don't start up if the MC ucode is missing.
7896 	 * The default clocks and voltages before the MC ucode
7897 	 * is loaded are not sufficient for advanced operations.
7898 	 */
7899 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7900 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
7901 		return -EINVAL;
7902 	}
7903 
7904 	return 0;
7905 }
7906 
7907 /**
7908  * cik_fini - asic specific driver and hw fini
7909  *
7910  * @rdev: radeon_device pointer
7911  *
7912  * Tear down the asic specific driver variables and program the hw
7913  * to an idle state (CIK).
7914  * Called at driver unload.
7915  */
7916 void cik_fini(struct radeon_device *rdev)
7917 {
7918 	cik_cp_fini(rdev);
7919 	cik_sdma_fini(rdev);
7920 	cik_fini_pg(rdev);
7921 	cik_fini_cg(rdev);
7922 	cik_irq_fini(rdev);
7923 	sumo_rlc_fini(rdev);
7924 	cik_mec_fini(rdev);
7925 	radeon_wb_fini(rdev);
7926 	radeon_vm_manager_fini(rdev);
7927 	radeon_ib_pool_fini(rdev);
7928 	radeon_irq_kms_fini(rdev);
7929 	uvd_v1_0_fini(rdev);
7930 	radeon_uvd_fini(rdev);
7931 	cik_pcie_gart_fini(rdev);
7932 	r600_vram_scratch_fini(rdev);
7933 	radeon_gem_fini(rdev);
7934 	radeon_fence_driver_fini(rdev);
7935 	radeon_bo_fini(rdev);
7936 	radeon_atombios_fini(rdev);
7937 	kfree(rdev->bios);
7938 	rdev->bios = NULL;
7939 }
7940 
7941 void dce8_program_fmt(struct drm_encoder *encoder)
7942 {
7943 	struct drm_device *dev = encoder->dev;
7944 	struct radeon_device *rdev = dev->dev_private;
7945 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
7946 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
7947 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
7948 	int bpc = 0;
7949 	u32 tmp = 0;
7950 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
7951 
7952 	if (connector) {
7953 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
7954 		bpc = radeon_get_monitor_bpc(connector);
7955 		dither = radeon_connector->dither;
7956 	}
7957 
7958 	/* LVDS/eDP FMT is set up by atom */
7959 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
7960 		return;
7961 
7962 	/* not needed for analog */
7963 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
7964 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
7965 		return;
7966 
7967 	if (bpc == 0)
7968 		return;
7969 
7970 	switch (bpc) {
7971 	case 6:
7972 		if (dither == RADEON_FMT_DITHER_ENABLE)
7973 			/* XXX sort out optimal dither settings */
7974 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
7975 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
7976 		else
7977 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
7978 		break;
7979 	case 8:
7980 		if (dither == RADEON_FMT_DITHER_ENABLE)
7981 			/* XXX sort out optimal dither settings */
7982 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
7983 				FMT_RGB_RANDOM_ENABLE |
7984 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
7985 		else
7986 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
7987 		break;
7988 	case 10:
7989 		if (dither == RADEON_FMT_DITHER_ENABLE)
7990 			/* XXX sort out optimal dither settings */
7991 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
7992 				FMT_RGB_RANDOM_ENABLE |
7993 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
7994 		else
7995 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
7996 		break;
7997 	default:
7998 		/* not needed */
7999 		break;
8000 	}
8001 
8002 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8003 }
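/* In short: bpc 6/8/10 selects truncate/dither depth 0/1/2 above;
 * enabling dithering adds spatial dithering plus frame/highpass (and,
 * at 8 and 10 bpc, RGB) randomization, otherwise the output is
 * simply truncated.
 */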
8004 
8005 /* display watermark setup */
8006 /**
8007  * dce8_line_buffer_adjust - Set up the line buffer
8008  *
8009  * @rdev: radeon_device pointer
8010  * @radeon_crtc: the selected display controller
8011  * @mode: the current display mode on the selected display
8012  * controller
8013  *
8014  * Set up the line buffer allocation for
8015  * the selected display controller (CIK).
8016  * Returns the line buffer size in pixels.
8017  */
8018 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8019 				   struct radeon_crtc *radeon_crtc,
8020 				   struct drm_display_mode *mode)
8021 {
8022 	u32 tmp, buffer_alloc, i;
8023 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8024 	/*
8025 	 * Line Buffer Setup
8026 	 * There are 6 line buffers, one for each display controller.
8027 	 * There are 3 partitions per LB. Select the number of partitions
8028 	 * to enable based on the display width.  For display widths larger
8029 	 * than 4096, you need to use 2 display controllers and combine
8030 	 * them using the stereo blender.
8031 	 */
8032 	if (radeon_crtc->base.enabled && mode) {
8033 		if (mode->crtc_hdisplay < 1920) {
8034 			tmp = 1;
8035 			buffer_alloc = 2;
8036 		} else if (mode->crtc_hdisplay < 2560) {
8037 			tmp = 2;
8038 			buffer_alloc = 2;
8039 		} else if (mode->crtc_hdisplay < 4096) {
8040 			tmp = 0;
8041 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8042 		} else {
8043 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8044 			tmp = 0;
8045 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8046 		}
8047 	} else {
8048 		tmp = 1;
8049 		buffer_alloc = 0;
8050 	}
8051 
8052 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8053 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8054 
8055 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8056 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8057 	for (i = 0; i < rdev->usec_timeout; i++) {
8058 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8059 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8060 			break;
8061 		udelay(1);
8062 	}
8063 
8064 	if (radeon_crtc->base.enabled && mode) {
8065 		switch (tmp) {
8066 		case 0:
8067 		default:
8068 			return 4096 * 2;
8069 		case 1:
8070 			return 1920 * 2;
8071 		case 2:
8072 			return 2560 * 2;
8073 		}
8074 	}
8075 
8076 	/* controller not enabled, so no lb used */
8077 	return 0;
8078 }
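/* A worked example: a 1920x1080 mode (crtc_hdisplay == 1920) selects
 * LB config 2 and 2 DMIF buffers above, and reports 2560 * 2 = 5120
 * pixels of line buffer to the watermark code.
 */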
8079 
8080 /**
8081  * cik_get_number_of_dram_channels - get the number of dram channels
8082  *
8083  * @rdev: radeon_device pointer
8084  *
8085  * Look up the number of video ram channels (CIK).
8086  * Used for display watermark bandwidth calculations
8087  * Returns the number of dram channels
8088  */
8089 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8090 {
8091 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8092 
8093 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8094 	case 0:
8095 	default:
8096 		return 1;
8097 	case 1:
8098 		return 2;
8099 	case 2:
8100 		return 4;
8101 	case 3:
8102 		return 8;
8103 	case 4:
8104 		return 3;
8105 	case 5:
8106 		return 6;
8107 	case 6:
8108 		return 10;
8109 	case 7:
8110 		return 12;
8111 	case 8:
8112 		return 16;
8113 	}
8114 }
8115 
8116 struct dce8_wm_params {
8117 	u32 dram_channels; /* number of dram channels */
8118 	u32 yclk;          /* bandwidth per dram data pin in kHz */
8119 	u32 sclk;          /* engine clock in kHz */
8120 	u32 disp_clk;      /* display clock in kHz */
8121 	u32 src_width;     /* viewport width */
8122 	u32 active_time;   /* active display time in ns */
8123 	u32 blank_time;    /* blank time in ns */
8124 	bool interlaced;    /* mode is interlaced */
8125 	fixed20_12 vsc;    /* vertical scale ratio */
8126 	u32 num_heads;     /* number of active crtcs */
8127 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8128 	u32 lb_size;       /* line buffer allocated to pipe */
8129 	u32 vtaps;         /* vertical scaler taps */
8130 };
8131 
8132 /**
8133  * dce8_dram_bandwidth - get the dram bandwidth
8134  *
8135  * @wm: watermark calculation data
8136  *
8137  * Calculate the raw dram bandwidth (CIK).
8138  * Used for display watermark bandwidth calculations
8139  * Returns the dram bandwidth in MBytes/s
8140  */
8141 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8142 {
8143 	/* Calculate raw DRAM Bandwidth */
8144 	fixed20_12 dram_efficiency; /* 0.7 */
8145 	fixed20_12 yclk, dram_channels, bandwidth;
8146 	fixed20_12 a;
8147 
8148 	a.full = dfixed_const(1000);
8149 	yclk.full = dfixed_const(wm->yclk);
8150 	yclk.full = dfixed_div(yclk, a);
8151 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8152 	a.full = dfixed_const(10);
8153 	dram_efficiency.full = dfixed_const(7);
8154 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8155 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8156 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8157 
8158 	return dfixed_trunc(bandwidth);
8159 }
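/* The fixed-point math above reduces to
 *	bandwidth = dram_channels * 4 bytes * yclk(MHz) * 0.7
 * e.g. 4 channels at yclk = 2000000 kHz yield 16 * 2000 * 0.7 =
 * 22400 MBytes/s of raw DRAM bandwidth.
 */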
8160 
8161 /**
8162  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8163  *
8164  * @wm: watermark calculation data
8165  *
8166  * Calculate the dram bandwidth used for display (CIK).
8167  * Used for display watermark bandwidth calculations
8168  * Returns the dram bandwidth for display in MBytes/s
8169  */
8170 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8171 {
8172 	/* Calculate DRAM Bandwidth and the part allocated to display. */
8173 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8174 	fixed20_12 yclk, dram_channels, bandwidth;
8175 	fixed20_12 a;
8176 
8177 	a.full = dfixed_const(1000);
8178 	yclk.full = dfixed_const(wm->yclk);
8179 	yclk.full = dfixed_div(yclk, a);
8180 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8181 	a.full = dfixed_const(10);
8182 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
8183 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8184 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8185 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8186 
8187 	return dfixed_trunc(bandwidth);
8188 }
8189 
8190 /**
8191  * dce8_data_return_bandwidth - get the data return bandwidth
8192  *
8193  * @wm: watermark calculation data
8194  *
8195  * Calculate the data return bandwidth used for display (CIK).
8196  * Used for display watermark bandwidth calculations
8197  * Returns the data return bandwidth in MBytes/s
8198  */
8199 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8200 {
8201 	/* Calculate the display Data return Bandwidth */
8202 	fixed20_12 return_efficiency; /* 0.8 */
8203 	fixed20_12 sclk, bandwidth;
8204 	fixed20_12 a;
8205 
8206 	a.full = dfixed_const(1000);
8207 	sclk.full = dfixed_const(wm->sclk);
8208 	sclk.full = dfixed_div(sclk, a);
8209 	a.full = dfixed_const(10);
8210 	return_efficiency.full = dfixed_const(8);
8211 	return_efficiency.full = dfixed_div(return_efficiency, a);
8212 	a.full = dfixed_const(32);
8213 	bandwidth.full = dfixed_mul(a, sclk);
8214 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8215 
8216 	return dfixed_trunc(bandwidth);
8217 }
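/* Likewise the return path above reduces to
 *	bandwidth = 32 bytes * sclk(MHz) * 0.8
 * e.g. sclk = 800000 kHz yields 32 * 800 * 0.8 = 20480 MBytes/s;
 * dce8_dmif_request_bandwidth() below is the same formula with
 * disp_clk in place of sclk.
 */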
8218 
8219 /**
8220  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8221  *
8222  * @wm: watermark calculation data
8223  *
8224  * Calculate the dmif bandwidth used for display (CIK).
8225  * Used for display watermark bandwidth calculations
8226  * Returns the dmif bandwidth in MBytes/s
8227  */
8228 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8229 {
8230 	/* Calculate the DMIF Request Bandwidth */
8231 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8232 	fixed20_12 disp_clk, bandwidth;
8233 	fixed20_12 a, b;
8234 
8235 	a.full = dfixed_const(1000);
8236 	disp_clk.full = dfixed_const(wm->disp_clk);
8237 	disp_clk.full = dfixed_div(disp_clk, a);
8238 	a.full = dfixed_const(32);
8239 	b.full = dfixed_mul(a, disp_clk);
8240 
8241 	a.full = dfixed_const(10);
8242 	disp_clk_request_efficiency.full = dfixed_const(8);
8243 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8244 
8245 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8246 
8247 	return dfixed_trunc(bandwidth);
8248 }
8249 
8250 /**
8251  * dce8_available_bandwidth - get the min available bandwidth
8252  *
8253  * @wm: watermark calculation data
8254  *
8255  * Calculate the min available bandwidth used for display (CIK).
8256  * Used for display watermark bandwidth calculations
8257  * Returns the min available bandwidth in MBytes/s
8258  */
8259 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8260 {
8261 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
8262 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8263 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8264 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8265 
8266 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8267 }
8268 
8269 /**
8270  * dce8_average_bandwidth - get the average available bandwidth
8271  *
8272  * @wm: watermark calculation data
8273  *
8274  * Calculate the average available bandwidth used for display (CIK).
8275  * Used for display watermark bandwidth calculations
8276  * Returns the average available bandwidth in MBytes/s
8277  */
8278 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8279 {
8280 	/* Calculate the display mode Average Bandwidth.
8281 	 * DisplayMode should contain the source and destination dimensions,
8282 	 * timing, etc.
8283 	 */
8284 	fixed20_12 bpp;
8285 	fixed20_12 line_time;
8286 	fixed20_12 src_width;
8287 	fixed20_12 bandwidth;
8288 	fixed20_12 a;
8289 
8290 	a.full = dfixed_const(1000);
8291 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8292 	line_time.full = dfixed_div(line_time, a);
8293 	bpp.full = dfixed_const(wm->bytes_per_pixel);
8294 	src_width.full = dfixed_const(wm->src_width);
8295 	bandwidth.full = dfixed_mul(src_width, bpp);
8296 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8297 	bandwidth.full = dfixed_div(bandwidth, line_time);
8298 
8299 	return dfixed_trunc(bandwidth);
8300 }
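/* A worked example: a 1920-pixel-wide source at 4 bytes/pixel with
 * vsc = 1 and a 14813 ns line time averages 1920 * 4 / 14.813 ~= 518
 * MBytes/s over the frame.
 */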
8301 
8302 /**
8303  * dce8_latency_watermark - get the latency watermark
8304  *
8305  * @wm: watermark calculation data
8306  *
8307  * Calculate the latency watermark (CIK).
8308  * Used for display watermark bandwidth calculations
8309  * Returns the latency watermark in ns
8310  */
8311 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8312 {
8313 	/* First calculate the latency in ns */
8314 	u32 mc_latency = 2000; /* 2000 ns. */
8315 	u32 available_bandwidth = dce8_available_bandwidth(wm);
8316 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8317 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8318 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8319 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8320 		(wm->num_heads * cursor_line_pair_return_time);
8321 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8322 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8323 	u32 tmp, dmif_size = 12288;
8324 	fixed20_12 a, b, c;
8325 
8326 	if (wm->num_heads == 0)
8327 		return 0;
8328 
8329 	a.full = dfixed_const(2);
8330 	b.full = dfixed_const(1);
8331 	if ((wm->vsc.full > a.full) ||
8332 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8333 	    (wm->vtaps >= 5) ||
8334 	    ((wm->vsc.full >= a.full) && wm->interlaced))
8335 		max_src_lines_per_dst_line = 4;
8336 	else
8337 		max_src_lines_per_dst_line = 2;
8338 
8339 	a.full = dfixed_const(available_bandwidth);
8340 	b.full = dfixed_const(wm->num_heads);
8341 	a.full = dfixed_div(a, b);
8342 
8343 	b.full = dfixed_const(mc_latency + 512);
8344 	c.full = dfixed_const(wm->disp_clk);
8345 	b.full = dfixed_div(b, c);
8346 
8347 	c.full = dfixed_const(dmif_size);
8348 	b.full = dfixed_div(c, b);
8349 
8350 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8351 
8352 	b.full = dfixed_const(1000);
8353 	c.full = dfixed_const(wm->disp_clk);
8354 	b.full = dfixed_div(c, b);
8355 	c.full = dfixed_const(wm->bytes_per_pixel);
8356 	b.full = dfixed_mul(b, c);
8357 
8358 	lb_fill_bw = min(tmp, dfixed_trunc(b));
8359 
8360 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8361 	b.full = dfixed_const(1000);
8362 	c.full = dfixed_const(lb_fill_bw);
8363 	b.full = dfixed_div(c, b);
8364 	a.full = dfixed_div(a, b);
8365 	line_fill_time = dfixed_trunc(a);
8366 
8367 	if (line_fill_time < wm->active_time)
8368 		return latency;
8369 	else
8370 		return latency + (line_fill_time - wm->active_time);
8371 
8372 }
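/* A rough single-head example of the latency sum above, assuming
 * 20480 MBytes/s available bandwidth and a 300 MHz disp_clk:
 *	worst_chunk_return_time      = 512 * 8 * 1000 / 20480 = 200 ns
 *	cursor_line_pair_return_time = 128 * 4 * 1000 / 20480 = 25 ns
 *	dc_latency                   = 40000000 / 300000 = 133 ns
 *	latency = 2000 + (2 * 200 + 25) + 133 = 2558 ns
 * before any line-fill-time correction is applied.
 */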
8373 
8374 /**
8375  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8376  * average and available dram bandwidth
8377  *
8378  * @wm: watermark calculation data
8379  *
8380  * Check if the display average bandwidth fits in the display
8381  * dram bandwidth (CIK).
8382  * Used for display watermark bandwidth calculations
8383  * Returns true if the display fits, false if not.
8384  */
8385 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8386 {
8387 	if (dce8_average_bandwidth(wm) <=
8388 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8389 		return true;
8390 	else
8391 		return false;
8392 }
8393 
8394 /**
8395  * dce8_average_bandwidth_vs_available_bandwidth - check
8396  * average and available bandwidth
8397  *
8398  * @wm: watermark calculation data
8399  *
8400  * Check if the display average bandwidth fits in the display
8401  * available bandwidth (CIK).
8402  * Used for display watermark bandwidth calculations
8403  * Returns true if the display fits, false if not.
8404  */
8405 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8406 {
8407 	if (dce8_average_bandwidth(wm) <=
8408 	    (dce8_available_bandwidth(wm) / wm->num_heads))
8409 		return true;
8410 	else
8411 		return false;
8412 }
8413 
8414 /**
8415  * dce8_check_latency_hiding - check latency hiding
8416  *
8417  * @wm: watermark calculation data
8418  *
8419  * Check latency hiding (CIK).
8420  * Used for display watermark bandwidth calculations
8421  * Returns true if the display fits, false if not.
8422  */
8423 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8424 {
8425 	u32 lb_partitions = wm->lb_size / wm->src_width;
8426 	u32 line_time = wm->active_time + wm->blank_time;
8427 	u32 latency_tolerant_lines;
8428 	u32 latency_hiding;
8429 	fixed20_12 a;
8430 
8431 	a.full = dfixed_const(1);
8432 	if (wm->vsc.full > a.full)
8433 		latency_tolerant_lines = 1;
8434 	else {
8435 		if (lb_partitions <= (wm->vtaps + 1))
8436 			latency_tolerant_lines = 1;
8437 		else
8438 			latency_tolerant_lines = 2;
8439 	}
8440 
8441 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8442 
8443 	if (dce8_latency_watermark(wm) <= latency_hiding)
8444 		return true;
8445 	else
8446 		return false;
8447 }
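/* Continuing the 1920-wide example: lb_size = 5120 gives
 * lb_partitions = 2, and with vtaps = 1 and no vertical scaling that
 * is <= vtaps + 1, so only one latency-tolerant line is assumed and
 * latency_hiding = line_time + blank_time.
 */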
8448 
8449 /**
8450  * dce8_program_watermarks - program display watermarks
8451  *
8452  * @rdev: radeon_device pointer
8453  * @radeon_crtc: the selected display controller
8454  * @lb_size: line buffer size
8455  * @num_heads: number of display controllers in use
8456  *
8457  * Calculate and program the display watermarks for the
8458  * selected display controller (CIK).
8459  */
8460 static void dce8_program_watermarks(struct radeon_device *rdev,
8461 				    struct radeon_crtc *radeon_crtc,
8462 				    u32 lb_size, u32 num_heads)
8463 {
8464 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
8465 	struct dce8_wm_params wm_low, wm_high;
8466 	u32 pixel_period;
8467 	u32 line_time = 0;
8468 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
8469 	u32 tmp, wm_mask;
8470 
8471 	if (radeon_crtc->base.enabled && num_heads && mode) {
8472 		pixel_period = 1000000 / (u32)mode->clock;
8473 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8474 
8475 		/* watermark for high clocks */
8476 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8477 		    rdev->pm.dpm_enabled) {
8478 			wm_high.yclk =
8479 				radeon_dpm_get_mclk(rdev, false) * 10;
8480 			wm_high.sclk =
8481 				radeon_dpm_get_sclk(rdev, false) * 10;
8482 		} else {
8483 			wm_high.yclk = rdev->pm.current_mclk * 10;
8484 			wm_high.sclk = rdev->pm.current_sclk * 10;
8485 		}
8486 
8487 		wm_high.disp_clk = mode->clock;
8488 		wm_high.src_width = mode->crtc_hdisplay;
8489 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8490 		wm_high.blank_time = line_time - wm_high.active_time;
8491 		wm_high.interlaced = false;
8492 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8493 			wm_high.interlaced = true;
8494 		wm_high.vsc = radeon_crtc->vsc;
8495 		wm_high.vtaps = 1;
8496 		if (radeon_crtc->rmx_type != RMX_OFF)
8497 			wm_high.vtaps = 2;
8498 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8499 		wm_high.lb_size = lb_size;
8500 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8501 		wm_high.num_heads = num_heads;
8502 
8503 		/* set for high clocks */
8504 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8505 
8506 		/* possibly force display priority to high */
8507 		/* should really do this at mode validation time... */
8508 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8509 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8510 		    !dce8_check_latency_hiding(&wm_high) ||
8511 		    (rdev->disp_priority == 2)) {
8512 			DRM_DEBUG_KMS("force priority to high\n");
8513 		}
8514 
8515 		/* watermark for low clocks */
8516 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8517 		    rdev->pm.dpm_enabled) {
8518 			wm_low.yclk =
8519 				radeon_dpm_get_mclk(rdev, true) * 10;
8520 			wm_low.sclk =
8521 				radeon_dpm_get_sclk(rdev, true) * 10;
8522 		} else {
8523 			wm_low.yclk = rdev->pm.current_mclk * 10;
8524 			wm_low.sclk = rdev->pm.current_sclk * 10;
8525 		}
8526 
8527 		wm_low.disp_clk = mode->clock;
8528 		wm_low.src_width = mode->crtc_hdisplay;
8529 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8530 		wm_low.blank_time = line_time - wm_low.active_time;
8531 		wm_low.interlaced = false;
8532 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8533 			wm_low.interlaced = true;
8534 		wm_low.vsc = radeon_crtc->vsc;
8535 		wm_low.vtaps = 1;
8536 		if (radeon_crtc->rmx_type != RMX_OFF)
8537 			wm_low.vtaps = 2;
8538 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8539 		wm_low.lb_size = lb_size;
8540 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8541 		wm_low.num_heads = num_heads;
8542 
8543 		/* set for low clocks */
8544 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
8545 
8546 		/* possibly force display priority to high */
8547 		/* should really do this at mode validation time... */
8548 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
8549 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
8550 		    !dce8_check_latency_hiding(&wm_low) ||
8551 		    (rdev->disp_priority == 2)) {
8552 			DRM_DEBUG_KMS("force priority to high\n");
8553 		}
8554 	}
8555 
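	/*
	 * The DPG block keeps several watermark sets behind a selector:
	 * program set 1 (A, high clocks) and set 2 (B, low clocks), then
	 * restore the original selection so DPM can switch between them.
	 */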
8556 	/* select wm A */
8557 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8558 	tmp = wm_mask;
8559 	tmp &= ~LATENCY_WATERMARK_MASK(3);
8560 	tmp |= LATENCY_WATERMARK_MASK(1);
8561 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8562 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8563 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
8564 		LATENCY_HIGH_WATERMARK(line_time)));
8565 	/* select wm B */
8566 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8567 	tmp &= ~LATENCY_WATERMARK_MASK(3);
8568 	tmp |= LATENCY_WATERMARK_MASK(2);
8569 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8570 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8571 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
8572 		LATENCY_HIGH_WATERMARK(line_time)));
8573 	/* restore original selection */
8574 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
8575 
8576 	/* save values for DPM */
8577 	radeon_crtc->line_time = line_time;
8578 	radeon_crtc->wm_high = latency_watermark_a;
8579 	radeon_crtc->wm_low = latency_watermark_b;
8580 }
8581 
8582 /**
8583  * dce8_bandwidth_update - program display watermarks
8584  *
8585  * @rdev: radeon_device pointer
8586  *
8587  * Calculate and program the display watermarks and line
8588  * buffer allocation (CIK).
8589  */
8590 void dce8_bandwidth_update(struct radeon_device *rdev)
8591 {
8592 	struct drm_display_mode *mode = NULL;
8593 	u32 num_heads = 0, lb_size;
8594 	int i;
8595 
8596 	radeon_update_display_priority(rdev);
8597 
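	/* count the active heads first; each head is granted an equal share
	 * of the available display bandwidth in the watermark checks
	 */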
8598 	for (i = 0; i < rdev->num_crtc; i++) {
8599 		if (rdev->mode_info.crtcs[i]->base.enabled)
8600 			num_heads++;
8601 	}
8602 	for (i = 0; i < rdev->num_crtc; i++) {
8603 		mode = &rdev->mode_info.crtcs[i]->base.mode;
8604 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8605 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8606 	}
8607 }
8608 
8609 /**
8610  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8611  *
8612  * @rdev: radeon_device pointer
8613  *
8614  * Fetches a GPU clock counter snapshot (CIK).
8615  * Returns the 64-bit clock counter snapshot.
8616  */
8617 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8618 {
8619 	uint64_t clock;
8620 
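	/*
	 * Writing the capture register latches the free-running counter so
	 * the LSB/MSB halves read back as one consistent 64-bit snapshot;
	 * the mutex serializes captures between callers.
	 */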
8621 	mutex_lock(&rdev->gpu_clock_mutex);
8622 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8623 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8624 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8625 	mutex_unlock(&rdev->gpu_clock_mutex);
8626 	return clock;
8627 }
8628 
8629 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8630                               u32 cntl_reg, u32 status_reg)
8631 {
8632 	int r, i;
8633 	struct atom_clock_dividers dividers;
8634 	uint32_t tmp;
8635 
8636 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8637 					   clock, false, &dividers);
8638 	if (r)
8639 		return r;
8640 
8641 	tmp = RREG32_SMC(cntl_reg);
8642 	tmp &= ~(DCLK_DIR_CNTL_EN | DCLK_DIVIDER_MASK);
8643 	tmp |= dividers.post_divider;
8644 	WREG32_SMC(cntl_reg, tmp);
8645 
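	/* wait up to one second (100 * 10ms) for the new divider to take effect */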
8646 	for (i = 0; i < 100; i++) {
8647 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
8648 			break;
8649 		mdelay(10);
8650 	}
8651 	if (i == 100)
8652 		return -ETIMEDOUT;
8653 
8654 	return 0;
8655 }
8656 
8657 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8658 {
8659 	int r;
8660 
8661 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8662 	if (r)
8663 		return r;
8664 
8665 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8666 	return r;
8667 }
8668 
8669 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
8670 {
8671 	struct pci_dev *root = rdev->pdev->bus->self;
8672 	int bridge_pos, gpu_pos;
8673 	u32 speed_cntl, mask, current_data_rate;
8674 	int ret, i;
8675 	u16 tmp16;
8676 
8677 	if (radeon_pcie_gen2 == 0)
8678 		return;
8679 
8680 	if (rdev->flags & RADEON_IS_IGP)
8681 		return;
8682 
8683 	if (!(rdev->flags & RADEON_IS_PCIE))
8684 		return;
8685 
8686 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
8687 	if (ret != 0)
8688 		return;
8689 
8690 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
8691 		return;
8692 
8693 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8694 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
8695 		LC_CURRENT_DATA_RATE_SHIFT;
8696 	if (mask & DRM_PCIE_SPEED_80) {
8697 		if (current_data_rate == 2) {
8698 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
8699 			return;
8700 		}
8701 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
8702 	} else if (mask & DRM_PCIE_SPEED_50) {
8703 		if (current_data_rate == 1) {
8704 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
8705 			return;
8706 		}
8707 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
8708 	}
8709 
8710 	bridge_pos = pci_pcie_cap(root);
8711 	if (!bridge_pos)
8712 		return;
8713 
8714 	gpu_pos = pci_pcie_cap(rdev->pdev);
8715 	if (!gpu_pos)
8716 		return;
8717 
8718 	if (mask & DRM_PCIE_SPEED_80) {
8719 		/* re-try equalization if gen3 is not already enabled */
8720 		if (current_data_rate != 2) {
8721 			u16 bridge_cfg, gpu_cfg;
8722 			u16 bridge_cfg2, gpu_cfg2;
8723 			u32 max_lw, current_lw, tmp;
8724 
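			/*
			 * Save both ends' link control state and disable
			 * hardware-autonomous width changes, then widen the
			 * link to its maximum before redoing equalization.
			 */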
8725 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8726 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8727 
8728 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
8729 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8730 
8731 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
8732 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8733 
8734 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8735 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
8736 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
8737 
8738 			if (current_lw < max_lw) {
8739 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8740 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
8741 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
8742 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
8743 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
8744 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
8745 				}
8746 			}
8747 
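			/*
			 * Equalization retry loop: quiesce the link, redo EQ,
			 * then restore the saved link control settings; stop
			 * early if the device reports pending transactions.
			 */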
8748 			for (i = 0; i < 10; i++) {
8749 				/* check status */
8750 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
8751 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
8752 					break;
8753 
8754 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8755 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8756 
8757 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
8758 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
8759 
8760 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8761 				tmp |= LC_SET_QUIESCE;
8762 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8763 
8764 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8765 				tmp |= LC_REDO_EQ;
8766 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8767 
8768 				mdelay(100);
8769 
8770 				/* linkctl */
8771 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
8772 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8773 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
8774 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8775 
8776 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
8777 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8778 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
8779 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8780 
8781 				/* linkctl2 */
8782 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
8783 				tmp16 &= ~((1 << 4) | (7 << 9));
8784 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
8785 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
8786 
8787 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8788 				tmp16 &= ~((1 << 4) | (7 << 9));
8789 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
8790 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8791 
8792 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8793 				tmp &= ~LC_SET_QUIESCE;
8794 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8795 			}
8796 		}
8797 	}
8798 
8799 	/* set the link speed */
8800 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
8801 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
8802 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8803 
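	/* program the target link speed (low 4 bits of LNKCTL2) to match */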
8804 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8805 	tmp16 &= ~0xf;
8806 	if (mask & DRM_PCIE_SPEED_80)
8807 		tmp16 |= 3; /* gen3 */
8808 	else if (mask & DRM_PCIE_SPEED_50)
8809 		tmp16 |= 2; /* gen2 */
8810 	else
8811 		tmp16 |= 1; /* gen1 */
8812 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8813 
8814 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8815 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
8816 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8817 
8818 	for (i = 0; i < rdev->usec_timeout; i++) {
8819 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8820 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
8821 			break;
8822 		udelay(1);
8823 	}
8824 }
8825 
8826 static void cik_program_aspm(struct radeon_device *rdev)
8827 {
8828 	u32 data, orig;
8829 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
8830 	bool disable_clkreq = false;
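	/* all four power states are allowed by default; the bools above are
	 * opt-outs for boards where a given state misbehaves
	 */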
8831 
8832 	if (radeon_aspm == 0)
8833 		return;
8834 
8835 	/* XXX double check IGPs */
8836 	if (rdev->flags & RADEON_IS_IGP)
8837 		return;
8838 
8839 	if (!(rdev->flags & RADEON_IS_PCIE))
8840 		return;
8841 
8842 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8843 	data &= ~LC_XMIT_N_FTS_MASK;
8844 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
8845 	if (orig != data)
8846 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
8847 
8848 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
8849 	data |= LC_GO_TO_RECOVERY;
8850 	if (orig != data)
8851 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
8852 
8853 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
8854 	data |= P_IGNORE_EDB_ERR;
8855 	if (orig != data)
8856 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
8857 
8858 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8859 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
8860 	data |= LC_PMI_TO_L1_DIS;
8861 	if (!disable_l0s)
8862 		data |= LC_L0S_INACTIVITY(7);
8863 
8864 	if (!disable_l1) {
8865 		data |= LC_L1_INACTIVITY(7);
8866 		data &= ~LC_PMI_TO_L1_DIS;
8867 		if (orig != data)
8868 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8869 
8870 		if (!disable_plloff_in_l1) {
8871 			bool clk_req_support;
8872 
8873 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
8874 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8875 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8876 			if (orig != data)
8877 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
8878 
8879 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
8880 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8881 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8882 			if (orig != data)
8883 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
8884 
8885 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
8886 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8887 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8888 			if (orig != data)
8889 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
8890 
8891 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
8892 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8893 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8894 			if (orig != data)
8895 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
8896 
8897 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8898 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
8899 			data |= LC_DYN_LANES_PWR_STATE(3);
8900 			if (orig != data)
8901 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
8902 
8903 			if (!disable_clkreq) {
8904 				struct pci_dev *root = rdev->pdev->bus->self;
8905 				u32 lnkcap;
8906 
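				/* CLKREQ#-based power-down is only usable if
				 * the root port advertises clock PM support
				 */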
8907 				clk_req_support = false;
8908 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
8909 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
8910 					clk_req_support = true;
8911 			} else {
8912 				clk_req_support = false;
8913 			}
8914 
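			/* with CLKREQ# usable, allow PLL power-down in L1/L23
			 * and move the misc clocks off the PCIe reference
			 */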
8915 			if (clk_req_support) {
8916 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
8917 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
8918 				if (orig != data)
8919 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
8920 
8921 				orig = data = RREG32_SMC(THM_CLK_CNTL);
8922 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
8923 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
8924 				if (orig != data)
8925 					WREG32_SMC(THM_CLK_CNTL, data);
8926 
8927 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
8928 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
8929 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
8930 				if (orig != data)
8931 					WREG32_SMC(MISC_CLK_CTRL, data);
8932 
8933 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
8934 				data &= ~BCLK_AS_XCLK;
8935 				if (orig != data)
8936 					WREG32_SMC(CG_CLKPIN_CNTL, data);
8937 
8938 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
8939 				data &= ~FORCE_BIF_REFCLK_EN;
8940 				if (orig != data)
8941 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
8942 
8943 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
8944 				data &= ~MPLL_CLKOUT_SEL_MASK;
8945 				data |= MPLL_CLKOUT_SEL(4);
8946 				if (orig != data)
8947 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
8948 			}
8949 		}
8950 	} else {
8951 		if (orig != data)
8952 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8953 	}
8954 
8955 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
8956 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
8957 	if (orig != data)
8958 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
8959 
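	/* back L0s off again if the link came up lane-reversed in both
	 * directions with the N_FTS field maxed out
	 */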
8960 	if (!disable_l0s) {
8961 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8962 	if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
8963 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8964 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
8965 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8966 				data &= ~LC_L0S_INACTIVITY_MASK;
8967 				if (orig != data)
8968 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8969 			}
8970 		}
8971 	}
8972 }
8973