/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void cik_sdma_vm_set_page(struct radeon_device *rdev,
				 struct radeon_ib *ib,
				 uint64_t pe,
				 uint64_t addr, unsigned count,
				 uint32_t incr, uint32_t flags);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
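
/*
 * Note on the conversions above: CTF_TEMP in ci_get_temp() is a 9-bit
 * field, so readings with bit 9 set are clamped to 255 degrees C, and
 * kv_get_temp() derives degrees C from an SMC register as (raw / 8) - 49.
 * Both helpers scale by 1000 since the hwmon interface expects
 * millidegrees.
 */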

/*
 * Indirect register accessors.  The PCIE port registers are reached
 * through an index/data pair rather than being mapped directly; the
 * dummy read-back of PCIE_INDEX after each write posts the index
 * update before the data register is touched, and pciep_idx_lock
 * serializes users of the shared index register.
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
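/*
 * RLC save/restore lists.  The layout is consumed by the RLC ucode
 * rather than documented here, but each pair below appears to be a
 * packed register descriptor, (instance/broadcast select << 16) |
 * (register byte offset >> 2), followed by a zero placeholder for the
 * saved value; the bare counts (0x3, 0x5) introduce special sub-blocks.
 */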
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

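/*
 * "Golden" register tables.  Each table is a flat array of
 * {reg, and_mask, or_mask} triplets consumed by
 * radeon_program_register_sequence(), which does a read-modify-write
 * per entry, roughly (sketch, see radeon_device.c):
 *
 *	tmp = (and_mask == 0xffffffff) ? or_mask :
 *		(RREG32(reg) & ~and_mask) | or_mask;
 *	WREG32(reg, tmp);
 */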
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28355, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	default:
		break;
	}
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 *
 * Returns the value in the doorbell aperture at the
 * requested offset (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
{
	if (offset < rdev->doorbell.size) {
		return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested offset (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
{
	if (offset < rdev->doorbell.size) {
		writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
	}
}
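
/*
 * Example (sketch): a compute ring that caches its doorbell byte
 * offset (the field name below is illustrative, not guaranteed by
 * this version of radeon_ring) would bump its write pointer with:
 *
 *	cik_mm_wdoorbell(rdev, ring->doorbell_offset, ring->wptr);
 */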

#define BONAIRE_IO_MC_REGS_SIZE 36

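/*
 * Each entry below is an {index, data} pair: ci_mc_load_microcode()
 * writes the first word to MC_SEQ_IO_DEBUG_INDEX and the second to
 * MC_SEQ_IO_DEBUG_DATA before streaming in the MC ucode.
 */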
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances.  Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.  Callers are expected to serialize
 * access to SRBM_GFX_CNTL (typically via rdev->srbm_mutex).
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}
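
/*
 * Typical usage (sketch): bracket instanced register accesses with a
 * select/restore pair while holding the SRBM mutex:
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, 0);
 *	... program per-queue registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */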

/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	/* only load the ucode when the MC engine is not already running;
	 * the old blackout save/restore here was dead code (it was guarded
	 * by running inside a running == 0 check) and has been dropped.
	 */
	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}

/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size,
		sdma_req_size, smc_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = CIK_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->mec_fw->size != mec_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mec_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->sdma_fw->size != sdma_req_size) {
		printk(KERN_ERR
		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
		       rdev->sdma_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	/* No SMC, MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mc_fw->size != mc_req_size) {
			printk(KERN_ERR
			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}

		/* SMC firmware is optional; failure to load it is not fatal */
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
		if (err) {
			printk(KERN_ERR
			       "smc: error loading firmware \"%s\"\n",
			       fw_name);
			release_firmware(rdev->smc_fw);
			rdev->smc_fw = NULL;
			err = 0;
		} else if (rdev->smc_fw->size != smc_req_size) {
			printk(KERN_ERR
			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->smc_fw->size, fw_name);
			err = -EINVAL;
		}
	}

out:
	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
		release_firmware(rdev->smc_fw);
		rdev->smc_fw = NULL;
	}
	return err;
}

/*
 * Core functions
 */
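/*
 * Each GB_TILE_MODE entry programmed below ORs together ARRAY_MODE,
 * PIPE_CONFIG, TILE_SPLIT, MICRO_TILE_MODE_NEW and SAMPLE_SPLIT fields
 * (macros from cikd.h); indices not set explicitly in the switch
 * (e.g. 15) fall through to the default and are programmed as 0.
 */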
/**
 * cik_tiling_mode_table_init - init the hw tiling table
 *
 * @rdev: radeon_device pointer
 *
 * Starting with SI, the tiling setup is done globally in a
 * set of 32 tiling modes.  Rather than selecting each set of
 * parameters per surface as on older asics, we just select
 * which index in the tiling table we want to use, and the
 * surface uses those parameters (CIK).
 */
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
{
	const u32 num_tile_mode_states = 32;
	const u32 num_secondary_tile_mode_states = 16;
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
	u32 num_pipe_configs;
	u32 num_rbs = rdev->config.cik.max_backends_per_se *
		rdev->config.cik.max_shader_engines;

	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
		break;
	case 2:
	default:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
		break;
	case 4:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
		break;
	}

	num_pipe_configs = rdev->config.cik.max_tile_pipes;
	if (num_pipe_configs > 8)
		num_pipe_configs = 8; /* ??? */

	if (num_pipe_configs == 8) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
				break;
			case 1:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
				break;
			case 2:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 3:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
				break;
			case 4:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 5:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
				break;
			case 6:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 7:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 8:
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
				break;
			case 9:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
				break;
			case 10:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 11:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 12:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 13:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
				break;
			case 14:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 16:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1889 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1890 		}
1891 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1892 			switch (reg_offset) {
1893 			case 0:
1894 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1895 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1896 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1897 						 NUM_BANKS(ADDR_SURF_16_BANK));
1898 				break;
1899 			case 1:
1900 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1901 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1902 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1903 						 NUM_BANKS(ADDR_SURF_16_BANK));
1904 				break;
1905 			case 2:
1906 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1907 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1908 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1909 						 NUM_BANKS(ADDR_SURF_16_BANK));
1910 				break;
1911 			case 3:
1912 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1913 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1914 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1915 						 NUM_BANKS(ADDR_SURF_16_BANK));
1916 				break;
1917 			case 4:
1918 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1919 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1920 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1921 						 NUM_BANKS(ADDR_SURF_8_BANK));
1922 				break;
1923 			case 5:
1924 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1925 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1926 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1927 						 NUM_BANKS(ADDR_SURF_4_BANK));
1928 				break;
1929 			case 6:
1930 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1931 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1932 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1933 						 NUM_BANKS(ADDR_SURF_2_BANK));
1934 				break;
1935 			case 8:
1936 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1937 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1938 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1939 						 NUM_BANKS(ADDR_SURF_16_BANK));
1940 				break;
1941 			case 9:
1942 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1943 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1944 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1945 						 NUM_BANKS(ADDR_SURF_16_BANK));
1946 				break;
1947 			case 10:
1948 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1949 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1950 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1951 						 NUM_BANKS(ADDR_SURF_16_BANK));
1952 				break;
1953 			case 11:
1954 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1955 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1956 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1957 						 NUM_BANKS(ADDR_SURF_16_BANK));
1958 				break;
1959 			case 12:
1960 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1961 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1962 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1963 						 NUM_BANKS(ADDR_SURF_8_BANK));
1964 				break;
1965 			case 13:
1966 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1967 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1968 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1969 						 NUM_BANKS(ADDR_SURF_4_BANK));
1970 				break;
1971 			case 14:
1972 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1973 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1974 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1975 						 NUM_BANKS(ADDR_SURF_2_BANK));
1976 				break;
1977 			default:
1978 				gb_tile_moden = 0;
1979 				break;
1980 			}
1981 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1982 		}
1983 	} else if (num_pipe_configs == 4) {
1984 		if (num_rbs == 4) {
1985 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1986 				switch (reg_offset) {
1987 				case 0:
1988 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1989 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1990 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1991 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1992 					break;
1993 				case 1:
1994 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1995 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1996 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1997 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1998 					break;
1999 				case 2:
2000 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2001 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2002 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2003 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2004 					break;
2005 				case 3:
2006 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2007 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2008 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2009 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2010 					break;
2011 				case 4:
2012 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2013 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2014 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2015 							 TILE_SPLIT(split_equal_to_row_size));
2016 					break;
2017 				case 5:
2018 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2019 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2020 					break;
2021 				case 6:
2022 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2023 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2024 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2025 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2026 					break;
2027 				case 7:
2028 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2029 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2030 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2031 							 TILE_SPLIT(split_equal_to_row_size));
2032 					break;
2033 				case 8:
2034 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2035 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2036 					break;
2037 				case 9:
2038 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2039 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2040 					break;
2041 				case 10:
2042 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2043 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2044 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2045 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2046 					break;
2047 				case 11:
2048 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2049 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2050 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2051 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2052 					break;
2053 				case 12:
2054 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2055 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2056 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2057 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2058 					break;
2059 				case 13:
2060 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2061 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2062 					break;
2063 				case 14:
2064 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2065 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2066 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2067 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2068 					break;
2069 				case 16:
2070 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2071 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2072 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2073 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2074 					break;
2075 				case 17:
2076 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2077 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2078 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2079 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2080 					break;
2081 				case 27:
2082 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2083 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2084 					break;
2085 				case 28:
2086 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2087 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2088 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2089 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2090 					break;
2091 				case 29:
2092 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2093 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2094 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2095 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2096 					break;
2097 				case 30:
2098 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2099 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2100 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2101 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2102 					break;
2103 				default:
2104 					gb_tile_moden = 0;
2105 					break;
2106 				}
2107 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2108 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2109 			}
2110 		} else if (num_rbs < 4) {
2111 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2112 				switch (reg_offset) {
2113 				case 0:
2114 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2115 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2116 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2117 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2118 					break;
2119 				case 1:
2120 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2121 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2122 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2123 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2124 					break;
2125 				case 2:
2126 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2127 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2128 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2129 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2130 					break;
2131 				case 3:
2132 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2133 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2134 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2135 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2136 					break;
2137 				case 4:
2138 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2139 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2140 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2141 							 TILE_SPLIT(split_equal_to_row_size));
2142 					break;
2143 				case 5:
2144 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2145 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2146 					break;
2147 				case 6:
2148 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2149 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2150 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2151 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2152 					break;
2153 				case 7:
2154 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2155 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2156 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2157 							 TILE_SPLIT(split_equal_to_row_size));
2158 					break;
2159 				case 8:
2160 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2161 						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2162 					break;
2163 				case 9:
2164 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2165 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2166 					break;
2167 				case 10:
2168 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2169 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2170 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2171 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2172 					break;
2173 				case 11:
2174 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2175 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2176 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2177 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2178 					break;
2179 				case 12:
2180 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2181 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2182 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2183 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2184 					break;
2185 				case 13:
2186 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2187 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2188 					break;
2189 				case 14:
2190 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2191 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2192 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2193 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2194 					break;
2195 				case 16:
2196 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2197 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2198 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2199 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2200 					break;
2201 				case 17:
2202 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2203 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2204 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2205 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2206 					break;
2207 				case 27:
2208 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2209 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2210 					break;
2211 				case 28:
2212 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2213 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2214 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2215 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2216 					break;
2217 				case 29:
2218 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2219 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2220 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2221 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2222 					break;
2223 				case 30:
2224 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2225 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2226 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2227 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2228 					break;
2229 				default:
2230 					gb_tile_moden = 0;
2231 					break;
2232 				}
2233 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2234 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2235 			}
2236 		}
2237 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2238 			switch (reg_offset) {
2239 			case 0:
2240 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2241 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2242 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2243 						 NUM_BANKS(ADDR_SURF_16_BANK));
2244 				break;
2245 			case 1:
2246 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2247 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2248 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2249 						 NUM_BANKS(ADDR_SURF_16_BANK));
2250 				break;
2251 			case 2:
2252 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2253 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2254 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2255 						 NUM_BANKS(ADDR_SURF_16_BANK));
2256 				break;
2257 			case 3:
2258 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2259 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2260 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2261 						 NUM_BANKS(ADDR_SURF_16_BANK));
2262 				break;
2263 			case 4:
2264 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2265 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2266 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2267 						 NUM_BANKS(ADDR_SURF_16_BANK));
2268 				break;
2269 			case 5:
2270 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2271 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2272 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2273 						 NUM_BANKS(ADDR_SURF_8_BANK));
2274 				break;
2275 			case 6:
2276 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2277 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2278 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2279 						 NUM_BANKS(ADDR_SURF_4_BANK));
2280 				break;
2281 			case 8:
2282 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2283 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2284 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2285 						 NUM_BANKS(ADDR_SURF_16_BANK));
2286 				break;
2287 			case 9:
2288 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2289 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2290 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2291 						 NUM_BANKS(ADDR_SURF_16_BANK));
2292 				break;
2293 			case 10:
2294 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2295 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2296 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2297 						 NUM_BANKS(ADDR_SURF_16_BANK));
2298 				break;
2299 			case 11:
2300 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2301 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2302 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2303 						 NUM_BANKS(ADDR_SURF_16_BANK));
2304 				break;
2305 			case 12:
2306 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2307 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2308 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2309 						 NUM_BANKS(ADDR_SURF_16_BANK));
2310 				break;
2311 			case 13:
2312 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2313 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2314 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2315 						 NUM_BANKS(ADDR_SURF_8_BANK));
2316 				break;
2317 			case 14:
2318 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2319 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2320 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2321 						 NUM_BANKS(ADDR_SURF_4_BANK));
2322 				break;
2323 			default:
2324 				gb_tile_moden = 0;
2325 				break;
2326 			}
2327 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2328 		}
2329 	} else if (num_pipe_configs == 2) {
2330 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2331 			switch (reg_offset) {
2332 			case 0:
2333 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2334 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2335 						 PIPE_CONFIG(ADDR_SURF_P2) |
2336 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2337 				break;
2338 			case 1:
2339 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2340 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2341 						 PIPE_CONFIG(ADDR_SURF_P2) |
2342 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2343 				break;
2344 			case 2:
2345 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2346 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2347 						 PIPE_CONFIG(ADDR_SURF_P2) |
2348 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2349 				break;
2350 			case 3:
2351 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2352 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2353 						 PIPE_CONFIG(ADDR_SURF_P2) |
2354 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2355 				break;
2356 			case 4:
2357 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2358 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2359 						 PIPE_CONFIG(ADDR_SURF_P2) |
2360 						 TILE_SPLIT(split_equal_to_row_size));
2361 				break;
2362 			case 5:
2363 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2364 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2365 				break;
2366 			case 6:
2367 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2368 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2369 						 PIPE_CONFIG(ADDR_SURF_P2) |
2370 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2371 				break;
2372 			case 7:
2373 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2374 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2375 						 PIPE_CONFIG(ADDR_SURF_P2) |
2376 						 TILE_SPLIT(split_equal_to_row_size));
2377 				break;
2378 			case 8:
2379 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2380 				break;
2381 			case 9:
2382 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2383 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2384 				break;
2385 			case 10:
2386 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2388 						 PIPE_CONFIG(ADDR_SURF_P2) |
2389 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2390 				break;
2391 			case 11:
2392 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2393 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2394 						 PIPE_CONFIG(ADDR_SURF_P2) |
2395 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2396 				break;
2397 			case 12:
2398 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2399 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2400 						 PIPE_CONFIG(ADDR_SURF_P2) |
2401 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2402 				break;
2403 			case 13:
2404 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2405 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2406 				break;
2407 			case 14:
2408 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2409 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2410 						 PIPE_CONFIG(ADDR_SURF_P2) |
2411 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2412 				break;
2413 			case 16:
2414 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2415 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2416 						 PIPE_CONFIG(ADDR_SURF_P2) |
2417 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2418 				break;
2419 			case 17:
2420 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2421 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2422 						 PIPE_CONFIG(ADDR_SURF_P2) |
2423 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2424 				break;
2425 			case 27:
2426 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2427 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2428 				break;
2429 			case 28:
2430 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2431 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2432 						 PIPE_CONFIG(ADDR_SURF_P2) |
2433 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434 				break;
2435 			case 29:
2436 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2438 						 PIPE_CONFIG(ADDR_SURF_P2) |
2439 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440 				break;
2441 			case 30:
2442 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2443 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2444 						 PIPE_CONFIG(ADDR_SURF_P2) |
2445 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2446 				break;
2447 			default:
2448 				gb_tile_moden = 0;
2449 				break;
2450 			}
2451 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2452 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2453 		}
2454 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2455 			switch (reg_offset) {
2456 			case 0:
2457 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2458 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2459 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2460 						 NUM_BANKS(ADDR_SURF_16_BANK));
2461 				break;
2462 			case 1:
2463 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2464 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2465 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2466 						 NUM_BANKS(ADDR_SURF_16_BANK));
2467 				break;
2468 			case 2:
2469 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2471 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2472 						 NUM_BANKS(ADDR_SURF_16_BANK));
2473 				break;
2474 			case 3:
2475 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2476 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2477 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2478 						 NUM_BANKS(ADDR_SURF_16_BANK));
2479 				break;
2480 			case 4:
2481 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2483 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2484 						 NUM_BANKS(ADDR_SURF_16_BANK));
2485 				break;
2486 			case 5:
2487 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2488 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2489 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2490 						 NUM_BANKS(ADDR_SURF_16_BANK));
2491 				break;
2492 			case 6:
2493 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2495 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2496 						 NUM_BANKS(ADDR_SURF_8_BANK));
2497 				break;
2498 			case 8:
2499 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2500 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2501 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2502 						 NUM_BANKS(ADDR_SURF_16_BANK));
2503 				break;
2504 			case 9:
2505 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2506 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2507 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2508 						 NUM_BANKS(ADDR_SURF_16_BANK));
2509 				break;
2510 			case 10:
2511 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2512 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2513 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2514 						 NUM_BANKS(ADDR_SURF_16_BANK));
2515 				break;
2516 			case 11:
2517 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2518 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2519 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2520 						 NUM_BANKS(ADDR_SURF_16_BANK));
2521 				break;
2522 			case 12:
2523 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2525 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2526 						 NUM_BANKS(ADDR_SURF_16_BANK));
2527 				break;
2528 			case 13:
2529 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2530 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2531 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2532 						 NUM_BANKS(ADDR_SURF_16_BANK));
2533 				break;
2534 			case 14:
2535 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2536 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2537 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2538 						 NUM_BANKS(ADDR_SURF_8_BANK));
2539 				break;
2540 			default:
2541 				gb_tile_moden = 0;
2542 				break;
2543 			}
2544 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2545 		}
2546 	} else
2547 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2548 }
2549 
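/*
 * Illustrative sketch only: a consumer of the table above does not
 * re-specify tiling parameters per surface.  It simply picks an index
 * (0-31, "index" here being hypothetical) and reads the packed word:
 *
 *	u32 mode = rdev->config.cik.tile_mode_array[index];
 *
 * where the word already carries the OR'd ARRAY_MODE, PIPE_CONFIG,
 * MICRO_TILE_MODE_NEW and TILE_SPLIT fields programmed here.
 */
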
2550 /**
2551  * cik_select_se_sh - select which SE, SH to address
2552  *
2553  * @rdev: radeon_device pointer
2554  * @se_num: shader engine to address
2555  * @sh_num: sh block to address
2556  *
2557  * Select which SE, SH combinations to address. Certain
2558  * registers are instanced per SE or SH.  0xffffffff means
2559  * broadcast to all SEs or SHs (CIK).
2560  */
2561 static void cik_select_se_sh(struct radeon_device *rdev,
2562 			     u32 se_num, u32 sh_num)
2563 {
2564 	u32 data = INSTANCE_BROADCAST_WRITES;
2565 
2566 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2567 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2568 	else if (se_num == 0xffffffff)
2569 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2570 	else if (sh_num == 0xffffffff)
2571 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2572 	else
2573 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2574 	WREG32(GRBM_GFX_INDEX, data);
2575 }
2576 
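/*
 * Usage sketch, mirroring the callers further down: address one SE/SH
 * pair explicitly, then restore broadcast so that subsequent writes
 * reach every instance again ("se"/"sh" are placeholders):
 *
 *	cik_select_se_sh(rdev, se, sh);
 *	... access per-instance registers ...
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 */
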
2577 /**
2578  * cik_create_bitmask - create a bitmask
2579  *
2580  * @bit_width: length of the mask
2581  *
2582  * create a variable length bit mask (CIK).
2583  * Returns the bitmask.
2584  */
2585 static u32 cik_create_bitmask(u32 bit_width)
2586 {
2587 	u32 i, mask = 0;
2588 
2589 	for (i = 0; i < bit_width; i++) {
2590 		mask <<= 1;
2591 		mask |= 1;
2592 	}
2593 	return mask;
2594 }
2595 
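/*
 * Equivalent closed form (illustrative): for bit_width < 32 the loop
 * above computes (1 << bit_width) - 1, e.g. cik_create_bitmask(4)
 * returns 0xf.  The loop form also stays well defined for
 * bit_width == 32, where a 32-bit shift would be undefined.
 */
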
2596 /**
2597  * cik_get_rb_disabled - compute the bitmask of disabled RBs
2598  *
2599  * @rdev: radeon_device pointer
2600  * @max_rb_num: max RBs (render backends) for the asic
2601  * @se_num: number of SEs (shader engines) for the asic
2602  * @sh_per_se: number of SH blocks per SE for the asic
2603  *
2604  * Calculates the bitmask of disabled RBs (CIK).
2605  * Returns the disabled RB bitmask.
2606  */
2607 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
2608 			      u32 max_rb_num, u32 se_num,
2609 			      u32 sh_per_se)
2610 {
2611 	u32 data, mask;
2612 
2613 	data = RREG32(CC_RB_BACKEND_DISABLE);
2614 	if (data & 1)
2615 		data &= BACKEND_DISABLE_MASK;
2616 	else
2617 		data = 0;
2618 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2619 
2620 	data >>= BACKEND_DISABLE_SHIFT;
2621 
2622 	mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
2623 
2624 	return data & mask;
2625 }
2626 
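/*
 * Worked example (illustrative values): with max_rb_num = 4,
 * se_num = 2 and sh_per_se = 1, the mask covers 4 / 2 / 1 = 2 RBs per
 * SH, so only the two low bits of the shifted disable field survive.
 */
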
2627 /**
2628  * cik_setup_rb - setup the RBs on the asic
2629  *
2630  * @rdev: radeon_device pointer
2631  * @se_num: number of SEs (shader engines) for the asic
2632  * @sh_per_se: number of SH blocks per SE for the asic
2633  * @max_rb_num: max RBs (render backends) for the asic
2634  *
2635  * Configures per-SE/SH RB registers (CIK).
2636  */
2637 static void cik_setup_rb(struct radeon_device *rdev,
2638 			 u32 se_num, u32 sh_per_se,
2639 			 u32 max_rb_num)
2640 {
2641 	int i, j;
2642 	u32 data, mask;
2643 	u32 disabled_rbs = 0;
2644 	u32 enabled_rbs = 0;
2645 
2646 	for (i = 0; i < se_num; i++) {
2647 		for (j = 0; j < sh_per_se; j++) {
2648 			cik_select_se_sh(rdev, i, j);
2649 			data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
2650 			disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
2651 		}
2652 	}
2653 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2654 
2655 	mask = 1;
2656 	for (i = 0; i < max_rb_num; i++) {
2657 		if (!(disabled_rbs & mask))
2658 			enabled_rbs |= mask;
2659 		mask <<= 1;
2660 	}
2661 
2662 	for (i = 0; i < se_num; i++) {
2663 		cik_select_se_sh(rdev, i, 0xffffffff);
2664 		data = 0;
2665 		for (j = 0; j < sh_per_se; j++) {
2666 			switch (enabled_rbs & 3) {
2667 			case 1:
2668 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2669 				break;
2670 			case 2:
2671 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2672 				break;
2673 			case 3:
2674 			default:
2675 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2676 				break;
2677 			}
2678 			enabled_rbs >>= 2;
2679 		}
2680 		WREG32(PA_SC_RASTER_CONFIG, data);
2681 	}
2682 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2683 }
2684 
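/*
 * Decoding note for the loop above: enabled_rbs is consumed two bits
 * at a time, one pair per SH.  Both RBs alive (bits == 3) selects the
 * default RASTER_CONFIG_RB_MAP_2; a single surviving RB (bits == 1 or
 * 2) selects RB_MAP_0 or RB_MAP_3 respectively.
 */
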
2685 /**
2686  * cik_gpu_init - setup the 3D engine
2687  *
2688  * @rdev: radeon_device pointer
2689  *
2690  * Configures the 3D engine and tiling configuration
2691  * registers so that the 3D engine is usable.
2692  */
2693 static void cik_gpu_init(struct radeon_device *rdev)
2694 {
2695 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
2696 	u32 mc_shared_chmap, mc_arb_ramcfg;
2697 	u32 hdp_host_path_cntl;
2698 	u32 tmp;
2699 	int i, j;
2700 
2701 	switch (rdev->family) {
2702 	case CHIP_BONAIRE:
2703 		rdev->config.cik.max_shader_engines = 2;
2704 		rdev->config.cik.max_tile_pipes = 4;
2705 		rdev->config.cik.max_cu_per_sh = 7;
2706 		rdev->config.cik.max_sh_per_se = 1;
2707 		rdev->config.cik.max_backends_per_se = 2;
2708 		rdev->config.cik.max_texture_channel_caches = 4;
2709 		rdev->config.cik.max_gprs = 256;
2710 		rdev->config.cik.max_gs_threads = 32;
2711 		rdev->config.cik.max_hw_contexts = 8;
2712 
2713 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2714 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2715 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2716 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2717 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2718 		break;
2719 	case CHIP_KAVERI:
2720 		rdev->config.cik.max_shader_engines = 1;
2721 		rdev->config.cik.max_tile_pipes = 4;
2722 		if ((rdev->pdev->device == 0x1304) ||
2723 		    (rdev->pdev->device == 0x1305) ||
2724 		    (rdev->pdev->device == 0x130C) ||
2725 		    (rdev->pdev->device == 0x130F) ||
2726 		    (rdev->pdev->device == 0x1310) ||
2727 		    (rdev->pdev->device == 0x1311) ||
2728 		    (rdev->pdev->device == 0x131C)) {
2729 			rdev->config.cik.max_cu_per_sh = 8;
2730 			rdev->config.cik.max_backends_per_se = 2;
2731 		} else if ((rdev->pdev->device == 0x1309) ||
2732 			   (rdev->pdev->device == 0x130A) ||
2733 			   (rdev->pdev->device == 0x130D) ||
2734 			   (rdev->pdev->device == 0x1313) ||
2735 			   (rdev->pdev->device == 0x131D)) {
2736 			rdev->config.cik.max_cu_per_sh = 6;
2737 			rdev->config.cik.max_backends_per_se = 2;
2738 		} else if ((rdev->pdev->device == 0x1306) ||
2739 			   (rdev->pdev->device == 0x1307) ||
2740 			   (rdev->pdev->device == 0x130B) ||
2741 			   (rdev->pdev->device == 0x130E) ||
2742 			   (rdev->pdev->device == 0x1315) ||
2743 			   (rdev->pdev->device == 0x131B)) {
2744 			rdev->config.cik.max_cu_per_sh = 4;
2745 			rdev->config.cik.max_backends_per_se = 1;
2746 		} else {
2747 			rdev->config.cik.max_cu_per_sh = 3;
2748 			rdev->config.cik.max_backends_per_se = 1;
2749 		}
2750 		rdev->config.cik.max_sh_per_se = 1;
2751 		rdev->config.cik.max_texture_channel_caches = 4;
2752 		rdev->config.cik.max_gprs = 256;
2753 		rdev->config.cik.max_gs_threads = 16;
2754 		rdev->config.cik.max_hw_contexts = 8;
2755 
2756 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2757 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2758 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2759 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2760 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2761 		break;
2762 	case CHIP_KABINI:
2763 	default:
2764 		rdev->config.cik.max_shader_engines = 1;
2765 		rdev->config.cik.max_tile_pipes = 2;
2766 		rdev->config.cik.max_cu_per_sh = 2;
2767 		rdev->config.cik.max_sh_per_se = 1;
2768 		rdev->config.cik.max_backends_per_se = 1;
2769 		rdev->config.cik.max_texture_channel_caches = 2;
2770 		rdev->config.cik.max_gprs = 256;
2771 		rdev->config.cik.max_gs_threads = 16;
2772 		rdev->config.cik.max_hw_contexts = 8;
2773 
2774 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2775 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2776 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2777 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2778 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2779 		break;
2780 	}
2781 
2782 	/* Initialize HDP */
2783 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2784 		WREG32((0x2c14 + j), 0x00000000);
2785 		WREG32((0x2c18 + j), 0x00000000);
2786 		WREG32((0x2c1c + j), 0x00000000);
2787 		WREG32((0x2c20 + j), 0x00000000);
2788 		WREG32((0x2c24 + j), 0x00000000);
2789 	}
2790 
2791 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2792 
2793 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2794 
2795 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2796 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2797 
2798 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
2799 	rdev->config.cik.mem_max_burst_length_bytes = 256;
2800 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
2801 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2802 	if (rdev->config.cik.mem_row_size_in_kb > 4)
2803 		rdev->config.cik.mem_row_size_in_kb = 4;
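	/*
	 * Worked example (illustrative): NOOFCOLS = 0 yields
	 * (4 * (1 << 8)) / 1024 = 1 KB rows; NOOFCOLS = 3 would yield
	 * 8 KB and is clamped to 4 KB just above.
	 */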
2804 	/* XXX use MC settings? */
2805 	rdev->config.cik.shader_engine_tile_size = 32;
2806 	rdev->config.cik.num_gpus = 1;
2807 	rdev->config.cik.multi_gpu_tile_size = 64;
2808 
2809 	/* fix up row size */
2810 	gb_addr_config &= ~ROW_SIZE_MASK;
2811 	switch (rdev->config.cik.mem_row_size_in_kb) {
2812 	case 1:
2813 	default:
2814 		gb_addr_config |= ROW_SIZE(0);
2815 		break;
2816 	case 2:
2817 		gb_addr_config |= ROW_SIZE(1);
2818 		break;
2819 	case 4:
2820 		gb_addr_config |= ROW_SIZE(2);
2821 		break;
2822 	}
2823 
2824 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
2825 	 * not have bank info, so create a custom tiling dword.
2826 	 * bits 3:0   num_pipes
2827 	 * bits 7:4   num_banks
2828 	 * bits 11:8  group_size
2829 	 * bits 15:12 row_size
2830 	 */
2831 	rdev->config.cik.tile_config = 0;
2832 	switch (rdev->config.cik.num_tile_pipes) {
2833 	case 1:
2834 		rdev->config.cik.tile_config |= (0 << 0);
2835 		break;
2836 	case 2:
2837 		rdev->config.cik.tile_config |= (1 << 0);
2838 		break;
2839 	case 4:
2840 		rdev->config.cik.tile_config |= (2 << 0);
2841 		break;
2842 	case 8:
2843 	default:
2844 		/* XXX what about 12? */
2845 		rdev->config.cik.tile_config |= (3 << 0);
2846 		break;
2847 	}
2848 	rdev->config.cik.tile_config |=
2849 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
2850 	rdev->config.cik.tile_config |=
2851 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
2852 	rdev->config.cik.tile_config |=
2853 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
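	/*
	 * Example encoding (illustrative): with 4 tile pipes and
	 * row_size r, group_size g and bank field b taken from the
	 * registers above, tile_config ends up as
	 * (r << 12) | (g << 8) | (b << 4) | 2.
	 */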
2854 
2855 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
2856 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2857 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
2858 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
2859 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
2860 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
2861 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2862 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2863 
2864 	cik_tiling_mode_table_init(rdev);
2865 
2866 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
2867 		     rdev->config.cik.max_sh_per_se,
2868 		     rdev->config.cik.max_backends_per_se);
2869 
2870 	/* set HW defaults for 3D engine */
2871 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2872 
2873 	WREG32(SX_DEBUG_1, 0x20);
2874 
2875 	WREG32(TA_CNTL_AUX, 0x00010000);
2876 
2877 	tmp = RREG32(SPI_CONFIG_CNTL);
2878 	tmp |= 0x03000000;
2879 	WREG32(SPI_CONFIG_CNTL, tmp);
2880 
2881 	WREG32(SQ_CONFIG, 1);
2882 
2883 	WREG32(DB_DEBUG, 0);
2884 
2885 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2886 	tmp |= 0x00000400;
2887 	WREG32(DB_DEBUG2, tmp);
2888 
2889 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2890 	tmp |= 0x00020200;
2891 	WREG32(DB_DEBUG3, tmp);
2892 
2893 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2894 	tmp |= 0x00018208;
2895 	WREG32(CB_HW_CONTROL, tmp);
2896 
2897 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2898 
2899 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2900 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2901 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2902 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2903 
2904 	WREG32(VGT_NUM_INSTANCES, 1);
2905 
2906 	WREG32(CP_PERFMON_CNTL, 0);
2907 
2908 	WREG32(SQ_CONFIG, 0);
2909 
2910 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2911 					  FORCE_EOV_MAX_REZ_CNT(255)));
2912 
2913 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2914 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
2915 
2916 	WREG32(VGT_GS_VERTEX_REUSE, 16);
2917 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2918 
2919 	tmp = RREG32(HDP_MISC_CNTL);
2920 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2921 	WREG32(HDP_MISC_CNTL, tmp);
2922 
2923 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2924 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2925 
2926 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2927 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2928 
2929 	udelay(50);
2930 }
2931 
2932 /*
2933  * GPU scratch registers helpers function.
2934  */
2935 /**
2936  * cik_scratch_init - setup driver info for CP scratch regs
2937  *
2938  * @rdev: radeon_device pointer
2939  *
2940  * Set up the number and offset of the CP scratch registers.
2941  * NOTE: use of CP scratch registers is a legacy interface and
2942  * is not used by default on newer asics (r6xx+).  On newer asics,
2943  * memory buffers are used for fences rather than scratch regs.
2944  */
2945 static void cik_scratch_init(struct radeon_device *rdev)
2946 {
2947 	int i;
2948 
2949 	rdev->scratch.num_reg = 7;
2950 	rdev->scratch.reg_base = SCRATCH_REG0;
2951 	for (i = 0; i < rdev->scratch.num_reg; i++) {
2952 		rdev->scratch.free[i] = true;
2953 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2954 	}
2955 }
2956 
2957 /**
2958  * cik_ring_test - basic gfx ring test
2959  *
2960  * @rdev: radeon_device pointer
2961  * @ring: radeon_ring structure holding ring information
2962  *
2963  * Allocate a scratch register and write to it using the gfx ring (CIK).
2964  * Provides a basic gfx ring test to verify that the ring is working.
2965  * Used by cik_cp_gfx_resume().
2966  * Returns 0 on success, error on failure.
2967  */
2968 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2969 {
2970 	uint32_t scratch;
2971 	uint32_t tmp = 0;
2972 	unsigned i;
2973 	int r;
2974 
2975 	r = radeon_scratch_get(rdev, &scratch);
2976 	if (r) {
2977 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2978 		return r;
2979 	}
2980 	WREG32(scratch, 0xCAFEDEAD);
2981 	r = radeon_ring_lock(rdev, ring, 3);
2982 	if (r) {
2983 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2984 		radeon_scratch_free(rdev, scratch);
2985 		return r;
2986 	}
2987 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2988 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2989 	radeon_ring_write(ring, 0xDEADBEEF);
2990 	radeon_ring_unlock_commit(rdev, ring);
2991 
2992 	for (i = 0; i < rdev->usec_timeout; i++) {
2993 		tmp = RREG32(scratch);
2994 		if (tmp == 0xDEADBEEF)
2995 			break;
2996 		DRM_UDELAY(1);
2997 	}
2998 	if (i < rdev->usec_timeout) {
2999 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3000 	} else {
3001 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3002 			  ring->idx, scratch, tmp);
3003 		r = -EINVAL;
3004 	}
3005 	radeon_scratch_free(rdev, scratch);
3006 	return r;
3007 }
3008 
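/*
 * The pattern above is the standard radeon smoke test: seed the
 * scratch register with 0xCAFEDEAD from the CPU, ask the CP to write
 * 0xDEADBEEF through the ring, then poll for up to rdev->usec_timeout
 * microseconds for the new value to land.
 */
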
3009 /**
3010  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3011  *
3012  * @rdev: radeon_device pointer
3013  * @fence: radeon fence object
3014  *
3015  * Emits a fence sequence number on the gfx ring and flushes
3016  * GPU caches.
3017  */
3018 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3019 			     struct radeon_fence *fence)
3020 {
3021 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3022 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3023 
3024 	/* EVENT_WRITE_EOP - flush caches, send int */
3025 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3026 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3027 				 EOP_TC_ACTION_EN |
3028 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3029 				 EVENT_INDEX(5)));
3030 	radeon_ring_write(ring, addr & 0xfffffffc);
3031 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3032 	radeon_ring_write(ring, fence->seq);
3033 	radeon_ring_write(ring, 0);
3034 	/* HDP flush */
3035 	/* We should be using the new WAIT_REG_MEM special op packet here
3036 	 * but it causes the CP to hang
3037 	 */
3038 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3039 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3040 				 WRITE_DATA_DST_SEL(0)));
3041 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3042 	radeon_ring_write(ring, 0);
3043 	radeon_ring_write(ring, 0);
3044 }
3045 
3046 /**
3047  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3048  *
3049  * @rdev: radeon_device pointer
3050  * @fence: radeon fence object
3051  *
3052  * Emits a fence sequence number on the compute ring and flushes
3053  * GPU caches.
3054  */
3055 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3056 				 struct radeon_fence *fence)
3057 {
3058 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3059 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3060 
3061 	/* RELEASE_MEM - flush caches, send int */
3062 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3063 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3064 				 EOP_TC_ACTION_EN |
3065 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3066 				 EVENT_INDEX(5)));
3067 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3068 	radeon_ring_write(ring, addr & 0xfffffffc);
3069 	radeon_ring_write(ring, upper_32_bits(addr));
3070 	radeon_ring_write(ring, fence->seq);
3071 	radeon_ring_write(ring, 0);
3072 	/* HDP flush */
3073 	/* We should be using the new WAIT_REG_MEM special op packet here
3074 	 * but it causes the CP to hang
3075 	 */
3076 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3077 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3078 				 WRITE_DATA_DST_SEL(0)));
3079 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3080 	radeon_ring_write(ring, 0);
3081 	radeon_ring_write(ring, 0);
3082 }
3083 
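/**
 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring buffer object
 * @semaphore: radeon semaphore object
 * @emit_wait: Is this a semaphore wait?
 *
 * Emits a semaphore signal/wait packet to the CP ring (CIK).
 */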
3084 void cik_semaphore_ring_emit(struct radeon_device *rdev,
3085 			     struct radeon_ring *ring,
3086 			     struct radeon_semaphore *semaphore,
3087 			     bool emit_wait)
3088 {
3089 	uint64_t addr = semaphore->gpu_addr;
3090 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3091 
3092 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3093 	radeon_ring_write(ring, addr & 0xffffffff);
3094 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3095 }
3096 
3097 /*
3098  * IB stuff
3099  */
3100 /**
3101  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3102  *
3103  * @rdev: radeon_device pointer
3104  * @ib: radeon indirect buffer object
3105  *
3106  * Emits a DE (drawing engine) or CE (constant engine) IB
3107  * on the gfx ring.  IBs are usually generated by userspace
3108  * acceleration drivers and submitted to the kernel for
3109  * scheduling on the ring.  This function schedules the IB
3110  * on the gfx ring for execution by the GPU.
3111  */
3112 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3113 {
3114 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3115 	u32 header, control = INDIRECT_BUFFER_VALID;
3116 
3117 	if (ib->is_const_ib) {
3118 		/* set switch buffer packet before const IB */
3119 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3120 		radeon_ring_write(ring, 0);
3121 
3122 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3123 	} else {
3124 		u32 next_rptr;
3125 		if (ring->rptr_save_reg) {
3126 			next_rptr = ring->wptr + 3 + 4;
3127 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3128 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3129 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3130 			radeon_ring_write(ring, next_rptr);
3131 		} else if (rdev->wb.enabled) {
3132 			next_rptr = ring->wptr + 5 + 4;
3133 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3134 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3135 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3136 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3137 			radeon_ring_write(ring, next_rptr);
3138 		}
3139 
3140 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3141 	}
3142 
3143 	control |= ib->length_dw |
3144 		(ib->vm ? (ib->vm->id << 24) : 0);
3145 
3146 	radeon_ring_write(ring, header);
3147 	radeon_ring_write(ring,
3148 #ifdef __BIG_ENDIAN
3149 			  (2 << 0) |
3150 #endif
3151 			  (ib->gpu_addr & 0xFFFFFFFC));
3152 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3153 	radeon_ring_write(ring, control);
3154 }
3155 
3156 /**
3157  * cik_ib_test - basic gfx ring IB test
3158  *
3159  * @rdev: radeon_device pointer
3160  * @ring: radeon_ring structure holding ring information
3161  *
3162  * Allocate an IB and execute it on the gfx ring (CIK).
3163  * Provides a basic gfx ring test to verify that IBs are working.
3164  * Returns 0 on success, error on failure.
3165  */
3166 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3167 {
3168 	struct radeon_ib ib;
3169 	uint32_t scratch;
3170 	uint32_t tmp = 0;
3171 	unsigned i;
3172 	int r;
3173 
3174 	r = radeon_scratch_get(rdev, &scratch);
3175 	if (r) {
3176 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3177 		return r;
3178 	}
3179 	WREG32(scratch, 0xCAFEDEAD);
3180 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3181 	if (r) {
3182 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3183 		return r;
3184 	}
3185 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3186 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3187 	ib.ptr[2] = 0xDEADBEEF;
3188 	ib.length_dw = 3;
3189 	r = radeon_ib_schedule(rdev, &ib, NULL);
3190 	if (r) {
3191 		radeon_scratch_free(rdev, scratch);
3192 		radeon_ib_free(rdev, &ib);
3193 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3194 		return r;
3195 	}
3196 	r = radeon_fence_wait(ib.fence, false);
3197 	if (r) {
3198 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3199 		return r;
3200 	}
3201 	for (i = 0; i < rdev->usec_timeout; i++) {
3202 		tmp = RREG32(scratch);
3203 		if (tmp == 0xDEADBEEF)
3204 			break;
3205 		DRM_UDELAY(1);
3206 	}
3207 	if (i < rdev->usec_timeout) {
3208 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3209 	} else {
3210 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3211 			  scratch, tmp);
3212 		r = -EINVAL;
3213 	}
3214 	radeon_scratch_free(rdev, scratch);
3215 	radeon_ib_free(rdev, &ib);
3216 	return r;
3217 }
3218 
3219 /*
3220  * CP.
3221  * On CIK, gfx and compute now have independent command processors.
3222  *
3223  * GFX
3224  * Gfx consists of a single ring and can process both gfx jobs and
3225  * compute jobs.  The gfx CP consists of three microengines (ME):
3226  * PFP - Pre-Fetch Parser
3227  * ME - Micro Engine
3228  * CE - Constant Engine
3229  * The PFP and ME make up what is considered the Drawing Engine (DE).
3230  * The CE is an asynchronous engine used for updating buffer descriptors
3231  * used by the DE so that they can be loaded into cache in parallel
3232  * while the DE is processing state update packets.
3233  *
3234  * Compute
3235  * The compute CP consists of two microengines (ME):
3236  * MEC1 - Compute MicroEngine 1
3237  * MEC2 - Compute MicroEngine 2
3238  * Each MEC supports 4 compute pipes and each pipe supports 8 queues (64 in total).
3239  * The queues are exposed to userspace and are programmed directly
3240  * by the compute runtime.
3241  */
3242 /**
3243  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3244  *
3245  * @rdev: radeon_device pointer
3246  * @enable: enable or disable the MEs
3247  *
3248  * Halts or unhalts the gfx MEs.
3249  */
3250 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3251 {
3252 	if (enable)
3253 		WREG32(CP_ME_CNTL, 0);
3254 	else {
3255 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3256 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3257 	}
3258 	udelay(50);
3259 }
3260 
3261 /**
3262  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3263  *
3264  * @rdev: radeon_device pointer
3265  *
3266  * Loads the gfx PFP, ME, and CE ucode.
3267  * Returns 0 for success, -EINVAL if the ucode is not available.
3268  */
3269 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3270 {
3271 	const __be32 *fw_data;
3272 	int i;
3273 
3274 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3275 		return -EINVAL;
3276 
3277 	cik_cp_gfx_enable(rdev, false);
3278 
3279 	/* PFP */
3280 	fw_data = (const __be32 *)rdev->pfp_fw->data;
3281 	WREG32(CP_PFP_UCODE_ADDR, 0);
3282 	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3283 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3284 	WREG32(CP_PFP_UCODE_ADDR, 0);
3285 
3286 	/* CE */
3287 	fw_data = (const __be32 *)rdev->ce_fw->data;
3288 	WREG32(CP_CE_UCODE_ADDR, 0);
3289 	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3290 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3291 	WREG32(CP_CE_UCODE_ADDR, 0);
3292 
3293 	/* ME */
3294 	fw_data = (const __be32 *)rdev->me_fw->data;
3295 	WREG32(CP_ME_RAM_WADDR, 0);
3296 	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3297 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3298 	WREG32(CP_ME_RAM_WADDR, 0);
3299 
3300 	WREG32(CP_PFP_UCODE_ADDR, 0);
3301 	WREG32(CP_CE_UCODE_ADDR, 0);
3302 	WREG32(CP_ME_RAM_WADDR, 0);
3303 	WREG32(CP_ME_RAM_RADDR, 0);
3304 	return 0;
3305 }
3306 
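/*
 * Note on the sequence above: each ucode image is streamed one
 * big-endian dword at a time through its data register, with the
 * matching address register reset to 0 before the stream and again
 * once all three images have been written.
 */
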
3307 /**
3308  * cik_cp_gfx_start - start the gfx ring
3309  *
3310  * @rdev: radeon_device pointer
3311  *
3312  * Enables the ring and loads the clear state context and other
3313  * packets required to init the ring.
3314  * Returns 0 for success, error for failure.
3315  */
3316 static int cik_cp_gfx_start(struct radeon_device *rdev)
3317 {
3318 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3319 	int r, i;
3320 
3321 	/* init the CP */
3322 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3323 	WREG32(CP_ENDIAN_SWAP, 0);
3324 	WREG32(CP_DEVICE_ID, 1);
3325 
3326 	cik_cp_gfx_enable(rdev, true);
3327 
3328 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3329 	if (r) {
3330 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3331 		return r;
3332 	}
3333 
3334 	/* init the CE partitions.  CE only used for gfx on CIK */
3335 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3336 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3337 	radeon_ring_write(ring, 0xc000);
3338 	radeon_ring_write(ring, 0xc000);
3339 
3340 	/* setup clear context state */
3341 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3342 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3343 
3344 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3345 	radeon_ring_write(ring, 0x80000000);
3346 	radeon_ring_write(ring, 0x80000000);
3347 
3348 	for (i = 0; i < cik_default_size; i++)
3349 		radeon_ring_write(ring, cik_default_state[i]);
3350 
3351 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3352 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3353 
3354 	/* set clear context state */
3355 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3356 	radeon_ring_write(ring, 0);
3357 
3358 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3359 	radeon_ring_write(ring, 0x00000316);
3360 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3361 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3362 
3363 	radeon_ring_unlock_commit(rdev, ring);
3364 
3365 	return 0;
3366 }
3367 
3368 /**
3369  * cik_cp_gfx_fini - stop the gfx ring
3370  *
3371  * @rdev: radeon_device pointer
3372  *
3373  * Stop the gfx ring and tear down the driver ring
3374  * info.
3375  */
3376 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3377 {
3378 	cik_cp_gfx_enable(rdev, false);
3379 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3380 }
3381 
3382 /**
3383  * cik_cp_gfx_resume - setup the gfx ring buffer registers
3384  *
3385  * @rdev: radeon_device pointer
3386  *
3387  * Program the location and size of the gfx ring buffer
3388  * and test it to make sure it's working.
3389  * Returns 0 for success, error for failure.
3390  */
3391 static int cik_cp_gfx_resume(struct radeon_device *rdev)
3392 {
3393 	struct radeon_ring *ring;
3394 	u32 tmp;
3395 	u32 rb_bufsz;
3396 	u64 rb_addr;
3397 	int r;
3398 
3399 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3400 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3401 
3402 	/* Set the write pointer delay */
3403 	WREG32(CP_RB_WPTR_DELAY, 0);
3404 
3405 	/* set the RB to use vmid 0 */
3406 	WREG32(CP_RB_VMID, 0);
3407 
3408 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3409 
3410 	/* ring 0 - compute and gfx */
3411 	/* Set ring buffer size */
3412 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3413 	rb_bufsz = order_base_2(ring->ring_size / 8);
3414 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3415 #ifdef __BIG_ENDIAN
3416 	tmp |= BUF_SWAP_32BIT;
3417 #endif
3418 	WREG32(CP_RB0_CNTL, tmp);
3419 
3420 	/* Initialize the ring buffer's read and write pointers */
3421 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3422 	ring->wptr = 0;
3423 	WREG32(CP_RB0_WPTR, ring->wptr);
3424 
3425 	/* set the wb address whether it's enabled or not */
3426 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3427 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3428 
3429 	/* scratch register shadowing is no longer supported */
3430 	WREG32(SCRATCH_UMSK, 0);
3431 
3432 	if (!rdev->wb.enabled)
3433 		tmp |= RB_NO_UPDATE;
3434 
3435 	mdelay(1);
3436 	WREG32(CP_RB0_CNTL, tmp);
3437 
3438 	rb_addr = ring->gpu_addr >> 8;
3439 	WREG32(CP_RB0_BASE, rb_addr);
3440 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
3441 
3442 	ring->rptr = RREG32(CP_RB0_RPTR);
3443 
3444 	/* start the ring */
3445 	cik_cp_gfx_start(rdev);
3446 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3447 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3448 	if (r) {
3449 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3450 		return r;
3451 	}
3452 	return 0;
3453 }
3454 
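/**
 * cik_compute_ring_get_rptr - get the current read pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Fetch the current read pointer for the compute ring, either from
 * the writeback buffer (if enabled) or from the CP_HQD_PQ_RPTR
 * register of the selected queue (CIK).
 */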
3455 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3456 			      struct radeon_ring *ring)
3457 {
3458 	u32 rptr;
3459 
3462 	if (rdev->wb.enabled) {
3463 		rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3464 	} else {
3465 		mutex_lock(&rdev->srbm_mutex);
3466 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3467 		rptr = RREG32(CP_HQD_PQ_RPTR);
3468 		cik_srbm_select(rdev, 0, 0, 0, 0);
3469 		mutex_unlock(&rdev->srbm_mutex);
3470 	}
3471 
3472 	return rptr;
3473 }
3474 
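/**
 * cik_compute_ring_get_wptr - get the current write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Fetch the current write pointer for the compute ring, either from
 * the writeback buffer (if enabled) or from the CP_HQD_PQ_WPTR
 * register of the selected queue (CIK).
 */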
3475 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3476 			      struct radeon_ring *ring)
3477 {
3478 	u32 wptr;
3479 
3480 	if (rdev->wb.enabled) {
3481 		wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3482 	} else {
3483 		mutex_lock(&rdev->srbm_mutex);
3484 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3485 		wptr = RREG32(CP_HQD_PQ_WPTR);
3486 		cik_srbm_select(rdev, 0, 0, 0, 0);
3487 		mutex_unlock(&rdev->srbm_mutex);
3488 	}
3489 
3490 	return wptr;
3491 }
3492 
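/**
 * cik_compute_ring_set_wptr - commit the write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Update the write pointer in the writeback buffer and ring the
 * doorbell for the compute ring (CIK).
 */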
3493 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
3494 			       struct radeon_ring *ring)
3495 {
3496 	rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
3497 	WDOORBELL32(ring->doorbell_offset, ring->wptr);
3498 }
3499 
3500 /**
3501  * cik_cp_compute_enable - enable/disable the compute CP MEs
3502  *
3503  * @rdev: radeon_device pointer
3504  * @enable: enable or disable the MEs
3505  *
3506  * Halts or unhalts the compute MEs.
3507  */
3508 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
3509 {
3510 	if (enable)
3511 		WREG32(CP_MEC_CNTL, 0);
3512 	else
3513 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3514 	udelay(50);
3515 }
3516 
3517 /**
3518  * cik_cp_compute_load_microcode - load the compute CP ME ucode
3519  *
3520  * @rdev: radeon_device pointer
3521  *
3522  * Loads the compute MEC1 (and, on Kaveri, MEC2) ucode.
3523  * Returns 0 for success, -EINVAL if the ucode is not available.
3524  */
3525 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
3526 {
3527 	const __be32 *fw_data;
3528 	int i;
3529 
3530 	if (!rdev->mec_fw)
3531 		return -EINVAL;
3532 
3533 	cik_cp_compute_enable(rdev, false);
3534 
3535 	/* MEC1 */
3536 	fw_data = (const __be32 *)rdev->mec_fw->data;
3537 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3538 	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3539 		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
3540 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3541 
3542 	if (rdev->family == CHIP_KAVERI) {
3543 		/* MEC2 */
3544 		fw_data = (const __be32 *)rdev->mec_fw->data;
3545 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3546 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3547 			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
3548 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3549 	}
3550 
3551 	return 0;
3552 }
3553 
3554 /**
3555  * cik_cp_compute_start - start the compute queues
3556  *
3557  * @rdev: radeon_device pointer
3558  *
3559  * Enable the compute queues.
3560  * Returns 0 for success, error for failure.
3561  */
3562 static int cik_cp_compute_start(struct radeon_device *rdev)
3563 {
3564 	cik_cp_compute_enable(rdev, true);
3565 
3566 	return 0;
3567 }
3568 
3569 /**
3570  * cik_cp_compute_fini - stop the compute queues
3571  *
3572  * @rdev: radeon_device pointer
3573  *
3574  * Stop the compute queues and tear down the driver queue
3575  * info.
3576  */
3577 static void cik_cp_compute_fini(struct radeon_device *rdev)
3578 {
3579 	int i, idx, r;
3580 
3581 	cik_cp_compute_enable(rdev, false);
3582 
3583 	for (i = 0; i < 2; i++) {
3584 		if (i == 0)
3585 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
3586 		else
3587 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
3588 
3589 		if (rdev->ring[idx].mqd_obj) {
3590 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3591 			if (unlikely(r != 0))
3592 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
3593 
3594 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3595 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3596 
3597 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3598 			rdev->ring[idx].mqd_obj = NULL;
3599 		}
3600 	}
3601 }
3602 
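/**
 * cik_mec_fini - tear down the compute MEC state
 *
 * @rdev: radeon_device pointer
 *
 * Unpin and free the HPD EOP buffer object used by the
 * compute MECs (CIK).
 */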
3603 static void cik_mec_fini(struct radeon_device *rdev)
3604 {
3605 	int r;
3606 
3607 	if (rdev->mec.hpd_eop_obj) {
3608 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3609 		if (unlikely(r != 0))
3610 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
3611 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
3612 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3613 
3614 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
3615 		rdev->mec.hpd_eop_obj = NULL;
3616 	}
3617 }
3618 
3619 #define MEC_HPD_SIZE 2048
3620 
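/**
 * cik_mec_init - init the compute MEC state
 *
 * @rdev: radeon_device pointer
 *
 * Determine the MEC/pipe/queue topology for the asic and allocate,
 * pin, and clear the HPD EOP buffer object shared by the compute
 * pipes (CIK).
 * Returns 0 for success, error for failure.
 */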
3621 static int cik_mec_init(struct radeon_device *rdev)
3622 {
3623 	int r;
3624 	u32 *hpd;
3625 
3626 	/*
3627 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
3628 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
3629 	 */
3630 	if (rdev->family == CHIP_KAVERI)
3631 		rdev->mec.num_mec = 2;
3632 	else
3633 		rdev->mec.num_mec = 1;
3634 	rdev->mec.num_pipe = 4;
3635 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
3636 
3637 	if (rdev->mec.hpd_eop_obj == NULL) {
3638 		r = radeon_bo_create(rdev,
3639 				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
3640 				     PAGE_SIZE, true,
3641 				     RADEON_GEM_DOMAIN_GTT, NULL,
3642 				     &rdev->mec.hpd_eop_obj);
3643 		if (r) {
3644 			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
3645 			return r;
3646 		}
3647 	}
3648 
3649 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3650 	if (unlikely(r != 0)) {
3651 		cik_mec_fini(rdev);
3652 		return r;
3653 	}
3654 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
3655 			  &rdev->mec.hpd_eop_gpu_addr);
3656 	if (r) {
3657 		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
3658 		cik_mec_fini(rdev);
3659 		return r;
3660 	}
3661 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
3662 	if (r) {
3663 		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
3664 		cik_mec_fini(rdev);
3665 		return r;
3666 	}
3667 
3668 	/* clear memory.  Not sure if this is required or not */
3669 	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
3670 
3671 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
3672 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3673 
3674 	return 0;
3675 }
3676 
3677 struct hqd_registers {
3679 	u32 cp_mqd_base_addr;
3680 	u32 cp_mqd_base_addr_hi;
3681 	u32 cp_hqd_active;
3682 	u32 cp_hqd_vmid;
3683 	u32 cp_hqd_persistent_state;
3684 	u32 cp_hqd_pipe_priority;
3685 	u32 cp_hqd_queue_priority;
3686 	u32 cp_hqd_quantum;
3687 	u32 cp_hqd_pq_base;
3688 	u32 cp_hqd_pq_base_hi;
3689 	u32 cp_hqd_pq_rptr;
3690 	u32 cp_hqd_pq_rptr_report_addr;
3691 	u32 cp_hqd_pq_rptr_report_addr_hi;
3692 	u32 cp_hqd_pq_wptr_poll_addr;
3693 	u32 cp_hqd_pq_wptr_poll_addr_hi;
3694 	u32 cp_hqd_pq_doorbell_control;
3695 	u32 cp_hqd_pq_wptr;
3696 	u32 cp_hqd_pq_control;
3697 	u32 cp_hqd_ib_base_addr;
3698 	u32 cp_hqd_ib_base_addr_hi;
3699 	u32 cp_hqd_ib_rptr;
3700 	u32 cp_hqd_ib_control;
3701 	u32 cp_hqd_iq_timer;
3702 	u32 cp_hqd_iq_rptr;
3703 	u32 cp_hqd_dequeue_request;
3704 	u32 cp_hqd_dma_offload;
3705 	u32 cp_hqd_sema_cmd;
3706 	u32 cp_hqd_msg_type;
3707 	u32 cp_hqd_atomic0_preop_lo;
3708 	u32 cp_hqd_atomic0_preop_hi;
3709 	u32 cp_hqd_atomic1_preop_lo;
3710 	u32 cp_hqd_atomic1_preop_hi;
3711 	u32 cp_hqd_hq_scheduler0;
3712 	u32 cp_hqd_hq_scheduler1;
3713 	u32 cp_mqd_control;
3714 };
3715 
3716 struct bonaire_mqd {
3718 	u32 header;
3719 	u32 dispatch_initiator;
3720 	u32 dimensions[3];
3721 	u32 start_idx[3];
3722 	u32 num_threads[3];
3723 	u32 pipeline_stat_enable;
3724 	u32 perf_counter_enable;
3725 	u32 pgm[2];
3726 	u32 tba[2];
3727 	u32 tma[2];
3728 	u32 pgm_rsrc[2];
3729 	u32 vmid;
3730 	u32 resource_limits;
3731 	u32 static_thread_mgmt01[2];
3732 	u32 tmp_ring_size;
3733 	u32 static_thread_mgmt23[2];
3734 	u32 restart[3];
3735 	u32 thread_trace_enable;
3736 	u32 reserved1;
3737 	u32 user_data[16];
3738 	u32 vgtcs_invoke_count[2];
3739 	struct hqd_registers queue_state;
3740 	u32 dequeue_cntr;
3741 	u32 interrupt_queue[64];
3742 };
3743 
3744 /**
3745  * cik_cp_compute_resume - setup the compute queue registers
3746  *
3747  * @rdev: radeon_device pointer
3748  *
3749  * Program the compute queues and test them to make sure they
3750  * are working.
3751  * Returns 0 for success, error for failure.
3752  */
3753 static int cik_cp_compute_resume(struct radeon_device *rdev)
3754 {
3755 	int r, i, j, idx;
3756 	u32 tmp;
3757 	bool use_doorbell = true;
3758 	u64 hqd_gpu_addr;
3759 	u64 mqd_gpu_addr;
3760 	u64 eop_gpu_addr;
3761 	u64 wb_gpu_addr;
3762 	u32 *buf;
3763 	struct bonaire_mqd *mqd;
3764 
3765 	r = cik_cp_compute_start(rdev);
3766 	if (r)
3767 		return r;
3768 
3769 	/* fix up chicken bits */
3770 	tmp = RREG32(CP_CPF_DEBUG);
3771 	tmp |= (1 << 23);
3772 	WREG32(CP_CPF_DEBUG, tmp);
3773 
3774 	/* init the pipes */
3775 	mutex_lock(&rdev->srbm_mutex);
3776 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
3777 		int me = (i < 4) ? 1 : 2;
3778 		int pipe = (i < 4) ? i : (i - 4);
3779 
3780 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
3781 
3782 		cik_srbm_select(rdev, me, pipe, 0, 0);
3783 
3784 		/* write the EOP addr */
3785 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
3786 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
3787 
3788 		/* set the VMID assigned */
3789 		WREG32(CP_HPD_EOP_VMID, 0);
3790 
3791 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3792 		tmp = RREG32(CP_HPD_EOP_CONTROL);
3793 		tmp &= ~EOP_SIZE_MASK;
3794 		tmp |= order_base_2(MEC_HPD_SIZE / 8);
3795 		WREG32(CP_HPD_EOP_CONTROL, tmp);
3796 	}
3797 	cik_srbm_select(rdev, 0, 0, 0, 0);
3798 	mutex_unlock(&rdev->srbm_mutex);
3799 
3800 	/* init the queues.  Just two for now. */
3801 	for (i = 0; i < 2; i++) {
3802 		if (i == 0)
3803 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
3804 		else
3805 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
3806 
3807 		if (rdev->ring[idx].mqd_obj == NULL) {
3808 			r = radeon_bo_create(rdev,
3809 					     sizeof(struct bonaire_mqd),
3810 					     PAGE_SIZE, true,
3811 					     RADEON_GEM_DOMAIN_GTT, NULL,
3812 					     &rdev->ring[idx].mqd_obj);
3813 			if (r) {
3814 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
3815 				return r;
3816 			}
3817 		}
3818 
3819 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3820 		if (unlikely(r != 0)) {
3821 			cik_cp_compute_fini(rdev);
3822 			return r;
3823 		}
3824 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
3825 				  &mqd_gpu_addr);
3826 		if (r) {
3827 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
3828 			cik_cp_compute_fini(rdev);
3829 			return r;
3830 		}
3831 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
3832 		if (r) {
3833 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
3834 			cik_cp_compute_fini(rdev);
3835 			return r;
3836 		}
3837 
3838 		/* doorbell offset */
3839 		rdev->ring[idx].doorbell_offset =
3840 			(rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
3841 
3842 		/* init the mqd struct */
3843 		memset(buf, 0, sizeof(struct bonaire_mqd));
3844 
3845 		mqd = (struct bonaire_mqd *)buf;
3846 		mqd->header = 0xC0310800;
3847 		mqd->static_thread_mgmt01[0] = 0xffffffff;
3848 		mqd->static_thread_mgmt01[1] = 0xffffffff;
3849 		mqd->static_thread_mgmt23[0] = 0xffffffff;
3850 		mqd->static_thread_mgmt23[1] = 0xffffffff;
3851 
3852 		mutex_lock(&rdev->srbm_mutex);
3853 		cik_srbm_select(rdev, rdev->ring[idx].me,
3854 				rdev->ring[idx].pipe,
3855 				rdev->ring[idx].queue, 0);
3856 
3857 		/* disable wptr polling */
3858 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
3859 		tmp &= ~WPTR_POLL_EN;
3860 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
3861 
3862 		/* enable doorbell? */
3863 		mqd->queue_state.cp_hqd_pq_doorbell_control =
3864 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3865 		if (use_doorbell)
3866 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3867 		else
3868 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3869 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3870 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
3871 
3872 		/* disable the queue if it's active */
3873 		mqd->queue_state.cp_hqd_dequeue_request = 0;
3874 		mqd->queue_state.cp_hqd_pq_rptr = 0;
3875 		mqd->queue_state.cp_hqd_pq_wptr = 0;
3876 		if (RREG32(CP_HQD_ACTIVE) & 1) {
3877 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3878 			for (j = 0; j < rdev->usec_timeout; j++) {
3879 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
3880 					break;
3881 				udelay(1);
3882 			}
3883 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3884 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3885 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3886 		}
3887 
3888 		/* set the pointer to the MQD */
3889 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3890 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3891 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3892 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3893 		/* set MQD vmid to 0 */
3894 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3895 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3896 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3897 
3898 		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3899 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3900 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3901 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3902 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3903 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3904 
3905 		/* set up the HQD, this is similar to CP_RB0_CNTL */
3906 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3907 		mqd->queue_state.cp_hqd_pq_control &=
3908 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3909 
3910 		mqd->queue_state.cp_hqd_pq_control |=
3911 			order_base_2(rdev->ring[idx].ring_size / 8);
3912 		mqd->queue_state.cp_hqd_pq_control |=
3913 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
3914 #ifdef __BIG_ENDIAN
3915 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3916 #endif
3917 		mqd->queue_state.cp_hqd_pq_control &=
3918 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3919 		mqd->queue_state.cp_hqd_pq_control |=
3920 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3921 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3922 
3923 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3924 		if (i == 0)
3925 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3926 		else
3927 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3928 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3929 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3930 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3931 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3932 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3933 
3934 		/* set the wb address whether it's enabled or not */
3935 		if (i == 0)
3936 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3937 		else
3938 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3939 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3940 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3941 			upper_32_bits(wb_gpu_addr) & 0xffff;
3942 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3943 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3944 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3945 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3946 
3947 		/* enable the doorbell if requested */
3948 		if (use_doorbell) {
3949 			mqd->queue_state.cp_hqd_pq_doorbell_control =
3950 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3951 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3952 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
3953 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3954 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3955 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
3956 				~(DOORBELL_SOURCE | DOORBELL_HIT);
3957 
3958 		} else {
3959 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3960 		}
3961 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3962 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
3963 
3964 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3965 		rdev->ring[idx].wptr = 0;
3966 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3967 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3968 		rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3969 		mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3970 
3971 		/* set the vmid for the queue */
3972 		mqd->queue_state.cp_hqd_vmid = 0;
3973 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3974 
3975 		/* activate the queue */
3976 		mqd->queue_state.cp_hqd_active = 1;
3977 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3978 
3979 		cik_srbm_select(rdev, 0, 0, 0, 0);
3980 		mutex_unlock(&rdev->srbm_mutex);
3981 
3982 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3983 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3984 
3985 		rdev->ring[idx].ready = true;
3986 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3987 		if (r)
3988 			rdev->ring[idx].ready = false;
3989 	}
3990 
3991 	return 0;
3992 }
3993 
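/**
 * cik_cp_enable - enable/disable the gfx and compute CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halts or unhalts both the gfx and compute MEs (CIK).
 */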
3994 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3995 {
3996 	cik_cp_gfx_enable(rdev, enable);
3997 	cik_cp_compute_enable(rdev, enable);
3998 }
3999 
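/**
 * cik_cp_load_microcode - load the gfx and compute CP ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx and compute ucode (CIK).
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */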
4000 static int cik_cp_load_microcode(struct radeon_device *rdev)
4001 {
4002 	int r;
4003 
4004 	r = cik_cp_gfx_load_microcode(rdev);
4005 	if (r)
4006 		return r;
4007 	r = cik_cp_compute_load_microcode(rdev);
4008 	if (r)
4009 		return r;
4010 
4011 	return 0;
4012 }
4013 
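/**
 * cik_cp_fini - stop the gfx and compute rings
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx and compute rings and tear down the driver ring
 * and queue info (CIK).
 */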
4014 static void cik_cp_fini(struct radeon_device *rdev)
4015 {
4016 	cik_cp_gfx_fini(rdev);
4017 	cik_cp_compute_fini(rdev);
4018 }
4019 
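/**
 * cik_cp_resume - set up the gfx and compute rings
 *
 * @rdev: radeon_device pointer
 *
 * Load the CP microcode, then program and test the gfx and
 * compute rings (CIK).
 * Returns 0 for success, error for failure.
 */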
4020 static int cik_cp_resume(struct radeon_device *rdev)
4021 {
4022 	int r;
4023 
4024 	cik_enable_gui_idle_interrupt(rdev, false);
4025 
4026 	r = cik_cp_load_microcode(rdev);
4027 	if (r)
4028 		return r;
4029 
4030 	r = cik_cp_gfx_resume(rdev);
4031 	if (r)
4032 		return r;
4033 	r = cik_cp_compute_resume(rdev);
4034 	if (r)
4035 		return r;
4036 
4037 	cik_enable_gui_idle_interrupt(rdev, true);
4038 
4039 	return 0;
4040 }
4041 
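/**
 * cik_print_gpu_status_regs - dump GPU status registers
 *
 * @rdev: radeon_device pointer
 *
 * Dump the GRBM, SRBM, SDMA, and CP status registers to the kernel
 * log to help diagnose hangs (CIK).
 */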
4042 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4043 {
4044 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4045 		RREG32(GRBM_STATUS));
4046 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4047 		RREG32(GRBM_STATUS2));
4048 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4049 		RREG32(GRBM_STATUS_SE0));
4050 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4051 		RREG32(GRBM_STATUS_SE1));
4052 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4053 		RREG32(GRBM_STATUS_SE2));
4054 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4055 		RREG32(GRBM_STATUS_SE3));
4056 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4057 		RREG32(SRBM_STATUS));
4058 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4059 		RREG32(SRBM_STATUS2));
4060 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4061 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4062 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4063 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4064 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4065 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4066 		 RREG32(CP_STALLED_STAT1));
4067 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4068 		 RREG32(CP_STALLED_STAT2));
4069 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4070 		 RREG32(CP_STALLED_STAT3));
4071 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4072 		 RREG32(CP_CPF_BUSY_STAT));
4073 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4074 		 RREG32(CP_CPF_STALLED_STAT1));
4075 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4076 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4077 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4078 		 RREG32(CP_CPC_STALLED_STAT1));
4079 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4080 }
4081 
4082 /**
4083  * cik_gpu_check_soft_reset - check which blocks are busy
4084  *
4085  * @rdev: radeon_device pointer
4086  *
4087  * Check which blocks are busy and return the relevant reset
4088  * mask to be used by cik_gpu_soft_reset().
4089  * Returns a mask of the blocks to be reset.
4090  */
4091 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4092 {
4093 	u32 reset_mask = 0;
4094 	u32 tmp;
4095 
4096 	/* GRBM_STATUS */
4097 	tmp = RREG32(GRBM_STATUS);
4098 	if (tmp & (PA_BUSY | SC_BUSY |
4099 		   BCI_BUSY | SX_BUSY |
4100 		   TA_BUSY | VGT_BUSY |
4101 		   DB_BUSY | CB_BUSY |
4102 		   GDS_BUSY | SPI_BUSY |
4103 		   IA_BUSY | IA_BUSY_NO_DMA))
4104 		reset_mask |= RADEON_RESET_GFX;
4105 
4106 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4107 		reset_mask |= RADEON_RESET_CP;
4108 
4109 	/* GRBM_STATUS2 */
4110 	tmp = RREG32(GRBM_STATUS2);
4111 	if (tmp & RLC_BUSY)
4112 		reset_mask |= RADEON_RESET_RLC;
4113 
4114 	/* SDMA0_STATUS_REG */
4115 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4116 	if (!(tmp & SDMA_IDLE))
4117 		reset_mask |= RADEON_RESET_DMA;
4118 
4119 	/* SDMA1_STATUS_REG */
4120 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4121 	if (!(tmp & SDMA_IDLE))
4122 		reset_mask |= RADEON_RESET_DMA1;
4123 
4124 	/* SRBM_STATUS2 */
4125 	tmp = RREG32(SRBM_STATUS2);
4126 	if (tmp & SDMA_BUSY)
4127 		reset_mask |= RADEON_RESET_DMA;
4128 
4129 	if (tmp & SDMA1_BUSY)
4130 		reset_mask |= RADEON_RESET_DMA1;
4131 
4132 	/* SRBM_STATUS */
4133 	tmp = RREG32(SRBM_STATUS);
4134 
4135 	if (tmp & IH_BUSY)
4136 		reset_mask |= RADEON_RESET_IH;
4137 
4138 	if (tmp & SEM_BUSY)
4139 		reset_mask |= RADEON_RESET_SEM;
4140 
4141 	if (tmp & GRBM_RQ_PENDING)
4142 		reset_mask |= RADEON_RESET_GRBM;
4143 
4144 	if (tmp & VMC_BUSY)
4145 		reset_mask |= RADEON_RESET_VMC;
4146 
4147 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4148 		   MCC_BUSY | MCD_BUSY))
4149 		reset_mask |= RADEON_RESET_MC;
4150 
4151 	if (evergreen_is_display_hung(rdev))
4152 		reset_mask |= RADEON_RESET_DISPLAY;
4153 
4154 	/* Skip MC reset as it's most likely not hung, just busy */
4155 	if (reset_mask & RADEON_RESET_MC) {
4156 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4157 		reset_mask &= ~RADEON_RESET_MC;
4158 	}
4159 
4160 	return reset_mask;
4161 }
4162 
4163 /**
4164  * cik_gpu_soft_reset - soft reset GPU
4165  *
4166  * @rdev: radeon_device pointer
4167  * @reset_mask: mask of which blocks to reset
4168  *
4169  * Soft reset the blocks specified in @reset_mask.
4170  */
4171 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4172 {
4173 	struct evergreen_mc_save save;
4174 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4175 	u32 tmp;
4176 
4177 	if (reset_mask == 0)
4178 		return;
4179 
4180 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4181 
4182 	cik_print_gpu_status_regs(rdev);
4183 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4184 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4185 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4186 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4187 
4188 	/* stop the rlc */
4189 	cik_rlc_stop(rdev);
4190 
4191 	/* Disable GFX parsing/prefetching */
4192 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4193 
4194 	/* Disable MEC parsing/prefetching */
4195 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4196 
4197 	if (reset_mask & RADEON_RESET_DMA) {
4198 		/* sdma0 */
4199 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4200 		tmp |= SDMA_HALT;
4201 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4202 	}
4203 	if (reset_mask & RADEON_RESET_DMA1) {
4204 		/* sdma1 */
4205 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4206 		tmp |= SDMA_HALT;
4207 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4208 	}
4209 
4210 	evergreen_mc_stop(rdev, &save);
4211 	if (evergreen_mc_wait_for_idle(rdev)) {
4212 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4213 	}
4214 
4215 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4216 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4217 
4218 	if (reset_mask & RADEON_RESET_CP) {
4219 		grbm_soft_reset |= SOFT_RESET_CP;
4220 
4221 		srbm_soft_reset |= SOFT_RESET_GRBM;
4222 	}
4223 
4224 	if (reset_mask & RADEON_RESET_DMA)
4225 		srbm_soft_reset |= SOFT_RESET_SDMA;
4226 
4227 	if (reset_mask & RADEON_RESET_DMA1)
4228 		srbm_soft_reset |= SOFT_RESET_SDMA1;
4229 
4230 	if (reset_mask & RADEON_RESET_DISPLAY)
4231 		srbm_soft_reset |= SOFT_RESET_DC;
4232 
4233 	if (reset_mask & RADEON_RESET_RLC)
4234 		grbm_soft_reset |= SOFT_RESET_RLC;
4235 
4236 	if (reset_mask & RADEON_RESET_SEM)
4237 		srbm_soft_reset |= SOFT_RESET_SEM;
4238 
4239 	if (reset_mask & RADEON_RESET_IH)
4240 		srbm_soft_reset |= SOFT_RESET_IH;
4241 
4242 	if (reset_mask & RADEON_RESET_GRBM)
4243 		srbm_soft_reset |= SOFT_RESET_GRBM;
4244 
4245 	if (reset_mask & RADEON_RESET_VMC)
4246 		srbm_soft_reset |= SOFT_RESET_VMC;
4247 
4248 	if (!(rdev->flags & RADEON_IS_IGP)) {
4249 		if (reset_mask & RADEON_RESET_MC)
4250 			srbm_soft_reset |= SOFT_RESET_MC;
4251 	}
4252 
4253 	if (grbm_soft_reset) {
4254 		tmp = RREG32(GRBM_SOFT_RESET);
4255 		tmp |= grbm_soft_reset;
4256 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4257 		WREG32(GRBM_SOFT_RESET, tmp);
4258 		tmp = RREG32(GRBM_SOFT_RESET);
4259 
4260 		udelay(50);
4261 
4262 		tmp &= ~grbm_soft_reset;
4263 		WREG32(GRBM_SOFT_RESET, tmp);
4264 		tmp = RREG32(GRBM_SOFT_RESET);
4265 	}
4266 
4267 	if (srbm_soft_reset) {
4268 		tmp = RREG32(SRBM_SOFT_RESET);
4269 		tmp |= srbm_soft_reset;
4270 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4271 		WREG32(SRBM_SOFT_RESET, tmp);
4272 		tmp = RREG32(SRBM_SOFT_RESET);
4273 
4274 		udelay(50);
4275 
4276 		tmp &= ~srbm_soft_reset;
4277 		WREG32(SRBM_SOFT_RESET, tmp);
4278 		tmp = RREG32(SRBM_SOFT_RESET);
4279 	}
4280 
4281 	/* Wait a little for things to settle down */
4282 	udelay(50);
4283 
4284 	evergreen_mc_resume(rdev, &save);
4285 	udelay(50);
4286 
4287 	cik_print_gpu_status_regs(rdev);
4288 }
4289 
4290 /**
4291  * cik_asic_reset - soft reset GPU
4292  *
4293  * @rdev: radeon_device pointer
4294  *
4295  * Look up which blocks are hung and attempt
4296  * to reset them.
4297  * Returns 0 for success.
4298  */
4299 int cik_asic_reset(struct radeon_device *rdev)
4300 {
4301 	u32 reset_mask;
4302 
4303 	reset_mask = cik_gpu_check_soft_reset(rdev);
4304 
4305 	if (reset_mask)
4306 		r600_set_bios_scratch_engine_hung(rdev, true);
4307 
4308 	cik_gpu_soft_reset(rdev, reset_mask);
4309 
4310 	reset_mask = cik_gpu_check_soft_reset(rdev);
4311 
4312 	if (!reset_mask)
4313 		r600_set_bios_scratch_engine_hung(rdev, false);
4314 
4315 	return 0;
4316 }
4317 
4318 /**
4319  * cik_gfx_is_lockup - check if the 3D engine is locked up
4320  *
4321  * @rdev: radeon_device pointer
4322  * @ring: radeon_ring structure holding ring information
4323  *
4324  * Check if the 3D engine is locked up (CIK).
4325  * Returns true if the engine is locked, false if not.
4326  */
4327 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4328 {
4329 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4330 
4331 	if (!(reset_mask & (RADEON_RESET_GFX |
4332 			    RADEON_RESET_COMPUTE |
4333 			    RADEON_RESET_CP))) {
4334 		radeon_ring_lockup_update(ring);
4335 		return false;
4336 	}
4337 	/* force CP activities */
4338 	radeon_ring_force_activity(rdev, ring);
4339 	return radeon_ring_test_lockup(rdev, ring);
4340 }
4341 
4342 /* MC */
4343 /**
4344  * cik_mc_program - program the GPU memory controller
4345  *
4346  * @rdev: radeon_device pointer
4347  *
4348  * Set the location of vram, gart, and AGP in the GPU's
4349  * physical address space (CIK).
4350  */
4351 static void cik_mc_program(struct radeon_device *rdev)
4352 {
4353 	struct evergreen_mc_save save;
4354 	u32 tmp;
4355 	int i, j;
4356 
4357 	/* Initialize HDP */
4358 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4359 		WREG32((0x2c14 + j), 0x00000000);
4360 		WREG32((0x2c18 + j), 0x00000000);
4361 		WREG32((0x2c1c + j), 0x00000000);
4362 		WREG32((0x2c20 + j), 0x00000000);
4363 		WREG32((0x2c24 + j), 0x00000000);
4364 	}
4365 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4366 
4367 	evergreen_mc_stop(rdev, &save);
4368 	if (radeon_mc_wait_for_idle(rdev)) {
4369 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4370 	}
4371 	/* Lockout access through VGA aperture */
4372 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4373 	/* Update configuration */
4374 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4375 	       rdev->mc.vram_start >> 12);
4376 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4377 	       rdev->mc.vram_end >> 12);
4378 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4379 	       rdev->vram_scratch.gpu_addr >> 12);
4380 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4381 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4382 	WREG32(MC_VM_FB_LOCATION, tmp);
4383 	/* XXX double check these! */
4384 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4385 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4386 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4387 	WREG32(MC_VM_AGP_BASE, 0);
4388 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4389 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4390 	if (radeon_mc_wait_for_idle(rdev)) {
4391 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4392 	}
4393 	evergreen_mc_resume(rdev, &save);
4394 	/* we need to own VRAM, so turn off the VGA renderer here
4395 	 * to stop it overwriting our objects */
4396 	rv515_vga_render_disable(rdev);
4397 }
4398 
4399 /**
4400  * cik_mc_init - initialize the memory controller driver params
4401  *
4402  * @rdev: radeon_device pointer
4403  *
4404  * Look up the amount of vram, vram width, and decide how to place
4405  * vram and gart within the GPU's physical address space (CIK).
4406  * Returns 0 for success.
4407  */
4408 static int cik_mc_init(struct radeon_device *rdev)
4409 {
4410 	u32 tmp;
4411 	int chansize, numchan;
4412 
4413 	/* Get VRAM information */
4414 	rdev->mc.vram_is_ddr = true;
4415 	tmp = RREG32(MC_ARB_RAMCFG);
4416 	if (tmp & CHANSIZE_MASK) {
4417 		chansize = 64;
4418 	} else {
4419 		chansize = 32;
4420 	}
4421 	tmp = RREG32(MC_SHARED_CHMAP);
4422 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4423 	case 0:
4424 	default:
4425 		numchan = 1;
4426 		break;
4427 	case 1:
4428 		numchan = 2;
4429 		break;
4430 	case 2:
4431 		numchan = 4;
4432 		break;
4433 	case 3:
4434 		numchan = 8;
4435 		break;
4436 	case 4:
4437 		numchan = 3;
4438 		break;
4439 	case 5:
4440 		numchan = 6;
4441 		break;
4442 	case 6:
4443 		numchan = 10;
4444 		break;
4445 	case 7:
4446 		numchan = 12;
4447 		break;
4448 	case 8:
4449 		numchan = 16;
4450 		break;
4451 	}
4452 	rdev->mc.vram_width = numchan * chansize;
4453 	/* Could aperture size report 0? */
4454 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4455 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4456 	/* size in MB on CIK */
4457 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4458 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4459 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4460 	si_vram_gtt_location(rdev, &rdev->mc);
4461 	radeon_update_bandwidth_info(rdev);
4462 
4463 	return 0;
4464 }
4465 
4466 /*
4467  * GART
4468  * VMID 0 is the physical GPU address space as used by the kernel.
4469  * VMIDs 1-15 are used for userspace clients and are handled
4470  * by the radeon vm/hsa code.
4471  */
4472 /**
4473  * cik_pcie_gart_tlb_flush - gart tlb flush callback
4474  *
4475  * @rdev: radeon_device pointer
4476  *
4477  * Flush the TLB for the VMID 0 page table (CIK).
4478  */
4479 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4480 {
4481 	/* flush hdp cache */
4482 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4483 
4484 	/* bits 0-15 are the VM contexts0-15 */
4485 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
4486 }
4487 
4488 /**
4489  * cik_pcie_gart_enable - gart enable
4490  *
4491  * @rdev: radeon_device pointer
4492  *
4493  * This sets up the TLBs, programs the page tables for VMID0,
4494  * sets up the hw for VMIDs 1-15 which are allocated on
4495  * demand, and sets up the global locations for the LDS, GDS,
4496  * and GPUVM for FSA64 clients (CIK).
4497  * Returns 0 for success, errors for failure.
4498  */
4499 static int cik_pcie_gart_enable(struct radeon_device *rdev)
4500 {
4501 	int r, i;
4502 
4503 	if (rdev->gart.robj == NULL) {
4504 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4505 		return -EINVAL;
4506 	}
4507 	r = radeon_gart_table_vram_pin(rdev);
4508 	if (r)
4509 		return r;
4510 	radeon_gart_restore(rdev);
4511 	/* Setup TLB control */
4512 	WREG32(MC_VM_MX_L1_TLB_CNTL,
4513 	       (0xA << 7) |
4514 	       ENABLE_L1_TLB |
4515 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4516 	       ENABLE_ADVANCED_DRIVER_MODEL |
4517 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4518 	/* Setup L2 cache */
4519 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4520 	       ENABLE_L2_FRAGMENT_PROCESSING |
4521 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4522 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4523 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4524 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4525 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4526 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4527 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4528 	/* setup context0 */
4529 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4530 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4531 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4532 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4533 			(u32)(rdev->dummy_page.addr >> 12));
4534 	WREG32(VM_CONTEXT0_CNTL2, 0);
4535 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4536 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4537 
4538 	WREG32(0x15D4, 0);
4539 	WREG32(0x15D8, 0);
4540 	WREG32(0x15DC, 0);
4541 
4542 	/* empty context1-15 */
4543 	/* FIXME start with 4G, once using 2 level pt switch to full
4544 	 * vm size space
4545 	 */
4546 	/* set vm size, must be a multiple of 4 */
4547 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4548 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4549 	for (i = 1; i < 16; i++) {
4550 		if (i < 8)
4551 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4552 			       rdev->gart.table_addr >> 12);
4553 		else
4554 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4555 			       rdev->gart.table_addr >> 12);
4556 	}
4557 
4558 	/* enable context1-15 */
4559 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4560 	       (u32)(rdev->dummy_page.addr >> 12));
4561 	WREG32(VM_CONTEXT1_CNTL2, 4);
4562 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4563 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4564 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4565 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4566 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4567 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4568 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4569 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4570 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4571 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4572 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4573 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4574 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4575 
4576 	/* TC cache setup ??? */
4577 	WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4578 	WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4579 	WREG32(TC_CFG_L1_STORE_POLICY, 0);
4580 
4581 	WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4582 	WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4583 	WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4584 	WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4585 	WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4586 
4587 	WREG32(TC_CFG_L1_VOLATILE, 0);
4588 	WREG32(TC_CFG_L2_VOLATILE, 0);
4589 
4590 	if (rdev->family == CHIP_KAVERI) {
4591 		u32 tmp = RREG32(CHUB_CONTROL);
4592 		tmp &= ~BYPASS_VM;
4593 		WREG32(CHUB_CONTROL, tmp);
4594 	}
4595 
4596 	/* XXX SH_MEM regs */
4597 	/* where to put LDS, scratch, GPUVM in FSA64 space */
4598 	mutex_lock(&rdev->srbm_mutex);
4599 	for (i = 0; i < 16; i++) {
4600 		cik_srbm_select(rdev, 0, 0, 0, i);
4601 		/* CP and shaders */
4602 		WREG32(SH_MEM_CONFIG, 0);
4603 		WREG32(SH_MEM_APE1_BASE, 1);
4604 		WREG32(SH_MEM_APE1_LIMIT, 0);
4605 		WREG32(SH_MEM_BASES, 0);
4606 		/* SDMA GFX */
4607 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4608 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4609 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4610 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4611 		/* XXX SDMA RLC - todo */
4612 	}
4613 	cik_srbm_select(rdev, 0, 0, 0, 0);
4614 	mutex_unlock(&rdev->srbm_mutex);
4615 
4616 	cik_pcie_gart_tlb_flush(rdev);
4617 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4618 		 (unsigned)(rdev->mc.gtt_size >> 20),
4619 		 (unsigned long long)rdev->gart.table_addr);
4620 	rdev->gart.ready = true;
4621 	return 0;
4622 }
4623 
4624 /**
4625  * cik_pcie_gart_disable - gart disable
4626  *
4627  * @rdev: radeon_device pointer
4628  *
4629  * This disables all VM page tables (CIK).
4630  */
4631 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4632 {
4633 	/* Disable all tables */
4634 	WREG32(VM_CONTEXT0_CNTL, 0);
4635 	WREG32(VM_CONTEXT1_CNTL, 0);
4636 	/* Setup TLB control */
4637 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4638 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4639 	/* Setup L2 cache */
4640 	WREG32(VM_L2_CNTL,
4641 	       ENABLE_L2_FRAGMENT_PROCESSING |
4642 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4643 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4644 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4645 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4646 	WREG32(VM_L2_CNTL2, 0);
4647 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4648 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4649 	radeon_gart_table_vram_unpin(rdev);
4650 }
4651 
4652 /**
4653  * cik_pcie_gart_fini - vm fini callback
4654  *
4655  * @rdev: radeon_device pointer
4656  *
4657  * Tears down the driver GART/VM setup (CIK).
4658  */
4659 static void cik_pcie_gart_fini(struct radeon_device *rdev)
4660 {
4661 	cik_pcie_gart_disable(rdev);
4662 	radeon_gart_table_vram_free(rdev);
4663 	radeon_gart_fini(rdev);
4664 }
4665 
4666 /* vm parser */
4667 /**
4668  * cik_ib_parse - vm ib_parse callback
4669  *
4670  * @rdev: radeon_device pointer
4671  * @ib: indirect buffer pointer
4672  *
4673  * CIK uses hw IB checking so this is a nop (CIK).
4674  */
4675 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4676 {
4677 	return 0;
4678 }
4679 
4680 /*
4681  * vm
4682  * VMID 0 is the physical GPU address space as used by the kernel.
4683  * VMIDs 1-15 are used for userspace clients and are handled
4684  * by the radeon vm/hsa code.
4685  */
4686 /**
4687  * cik_vm_init - cik vm init callback
4688  *
4689  * @rdev: radeon_device pointer
4690  *
4691  * Inits cik specific vm parameters (number of VMs, base of vram for
4692  * VMIDs 1-15) (CIK).
4693  * Returns 0 for success.
4694  */
4695 int cik_vm_init(struct radeon_device *rdev)
4696 {
4697 	/* number of VMs */
4698 	rdev->vm_manager.nvm = 16;
4699 	/* base offset of vram pages */
4700 	if (rdev->flags & RADEON_IS_IGP) {
4701 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
4702 		tmp <<= 22;
4703 		rdev->vm_manager.vram_base_offset = tmp;
4704 	} else
4705 		rdev->vm_manager.vram_base_offset = 0;
4706 
4707 	return 0;
4708 }
4709 
4710 /**
4711  * cik_vm_fini - cik vm fini callback
4712  *
4713  * @rdev: radeon_device pointer
4714  *
4715  * Tear down any asic specific VM setup (CIK).
4716  */
4717 void cik_vm_fini(struct radeon_device *rdev)
4718 {
4719 }
4720 
4721 /**
4722  * cik_vm_decode_fault - print human readable fault info
4723  *
4724  * @rdev: radeon_device pointer
4725  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4726  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
4727  *
4728  * Print human readable fault information (CIK).
4729  */
4730 static void cik_vm_decode_fault(struct radeon_device *rdev,
4731 				u32 status, u32 addr, u32 mc_client)
4732 {
4733 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4734 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4735 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4736 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
4737 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
4738 
4739 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
4740 	       protections, vmid, addr,
4741 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4742 	       block, mc_client, mc_id);
4743 }
4744 
4745 /**
4746  * cik_vm_flush - cik vm flush using the CP
4747  *
4748  * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 * @vm: radeon_vm pointer
4749  *
4750  * Update the page table base and flush the VM TLB
4751  * using the CP (CIK).
4752  */
4753 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4754 {
4755 	struct radeon_ring *ring = &rdev->ring[ridx];
4756 
4757 	if (vm == NULL)
4758 		return;
4759 
4760 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4761 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4762 				 WRITE_DATA_DST_SEL(0)));
4763 	if (vm->id < 8) {
4764 		radeon_ring_write(ring,
4765 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4766 	} else {
4767 		radeon_ring_write(ring,
4768 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4769 	}
4770 	radeon_ring_write(ring, 0);
4771 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4772 
4773 	/* update SH_MEM_* regs */
4774 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4775 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4776 				 WRITE_DATA_DST_SEL(0)));
4777 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4778 	radeon_ring_write(ring, 0);
4779 	radeon_ring_write(ring, VMID(vm->id));
4780 
4781 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4782 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4783 				 WRITE_DATA_DST_SEL(0)));
4784 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
4785 	radeon_ring_write(ring, 0);
4786 
4787 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4788 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4789 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4790 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4791 
4792 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4793 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4794 				 WRITE_DATA_DST_SEL(0)));
4795 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4796 	radeon_ring_write(ring, 0);
4797 	radeon_ring_write(ring, VMID(0));
4798 
4799 	/* HDP flush */
4800 	/* We should be using the WAIT_REG_MEM packet here like in
4801 	 * cik_fence_ring_emit(), but it causes the CP to hang in this
4802 	 * context...
4803 	 */
4804 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4805 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4806 				 WRITE_DATA_DST_SEL(0)));
4807 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4808 	radeon_ring_write(ring, 0);
4809 	radeon_ring_write(ring, 0);
4810 
4811 	/* bits 0-15 are the VM contexts0-15 */
4812 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4813 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4814 				 WRITE_DATA_DST_SEL(0)));
4815 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4816 	radeon_ring_write(ring, 0);
4817 	radeon_ring_write(ring, 1 << vm->id);
4818 
4819 	/* compute doesn't have PFP */
4820 	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4821 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
4822 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4823 		radeon_ring_write(ring, 0x0);
4824 	}
4825 }
4826 
4827 /**
4828  * cik_vm_set_page - update the page tables using the CP or sDMA
4829  *
4830  * @rdev: radeon_device pointer
4831  * @ib: indirect buffer to fill with commands
4832  * @pe: addr of the page entry
4833  * @addr: dst addr to write into pe
4834  * @count: number of page entries to update
4835  * @incr: increase next addr by incr bytes
4836  * @flags: access flags
4837  *
4838  * Update the page tables using CP or sDMA (CIK).
4839  */
4840 void cik_vm_set_page(struct radeon_device *rdev,
4841 		     struct radeon_ib *ib,
4842 		     uint64_t pe,
4843 		     uint64_t addr, unsigned count,
4844 		     uint32_t incr, uint32_t flags)
4845 {
4846 	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4847 	uint64_t value;
4848 	unsigned ndw;
4849 
4850 	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4851 		/* CP */
4852 		while (count) {
4853 			ndw = 2 + count * 2;
4854 			if (ndw > 0x3FFE)
4855 				ndw = 0x3FFE;
4856 
4857 			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4858 			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4859 						    WRITE_DATA_DST_SEL(1));
4860 			ib->ptr[ib->length_dw++] = pe;
4861 			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4862 			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4863 				if (flags & RADEON_VM_PAGE_SYSTEM) {
4864 					value = radeon_vm_map_gart(rdev, addr);
4865 					value &= 0xFFFFFFFFFFFFF000ULL;
4866 				} else if (flags & RADEON_VM_PAGE_VALID) {
4867 					value = addr;
4868 				} else {
4869 					value = 0;
4870 				}
4871 				addr += incr;
4872 				value |= r600_flags;
4873 				ib->ptr[ib->length_dw++] = value;
4874 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
4875 			}
4876 		}
4877 	} else {
4878 		/* DMA */
4879 		cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
4880 	}
4881 }
4882 
4883 /*
4884  * RLC
4885  * The RLC is a multi-purpose microengine that handles a
4886  * variety of functions, the most important of which is
4887  * the interrupt controller.
4888  */
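/**
 * cik_enable_gui_idle_interrupt - enable/disable the gui idle interrupt
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the interrupt
 *
 * Enable or disable the context busy/empty interrupts on gfx
 * ring 0 (CIK).
 */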
4889 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
4890 					  bool enable)
4891 {
4892 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
4893 
4894 	if (enable)
4895 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4896 	else
4897 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4898 	WREG32(CP_INT_CNTL_RING0, tmp);
4899 }
4900 
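/**
 * cik_enable_lbpw - enable/disable RLC load balancing
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable load balancing
 *
 * Toggle the RLC load balancing feature (CIK).
 */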
4901 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
4902 {
4903 	u32 tmp;
4904 
4905 	tmp = RREG32(RLC_LB_CNTL);
4906 	if (enable)
4907 		tmp |= LOAD_BALANCE_ENABLE;
4908 	else
4909 		tmp &= ~LOAD_BALANCE_ENABLE;
4910 	WREG32(RLC_LB_CNTL, tmp);
4911 }
4912 
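/**
 * cik_wait_for_rlc_serdes - wait for the RLC serdes to become idle
 *
 * @rdev: radeon_device pointer
 *
 * Poll the CU master busy bits for each SE/SH and then the non-CU
 * master busy bits until they report idle or we time out (CIK).
 */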
4913 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
4914 {
4915 	u32 i, j, k;
4916 	u32 mask;
4917 
4918 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4919 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4920 			cik_select_se_sh(rdev, i, j);
4921 			for (k = 0; k < rdev->usec_timeout; k++) {
4922 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
4923 					break;
4924 				udelay(1);
4925 			}
4926 		}
4927 	}
4928 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4929 
4930 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4931 	for (k = 0; k < rdev->usec_timeout; k++) {
4932 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4933 			break;
4934 		udelay(1);
4935 	}
4936 }
4937 
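/**
 * cik_update_rlc - restore the RLC control state
 *
 * @rdev: radeon_device pointer
 * @rlc: RLC_CNTL value to restore
 *
 * Write the saved RLC_CNTL value back if it differs from the
 * current hardware state (CIK).
 */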
4938 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
4939 {
4940 	u32 tmp;
4941 
4942 	tmp = RREG32(RLC_CNTL);
4943 	if (tmp != rlc)
4944 		WREG32(RLC_CNTL, rlc);
4945 }
4946 
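/**
 * cik_halt_rlc - halt the RLC, saving its state
 *
 * @rdev: radeon_device pointer
 *
 * Disable the RLC if it is running and wait for it to go idle.
 * Returns the original RLC_CNTL value so that the caller can
 * restore it later with cik_update_rlc() (CIK).
 */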
4947 static u32 cik_halt_rlc(struct radeon_device *rdev)
4948 {
4949 	u32 data, orig;
4950 
4951 	orig = data = RREG32(RLC_CNTL);
4952 
4953 	if (data & RLC_ENABLE) {
4954 		u32 i;
4955 
4956 		data &= ~RLC_ENABLE;
4957 		WREG32(RLC_CNTL, data);
4958 
4959 		for (i = 0; i < rdev->usec_timeout; i++) {
4960 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
4961 				break;
4962 			udelay(1);
4963 		}
4964 
4965 		cik_wait_for_rlc_serdes(rdev);
4966 	}
4967 
4968 	return orig;
4969 }
4970 
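/**
 * cik_enter_rlc_safe_mode - request RLC safe mode
 *
 * @rdev: radeon_device pointer
 *
 * Ask the RLC to enter safe mode and wait for the request to be
 * acknowledged (CIK).
 */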
4971 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
4972 {
4973 	u32 tmp, i, mask;
4974 
4975 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
4976 	WREG32(RLC_GPR_REG2, tmp);
4977 
4978 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
4979 	for (i = 0; i < rdev->usec_timeout; i++) {
4980 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
4981 			break;
4982 		udelay(1);
4983 	}
4984 
4985 	for (i = 0; i < rdev->usec_timeout; i++) {
4986 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
4987 			break;
4988 		udelay(1);
4989 	}
4990 }
4991 
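/**
 * cik_exit_rlc_safe_mode - leave RLC safe mode
 *
 * @rdev: radeon_device pointer
 *
 * Ask the RLC to exit safe mode (CIK).
 */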
4992 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
4993 {
4994 	u32 tmp;
4995 
4996 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
4997 	WREG32(RLC_GPR_REG2, tmp);
4998 }
4999 
5000 /**
5001  * cik_rlc_stop - stop the RLC ME
5002  *
5003  * @rdev: radeon_device pointer
5004  *
5005  * Halt the RLC ME (MicroEngine) (CIK).
5006  */
5007 static void cik_rlc_stop(struct radeon_device *rdev)
5008 {
5009 	WREG32(RLC_CNTL, 0);
5010 
5011 	cik_enable_gui_idle_interrupt(rdev, false);
5012 
5013 	cik_wait_for_rlc_serdes(rdev);
5014 }
5015 
5016 /**
5017  * cik_rlc_start - start the RLC ME
5018  *
5019  * @rdev: radeon_device pointer
5020  *
5021  * Unhalt the RLC ME (MicroEngine) (CIK).
5022  */
5023 static void cik_rlc_start(struct radeon_device *rdev)
5024 {
5025 	WREG32(RLC_CNTL, RLC_ENABLE);
5026 
5027 	cik_enable_gui_idle_interrupt(rdev, true);
5028 
5029 	udelay(50);
5030 }
5031 
5032 /**
5033  * cik_rlc_resume - setup the RLC hw
5034  *
5035  * @rdev: radeon_device pointer
5036  *
5037  * Initialize the RLC registers, load the ucode,
5038  * and start the RLC (CIK).
5039  * Returns 0 for success, -EINVAL if the ucode is not available.
5040  */
5041 static int cik_rlc_resume(struct radeon_device *rdev)
5042 {
5043 	u32 i, size, tmp;
5044 	const __be32 *fw_data;
5045 
5046 	if (!rdev->rlc_fw)
5047 		return -EINVAL;
5048 
5049 	switch (rdev->family) {
5050 	case CHIP_BONAIRE:
5051 	default:
5052 		size = BONAIRE_RLC_UCODE_SIZE;
5053 		break;
5054 	case CHIP_KAVERI:
5055 		size = KV_RLC_UCODE_SIZE;
5056 		break;
5057 	case CHIP_KABINI:
5058 		size = KB_RLC_UCODE_SIZE;
5059 		break;
5060 	}
5061 
5062 	cik_rlc_stop(rdev);
5063 
5064 	/* disable CG */
5065 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5066 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5067 
5068 	si_rlc_reset(rdev);
5069 
5070 	cik_init_pg(rdev);
5071 
5072 	cik_init_cg(rdev);
5073 
5074 	WREG32(RLC_LB_CNTR_INIT, 0);
5075 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5076 
5077 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5078 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5079 	WREG32(RLC_LB_PARAMS, 0x00600408);
5080 	WREG32(RLC_LB_CNTL, 0x80000004);
5081 
5082 	WREG32(RLC_MC_CNTL, 0);
5083 	WREG32(RLC_UCODE_CNTL, 0);
5084 
5085 	fw_data = (const __be32 *)rdev->rlc_fw->data;
5086 	WREG32(RLC_GPM_UCODE_ADDR, 0);
5087 	for (i = 0; i < size; i++)
5088 		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5089 	WREG32(RLC_GPM_UCODE_ADDR, 0);
5090 
5091 	/* XXX - find out what chips support lbpw */
5092 	cik_enable_lbpw(rdev, false);
5093 
5094 	if (rdev->family == CHIP_BONAIRE)
5095 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
5096 
5097 	cik_rlc_start(rdev);
5098 
5099 	return 0;
5100 }
5101 
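/**
 * cik_enable_cgcg - enable/disable coarse grain clock gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable cgcg
 *
 * Enable or disable coarse grain clock gating (CGCG) and coarse
 * grain light sleep (CGLS) for the gfx block (CIK).
 */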
5102 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5103 {
5104 	u32 data, orig, tmp, tmp2;
5105 
5106 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5107 
5108 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5109 		cik_enable_gui_idle_interrupt(rdev, true);
5110 
5111 		tmp = cik_halt_rlc(rdev);
5112 
5113 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5114 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5115 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5116 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5117 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
5118 
5119 		cik_update_rlc(rdev, tmp);
5120 
5121 		data |= CGCG_EN | CGLS_EN;
5122 	} else {
5123 		cik_enable_gui_idle_interrupt(rdev, false);
5124 
5125 		RREG32(CB_CGTT_SCLK_CTRL);
5126 		RREG32(CB_CGTT_SCLK_CTRL);
5127 		RREG32(CB_CGTT_SCLK_CTRL);
5128 		RREG32(CB_CGTT_SCLK_CTRL);
5129 
5130 		data &= ~(CGCG_EN | CGLS_EN);
5131 	}
5132 
5133 	if (orig != data)
5134 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5135 
5136 }
5137 
5138 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5139 {
5140 	u32 data, orig, tmp = 0;
5141 
5142 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5143 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
5144 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5145 				orig = data = RREG32(CP_MEM_SLP_CNTL);
5146 				data |= CP_MEM_LS_EN;
5147 				if (orig != data)
5148 					WREG32(CP_MEM_SLP_CNTL, data);
5149 			}
5150 		}
5151 
5152 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5153 		data &= 0xfffffffd;
5154 		if (orig != data)
5155 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5156 
5157 		tmp = cik_halt_rlc(rdev);
5158 
5159 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5160 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5161 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5162 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5163 		WREG32(RLC_SERDES_WR_CTRL, data);
5164 
5165 		cik_update_rlc(rdev, tmp);
5166 
5167 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
5168 			orig = data = RREG32(CGTS_SM_CTRL_REG);
5169 			data &= ~SM_MODE_MASK;
5170 			data |= SM_MODE(0x2);
5171 			data |= SM_MODE_ENABLE;
5172 			data &= ~CGTS_OVERRIDE;
5173 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
5174 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
5175 				data &= ~CGTS_LS_OVERRIDE;
5176 			data &= ~ON_MONITOR_ADD_MASK;
5177 			data |= ON_MONITOR_ADD_EN;
5178 			data |= ON_MONITOR_ADD(0x96);
5179 			if (orig != data)
5180 				WREG32(CGTS_SM_CTRL_REG, data);
5181 		}
5182 	} else {
5183 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5184 		data |= 0x00000002;
5185 		if (orig != data)
5186 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5187 
5188 		data = RREG32(RLC_MEM_SLP_CNTL);
5189 		if (data & RLC_MEM_LS_EN) {
5190 			data &= ~RLC_MEM_LS_EN;
5191 			WREG32(RLC_MEM_SLP_CNTL, data);
5192 		}
5193 
5194 		data = RREG32(CP_MEM_SLP_CNTL);
5195 		if (data & CP_MEM_LS_EN) {
5196 			data &= ~CP_MEM_LS_EN;
5197 			WREG32(CP_MEM_SLP_CNTL, data);
5198 		}
5199 
5200 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5201 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
5202 		if (orig != data)
5203 			WREG32(CGTS_SM_CTRL_REG, data);
5204 
5205 		tmp = cik_halt_rlc(rdev);
5206 
5207 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5208 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5209 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5210 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
5211 		WREG32(RLC_SERDES_WR_CTRL, data);
5212 
5213 		cik_update_rlc(rdev, tmp);
5214 	}
5215 }
5216 
5217 static const u32 mc_cg_registers[] =
5218 {
5219 	MC_HUB_MISC_HUB_CG,
5220 	MC_HUB_MISC_SIP_CG,
5221 	MC_HUB_MISC_VM_CG,
5222 	MC_XPB_CLK_GAT,
5223 	ATC_MISC_CG,
5224 	MC_CITF_MISC_WR_CG,
5225 	MC_CITF_MISC_RD_CG,
5226 	MC_CITF_MISC_VM_CG,
5227 	VM_L2_CG,
5228 };
5229 
5230 static void cik_enable_mc_ls(struct radeon_device *rdev,
5231 			     bool enable)
5232 {
5233 	int i;
5234 	u32 orig, data;
5235 
5236 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5237 		orig = data = RREG32(mc_cg_registers[i]);
5238 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5239 			data |= MC_LS_ENABLE;
5240 		else
5241 			data &= ~MC_LS_ENABLE;
5242 		if (data != orig)
5243 			WREG32(mc_cg_registers[i], data);
5244 	}
5245 }
5246 
5247 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5248 			       bool enable)
5249 {
5250 	int i;
5251 	u32 orig, data;
5252 
5253 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5254 		orig = data = RREG32(mc_cg_registers[i]);
5255 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5256 			data |= MC_CG_ENABLE;
5257 		else
5258 			data &= ~MC_CG_ENABLE;
5259 		if (data != orig)
5260 			WREG32(mc_cg_registers[i], data);
5261 	}
5262 }
5263 
5264 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5265 				 bool enable)
5266 {
5267 	u32 orig, data;
5268 
5269 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5270 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5271 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5272 	} else {
5273 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5274 		data |= 0xff000000;
5275 		if (data != orig)
5276 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5277 
5278 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5279 		data |= 0xff000000;
5280 		if (data != orig)
5281 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5282 	}
5283 }
5284 
5285 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5286 				 bool enable)
5287 {
5288 	u32 orig, data;
5289 
5290 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
5291 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5292 		data |= 0x100;
5293 		if (orig != data)
5294 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5295 
5296 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5297 		data |= 0x100;
5298 		if (orig != data)
5299 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5300 	} else {
5301 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5302 		data &= ~0x100;
5303 		if (orig != data)
5304 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5305 
5306 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5307 		data &= ~0x100;
5308 		if (orig != data)
5309 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5310 	}
5311 }
5312 
5313 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
5314 				bool enable)
5315 {
5316 	u32 orig, data;
5317 
5318 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5319 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5320 		data = 0xfff;
5321 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5322 
5323 		orig = data = RREG32(UVD_CGC_CTRL);
5324 		data |= DCM;
5325 		if (orig != data)
5326 			WREG32(UVD_CGC_CTRL, data);
5327 	} else {
5328 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5329 		data &= ~0xfff;
5330 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5331 
5332 		orig = data = RREG32(UVD_CGC_CTRL);
5333 		data &= ~DCM;
5334 		if (orig != data)
5335 			WREG32(UVD_CGC_CTRL, data);
5336 	}
5337 }
5338 
5339 static void cik_enable_bif_mgls(struct radeon_device *rdev,
5340 			       bool enable)
5341 {
5342 	u32 orig, data;
5343 
5344 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
5345 
5346 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5347 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5348 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5349 	else
5350 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5351 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5352 
5353 	if (orig != data)
5354 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
5355 }
5356 
5357 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5358 				bool enable)
5359 {
5360 	u32 orig, data;
5361 
5362 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5363 
5364 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5365 		data &= ~CLOCK_GATING_DIS;
5366 	else
5367 		data |= CLOCK_GATING_DIS;
5368 
5369 	if (orig != data)
5370 		WREG32(HDP_HOST_PATH_CNTL, data);
5371 }
5372 
5373 static void cik_enable_hdp_ls(struct radeon_device *rdev,
5374 			      bool enable)
5375 {
5376 	u32 orig, data;
5377 
5378 	orig = data = RREG32(HDP_MEM_POWER_LS);
5379 
5380 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5381 		data |= HDP_LS_ENABLE;
5382 	else
5383 		data &= ~HDP_LS_ENABLE;
5384 
5385 	if (orig != data)
5386 		WREG32(HDP_MEM_POWER_LS, data);
5387 }
5388 
5389 void cik_update_cg(struct radeon_device *rdev,
5390 		   u32 block, bool enable)
5391 {
5392 
5393 	if (block & RADEON_CG_BLOCK_GFX) {
5394 		cik_enable_gui_idle_interrupt(rdev, false);
5395 		/* order matters! */
5396 		if (enable) {
5397 			cik_enable_mgcg(rdev, true);
5398 			cik_enable_cgcg(rdev, true);
5399 		} else {
5400 			cik_enable_cgcg(rdev, false);
5401 			cik_enable_mgcg(rdev, false);
5402 		}
5403 		cik_enable_gui_idle_interrupt(rdev, true);
5404 	}
5405 
5406 	if (block & RADEON_CG_BLOCK_MC) {
5407 		if (!(rdev->flags & RADEON_IS_IGP)) {
5408 			cik_enable_mc_mgcg(rdev, enable);
5409 			cik_enable_mc_ls(rdev, enable);
5410 		}
5411 	}
5412 
5413 	if (block & RADEON_CG_BLOCK_SDMA) {
5414 		cik_enable_sdma_mgcg(rdev, enable);
5415 		cik_enable_sdma_mgls(rdev, enable);
5416 	}
5417 
5418 	if (block & RADEON_CG_BLOCK_BIF) {
5419 		cik_enable_bif_mgls(rdev, enable);
5420 	}
5421 
5422 	if (block & RADEON_CG_BLOCK_UVD) {
5423 		if (rdev->has_uvd)
5424 			cik_enable_uvd_mgcg(rdev, enable);
5425 	}
5426 
5427 	if (block & RADEON_CG_BLOCK_HDP) {
5428 		cik_enable_hdp_mgcg(rdev, enable);
5429 		cik_enable_hdp_ls(rdev, enable);
5430 	}
5431 }
5432 
5433 static void cik_init_cg(struct radeon_device *rdev)
5434 {
5435 
5436 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
5437 
5438 	if (rdev->has_uvd)
5439 		si_init_uvd_internal_cg(rdev);
5440 
5441 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5442 			     RADEON_CG_BLOCK_SDMA |
5443 			     RADEON_CG_BLOCK_BIF |
5444 			     RADEON_CG_BLOCK_UVD |
5445 			     RADEON_CG_BLOCK_HDP), true);
5446 }
5447 
5448 static void cik_fini_cg(struct radeon_device *rdev)
5449 {
5450 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5451 			     RADEON_CG_BLOCK_SDMA |
5452 			     RADEON_CG_BLOCK_BIF |
5453 			     RADEON_CG_BLOCK_UVD |
5454 			     RADEON_CG_BLOCK_HDP), false);
5455 
5456 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
5457 }
5458 
5459 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
5460 					  bool enable)
5461 {
5462 	u32 data, orig;
5463 
5464 	orig = data = RREG32(RLC_PG_CNTL);
5465 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5466 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5467 	else
5468 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5469 	if (orig != data)
5470 		WREG32(RLC_PG_CNTL, data);
5471 }
5472 
5473 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
5474 					  bool enable)
5475 {
5476 	u32 data, orig;
5477 
5478 	orig = data = RREG32(RLC_PG_CNTL);
5479 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5480 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5481 	else
5482 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5483 	if (orig != data)
5484 		WREG32(RLC_PG_CNTL, data);
5485 }
5486 
5487 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
5488 {
5489 	u32 data, orig;
5490 
5491 	orig = data = RREG32(RLC_PG_CNTL);
5492 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
5493 		data &= ~DISABLE_CP_PG;
5494 	else
5495 		data |= DISABLE_CP_PG;
5496 	if (orig != data)
5497 		WREG32(RLC_PG_CNTL, data);
5498 }
5499 
5500 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
5501 {
5502 	u32 data, orig;
5503 
5504 	orig = data = RREG32(RLC_PG_CNTL);
5505 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
5506 		data &= ~DISABLE_GDS_PG;
5507 	else
5508 		data |= DISABLE_GDS_PG;
5509 	if (orig != data)
5510 		WREG32(RLC_PG_CNTL, data);
5511 }
5512 
5513 #define CP_ME_TABLE_SIZE    96
5514 #define CP_ME_TABLE_OFFSET  2048
5515 #define CP_MEC_TABLE_OFFSET 4096
5516 
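/* The PG register tables appear to live at fixed word offsets inside each
 * CP firmware image: one table of CP_ME_TABLE_SIZE dwords per microengine,
 * at word offset 2048 for CE/PFP/ME and 4096 for the MEC firmware.
 * cik_init_cp_pg_table() copies those tables back-to-back into the RLC
 * cp_table buffer, byte-swapping each dword out of the big-endian
 * firmware format.
 */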
5517 void cik_init_cp_pg_table(struct radeon_device *rdev)
5518 {
5519 	const __be32 *fw_data;
5520 	volatile u32 *dst_ptr;
5521 	int me, i, max_me = 4;
5522 	u32 bo_offset = 0;
5523 	u32 table_offset;
5524 
5525 	if (rdev->family == CHIP_KAVERI)
5526 		max_me = 5;
5527 
5528 	if (rdev->rlc.cp_table_ptr == NULL)
5529 		return;
5530 
5531 	/* write the cp table buffer */
5532 	dst_ptr = rdev->rlc.cp_table_ptr;
5533 	for (me = 0; me < max_me; me++) {
5534 		if (me == 0) {
5535 			fw_data = (const __be32 *)rdev->ce_fw->data;
5536 			table_offset = CP_ME_TABLE_OFFSET;
5537 		} else if (me == 1) {
5538 			fw_data = (const __be32 *)rdev->pfp_fw->data;
5539 			table_offset = CP_ME_TABLE_OFFSET;
5540 		} else if (me == 2) {
5541 			fw_data = (const __be32 *)rdev->me_fw->data;
5542 			table_offset = CP_ME_TABLE_OFFSET;
5543 		} else {
5544 			fw_data = (const __be32 *)rdev->mec_fw->data;
5545 			table_offset = CP_MEC_TABLE_OFFSET;
5546 		}
5547 
5548 		for (i = 0; i < CP_ME_TABLE_SIZE; i++) {
5549 			dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
5550 		}
5551 		bo_offset += CP_ME_TABLE_SIZE;
5552 	}
5553 }
5554 
5555 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
5556 				bool enable)
5557 {
5558 	u32 data, orig;
5559 
5560 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5561 		orig = data = RREG32(RLC_PG_CNTL);
5562 		data |= GFX_PG_ENABLE;
5563 		if (orig != data)
5564 			WREG32(RLC_PG_CNTL, data);
5565 
5566 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
5567 		data |= AUTO_PG_EN;
5568 		if (orig != data)
5569 			WREG32(RLC_AUTO_PG_CTRL, data);
5570 	} else {
5571 		orig = data = RREG32(RLC_PG_CNTL);
5572 		data &= ~GFX_PG_ENABLE;
5573 		if (orig != data)
5574 			WREG32(RLC_PG_CNTL, data);
5575 
5576 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
5577 		data &= ~AUTO_PG_EN;
5578 		if (orig != data)
5579 			WREG32(RLC_AUTO_PG_CTRL, data);
5580 
5581 		data = RREG32(DB_RENDER_CONTROL);
5582 	}
5583 }
5584 
5585 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5586 {
5587 	u32 mask = 0, tmp, tmp1;
5588 	int i;
5589 
5590 	cik_select_se_sh(rdev, se, sh);
5591 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5592 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5593 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5594 
5595 	tmp &= 0xffff0000;
5596 
5597 	tmp |= tmp1;
5598 	tmp >>= 16;
5599 
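	/* build a mask with one bit per possible CU in this SH; tmp at this
	 * point holds the combined hw/user CU disable bitmap, so the active
	 * CUs are its zero bits
	 */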
5600 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
5601 		mask <<= 1;
5602 		mask |= 1;
5603 	}
5604 
5605 	return (~tmp) & mask;
5606 }
5607 
5608 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
5609 {
5610 	u32 i, j, k, active_cu_number = 0;
5611 	u32 mask, counter, cu_bitmap;
5612 	u32 tmp = 0;
5613 
5614 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5615 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5616 			mask = 1;
5617 			cu_bitmap = 0;
5618 			counter = 0;
5619 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
5620 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
5621 					if (counter < 2)
5622 						cu_bitmap |= mask;
5623 					counter++;
5624 				}
5625 				mask <<= 1;
5626 			}
5627 
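			/* pack the always-on bitmap: 16 bits per SE, 8 bits per SH */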
5628 			active_cu_number += counter;
5629 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5630 		}
5631 	}
5632 
5633 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5634 
5635 	tmp = RREG32(RLC_MAX_PG_CU);
5636 	tmp &= ~MAX_PU_CU_MASK;
5637 	tmp |= MAX_PU_CU(active_cu_number);
5638 	WREG32(RLC_MAX_PG_CU, tmp);
5639 }
5640 
5641 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
5642 				       bool enable)
5643 {
5644 	u32 data, orig;
5645 
5646 	orig = data = RREG32(RLC_PG_CNTL);
5647 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
5648 		data |= STATIC_PER_CU_PG_ENABLE;
5649 	else
5650 		data &= ~STATIC_PER_CU_PG_ENABLE;
5651 	if (orig != data)
5652 		WREG32(RLC_PG_CNTL, data);
5653 }
5654 
5655 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
5656 					bool enable)
5657 {
5658 	u32 data, orig;
5659 
5660 	orig = data = RREG32(RLC_PG_CNTL);
5661 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
5662 		data |= DYN_PER_CU_PG_ENABLE;
5663 	else
5664 		data &= ~DYN_PER_CU_PG_ENABLE;
5665 	if (orig != data)
5666 		WREG32(RLC_PG_CNTL, data);
5667 }
5668 
5669 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
5670 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
5671 
5672 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
5673 {
5674 	u32 data, orig;
5675 	u32 i;
5676 
5677 	if (rdev->rlc.cs_data) {
5678 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
5679 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
5680 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
5681 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
5682 	} else {
5683 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
5684 		for (i = 0; i < 3; i++)
5685 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
5686 	}
5687 	if (rdev->rlc.reg_list) {
5688 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
5689 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
5690 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
5691 	}
5692 
5693 	orig = data = RREG32(RLC_PG_CNTL);
5694 	data |= GFX_PG_SRC;
5695 	if (orig != data)
5696 		WREG32(RLC_PG_CNTL, data);
5697 
5698 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5699 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
5700 
5701 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
5702 	data &= ~IDLE_POLL_COUNT_MASK;
5703 	data |= IDLE_POLL_COUNT(0x60);
5704 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
5705 
5706 	data = 0x10101010;
5707 	WREG32(RLC_PG_DELAY, data);
5708 
5709 	data = RREG32(RLC_PG_DELAY_2);
5710 	data &= ~0xff;
5711 	data |= 0x3;
5712 	WREG32(RLC_PG_DELAY_2, data);
5713 
5714 	data = RREG32(RLC_AUTO_PG_CTRL);
5715 	data &= ~GRBM_REG_SGIT_MASK;
5716 	data |= GRBM_REG_SGIT(0x700);
5717 	WREG32(RLC_AUTO_PG_CTRL, data);
5718 
5719 }
5720 
5721 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
5722 {
5723 	cik_enable_gfx_cgpg(rdev, enable);
5724 	cik_enable_gfx_static_mgpg(rdev, enable);
5725 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
5726 }
5727 
5728 u32 cik_get_csb_size(struct radeon_device *rdev)
5729 {
5730 	u32 count = 0;
5731 	const struct cs_section_def *sect = NULL;
5732 	const struct cs_extent_def *ext = NULL;
5733 
5734 	if (rdev->rlc.cs_data == NULL)
5735 		return 0;
5736 
5737 	/* begin clear state */
5738 	count += 2;
5739 	/* context control state */
5740 	count += 3;
5741 
5742 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5743 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5744 			if (sect->id == SECT_CONTEXT)
5745 				count += 2 + ext->reg_count;
5746 			else
5747 				return 0;
5748 		}
5749 	}
5750 	/* pa_sc_raster_config/pa_sc_raster_config1 */
5751 	count += 4;
5752 	/* end clear state */
5753 	count += 2;
5754 	/* clear state */
5755 	count += 2;
5756 
5757 	return count;
5758 }
5759 
5760 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5761 {
5762 	u32 count = 0, i;
5763 	const struct cs_section_def *sect = NULL;
5764 	const struct cs_extent_def *ext = NULL;
5765 
5766 	if (rdev->rlc.cs_data == NULL)
5767 		return;
5768 	if (buffer == NULL)
5769 		return;
5770 
5771 	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
5772 	buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;
5773 
5774 	buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
5775 	buffer[count++] = 0x80000000;
5776 	buffer[count++] = 0x80000000;
5777 
5778 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5779 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5780 			if (sect->id == SECT_CONTEXT) {
5781 				buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
5782 				buffer[count++] = ext->reg_index - 0xa000;
5783 				for (i = 0; i < ext->reg_count; i++)
5784 					buffer[count++] = ext->extent[i];
5785 			} else {
5786 				return;
5787 			}
5788 		}
5789 	}
5790 
5791 	buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
5792 	buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
5793 	switch (rdev->family) {
5794 	case CHIP_BONAIRE:
5795 		buffer[count++] = 0x16000012;
5796 		buffer[count++] = 0x00000000;
5797 		break;
5798 	case CHIP_KAVERI:
5799 		buffer[count++] = 0x00000000; /* XXX */
5800 		buffer[count++] = 0x00000000;
5801 		break;
5802 	case CHIP_KABINI:
5803 		buffer[count++] = 0x00000000; /* XXX */
5804 		buffer[count++] = 0x00000000;
5805 		break;
5806 	default:
5807 		buffer[count++] = 0x00000000;
5808 		buffer[count++] = 0x00000000;
5809 		break;
5810 	}
5811 
5812 	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
5813 	buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;
5814 
5815 	buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
5816 	buffer[count++] = 0;
5817 }
5818 
5819 static void cik_init_pg(struct radeon_device *rdev)
5820 {
5821 	if (rdev->pg_flags) {
5822 		cik_enable_sck_slowdown_on_pu(rdev, true);
5823 		cik_enable_sck_slowdown_on_pd(rdev, true);
5824 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5825 			cik_init_gfx_cgpg(rdev);
5826 			cik_enable_cp_pg(rdev, true);
5827 			cik_enable_gds_pg(rdev, true);
5828 		}
5829 		cik_init_ao_cu_mask(rdev);
5830 		cik_update_gfx_pg(rdev, true);
5831 	}
5832 }
5833 
5834 static void cik_fini_pg(struct radeon_device *rdev)
5835 {
5836 	if (rdev->pg_flags) {
5837 		cik_update_gfx_pg(rdev, false);
5838 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5839 			cik_enable_cp_pg(rdev, false);
5840 			cik_enable_gds_pg(rdev, false);
5841 		}
5842 	}
5843 }
5844 
5845 /*
5846  * Interrupts
5847  * Starting with r6xx, interrupts are handled via a ring buffer.
5848  * Ring buffers are areas of GPU accessible memory that the GPU
5849  * writes interrupt vectors into and the host reads vectors out of.
5850  * There is a rptr (read pointer) that determines where the
5851  * host is currently reading, and a wptr (write pointer)
5852  * which determines where the GPU has written.  When the
5853  * pointers are equal, the ring is idle.  When the GPU
5854  * writes vectors to the ring buffer, it increments the
5855  * wptr.  When there is an interrupt, the host then starts
5856  * fetching commands and processing them until the pointers are
5857  * equal again at which point it updates the rptr.
5858  */
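/* A minimal sketch of that consume loop, assuming only that each vector is
 * 16 bytes and that ptr_mask wraps byte offsets at the ring size
 * (handle_vector() is an illustrative stand-in, not a real helper;
 * cik_irq_process() below implements the full version):
 *
 *	u32 rptr = rdev->ih.rptr;
 *	u32 wptr = cik_get_ih_wptr(rdev);
 *
 *	while (rptr != wptr) {
 *		handle_vector(&rdev->ih.ring[rptr / 4]);
 *		rptr = (rptr + 16) & rdev->ih.ptr_mask;
 *	}
 *	WREG32(IH_RB_RPTR, rptr);	(publish the new read pointer)
 */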
5859 
5860 /**
5861  * cik_enable_interrupts - Enable the interrupt ring buffer
5862  *
5863  * @rdev: radeon_device pointer
5864  *
5865  * Enable the interrupt ring buffer (CIK).
5866  */
5867 static void cik_enable_interrupts(struct radeon_device *rdev)
5868 {
5869 	u32 ih_cntl = RREG32(IH_CNTL);
5870 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5871 
5872 	ih_cntl |= ENABLE_INTR;
5873 	ih_rb_cntl |= IH_RB_ENABLE;
5874 	WREG32(IH_CNTL, ih_cntl);
5875 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5876 	rdev->ih.enabled = true;
5877 }
5878 
5879 /**
5880  * cik_disable_interrupts - Disable the interrupt ring buffer
5881  *
5882  * @rdev: radeon_device pointer
5883  *
5884  * Disable the interrupt ring buffer (CIK).
5885  */
5886 static void cik_disable_interrupts(struct radeon_device *rdev)
5887 {
5888 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5889 	u32 ih_cntl = RREG32(IH_CNTL);
5890 
5891 	ih_rb_cntl &= ~IH_RB_ENABLE;
5892 	ih_cntl &= ~ENABLE_INTR;
5893 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5894 	WREG32(IH_CNTL, ih_cntl);
5895 	/* set rptr, wptr to 0 */
5896 	WREG32(IH_RB_RPTR, 0);
5897 	WREG32(IH_RB_WPTR, 0);
5898 	rdev->ih.enabled = false;
5899 	rdev->ih.rptr = 0;
5900 }
5901 
5902 /**
5903  * cik_disable_interrupt_state - Disable all interrupt sources
5904  *
5905  * @rdev: radeon_device pointer
5906  *
5907  * Clear all interrupt enable bits used by the driver (CIK).
5908  */
5909 static void cik_disable_interrupt_state(struct radeon_device *rdev)
5910 {
5911 	u32 tmp;
5912 
5913 	/* gfx ring */
5914 	tmp = RREG32(CP_INT_CNTL_RING0) &
5915 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5916 	WREG32(CP_INT_CNTL_RING0, tmp);
5917 	/* sdma */
5918 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5919 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5920 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5921 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5922 	/* compute queues */
5923 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
5924 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
5925 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
5926 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
5927 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
5928 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
5929 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
5930 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
5931 	/* grbm */
5932 	WREG32(GRBM_INT_CNTL, 0);
5933 	/* vline/vblank, etc. */
5934 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5935 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5936 	if (rdev->num_crtc >= 4) {
5937 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5938 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5939 	}
5940 	if (rdev->num_crtc >= 6) {
5941 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5942 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5943 	}
5944 
5945 	/* dac hotplug */
5946 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5947 
5948 	/* digital hotplug */
5949 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5950 	WREG32(DC_HPD1_INT_CONTROL, tmp);
5951 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5952 	WREG32(DC_HPD2_INT_CONTROL, tmp);
5953 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5954 	WREG32(DC_HPD3_INT_CONTROL, tmp);
5955 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5956 	WREG32(DC_HPD4_INT_CONTROL, tmp);
5957 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5958 	WREG32(DC_HPD5_INT_CONTROL, tmp);
5959 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5960 	WREG32(DC_HPD6_INT_CONTROL, tmp);
5961 
5962 }
5963 
5964 /**
5965  * cik_irq_init - init and enable the interrupt ring
5966  *
5967  * @rdev: radeon_device pointer
5968  *
5969  * Allocate a ring buffer for the interrupt controller,
5970  * enable the RLC, disable interrupts, enable the IH
5971  * ring buffer and enable it (CIK).
5972  * Called at device load and resume.
5973  * Returns 0 for success, errors for failure.
5974  */
5975 static int cik_irq_init(struct radeon_device *rdev)
5976 {
5977 	int ret = 0;
5978 	int rb_bufsz;
5979 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5980 
5981 	/* allocate ring */
5982 	ret = r600_ih_ring_alloc(rdev);
5983 	if (ret)
5984 		return ret;
5985 
5986 	/* disable irqs */
5987 	cik_disable_interrupts(rdev);
5988 
5989 	/* init rlc */
5990 	ret = cik_rlc_resume(rdev);
5991 	if (ret) {
5992 		r600_ih_ring_fini(rdev);
5993 		return ret;
5994 	}
5995 
5996 	/* setup interrupt control */
5997 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
5998 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5999 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6000 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6001 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6002 	 */
6003 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6004 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6005 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6006 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6007 
6008 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6009 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6010 
6011 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6012 		      IH_WPTR_OVERFLOW_CLEAR |
6013 		      (rb_bufsz << 1));
6014 
6015 	if (rdev->wb.enabled)
6016 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6017 
6018 	/* set the writeback address whether it's enabled or not */
6019 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6020 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6021 
6022 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6023 
6024 	/* set rptr, wptr to 0 */
6025 	WREG32(IH_RB_RPTR, 0);
6026 	WREG32(IH_RB_WPTR, 0);
6027 
6028 	/* Default settings for IH_CNTL (disabled at first) */
6029 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6030 	/* RPTR_REARM only works if msi's are enabled */
6031 	if (rdev->msi_enabled)
6032 		ih_cntl |= RPTR_REARM;
6033 	WREG32(IH_CNTL, ih_cntl);
6034 
6035 	/* force the active interrupt state to all disabled */
6036 	cik_disable_interrupt_state(rdev);
6037 
6038 	pci_set_master(rdev->pdev);
6039 
6040 	/* enable irqs */
6041 	cik_enable_interrupts(rdev);
6042 
6043 	return ret;
6044 }
6045 
6046 /**
6047  * cik_irq_set - enable/disable interrupt sources
6048  *
6049  * @rdev: radeon_device pointer
6050  *
6051  * Enable interrupt sources on the GPU (vblanks, hpd,
6052  * etc.) (CIK).
6053  * Returns 0 for success, errors for failure.
6054  */
6055 int cik_irq_set(struct radeon_device *rdev)
6056 {
6057 	u32 cp_int_cntl;
6058 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6059 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6060 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6061 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6062 	u32 grbm_int_cntl = 0;
6063 	u32 dma_cntl, dma_cntl1;
6064 	u32 thermal_int;
6065 
6066 	if (!rdev->irq.installed) {
6067 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6068 		return -EINVAL;
6069 	}
6070 	/* don't enable anything if the ih is disabled */
6071 	if (!rdev->ih.enabled) {
6072 		cik_disable_interrupts(rdev);
6073 		/* force the active interrupt state to all disabled */
6074 		cik_disable_interrupt_state(rdev);
6075 		return 0;
6076 	}
6077 
6078 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6079 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6080 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6081 
6082 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6083 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6084 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6085 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6086 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6087 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6088 
6089 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6090 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6091 
6092 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6093 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6094 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6095 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6096 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6097 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6098 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6099 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6100 
6101 	if (rdev->flags & RADEON_IS_IGP)
6102 		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6103 			~(THERM_INTH_MASK | THERM_INTL_MASK);
6104 	else
6105 		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6106 			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6107 
6108 	/* enable CP interrupts on all rings */
6109 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6110 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
6111 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6112 	}
6113 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6114 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6115 		DRM_DEBUG("cik_irq_set: sw int cp1\n");
6116 		if (ring->me == 1) {
6117 			switch (ring->pipe) {
6118 			case 0:
6119 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6120 				break;
6121 			case 1:
6122 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6123 				break;
6124 			case 2:
6125 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6126 				break;
6127 			case 3:
6128 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6129 				break;
6130 			default:
6131 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6132 				break;
6133 			}
6134 		} else if (ring->me == 2) {
6135 			switch (ring->pipe) {
6136 			case 0:
6137 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6138 				break;
6139 			case 1:
6140 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6141 				break;
6142 			case 2:
6143 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6144 				break;
6145 			case 3:
6146 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6147 				break;
6148 			default:
6149 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6150 				break;
6151 			}
6152 		} else {
6153 			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
6154 		}
6155 	}
6156 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6157 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6158 		DRM_DEBUG("cik_irq_set: sw int cp2\n");
6159 		if (ring->me == 1) {
6160 			switch (ring->pipe) {
6161 			case 0:
6162 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6163 				break;
6164 			case 1:
6165 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6166 				break;
6167 			case 2:
6168 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6169 				break;
6170 			case 3:
6171 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6172 				break;
6173 			default:
6174 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6175 				break;
6176 			}
6177 		} else if (ring->me == 2) {
6178 			switch (ring->pipe) {
6179 			case 0:
6180 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6181 				break;
6182 			case 1:
6183 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6184 				break;
6185 			case 2:
6186 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6187 				break;
6188 			case 3:
6189 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6190 				break;
6191 			default:
6192 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6193 				break;
6194 			}
6195 		} else {
6196 			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
6197 		}
6198 	}
6199 
6200 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6201 		DRM_DEBUG("cik_irq_set: sw int dma\n");
6202 		dma_cntl |= TRAP_ENABLE;
6203 	}
6204 
6205 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6206 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
6207 		dma_cntl1 |= TRAP_ENABLE;
6208 	}
6209 
6210 	if (rdev->irq.crtc_vblank_int[0] ||
6211 	    atomic_read(&rdev->irq.pflip[0])) {
6212 		DRM_DEBUG("cik_irq_set: vblank 0\n");
6213 		crtc1 |= VBLANK_INTERRUPT_MASK;
6214 	}
6215 	if (rdev->irq.crtc_vblank_int[1] ||
6216 	    atomic_read(&rdev->irq.pflip[1])) {
6217 		DRM_DEBUG("cik_irq_set: vblank 1\n");
6218 		crtc2 |= VBLANK_INTERRUPT_MASK;
6219 	}
6220 	if (rdev->irq.crtc_vblank_int[2] ||
6221 	    atomic_read(&rdev->irq.pflip[2])) {
6222 		DRM_DEBUG("cik_irq_set: vblank 2\n");
6223 		crtc3 |= VBLANK_INTERRUPT_MASK;
6224 	}
6225 	if (rdev->irq.crtc_vblank_int[3] ||
6226 	    atomic_read(&rdev->irq.pflip[3])) {
6227 		DRM_DEBUG("cik_irq_set: vblank 3\n");
6228 		crtc4 |= VBLANK_INTERRUPT_MASK;
6229 	}
6230 	if (rdev->irq.crtc_vblank_int[4] ||
6231 	    atomic_read(&rdev->irq.pflip[4])) {
6232 		DRM_DEBUG("cik_irq_set: vblank 4\n");
6233 		crtc5 |= VBLANK_INTERRUPT_MASK;
6234 	}
6235 	if (rdev->irq.crtc_vblank_int[5] ||
6236 	    atomic_read(&rdev->irq.pflip[5])) {
6237 		DRM_DEBUG("cik_irq_set: vblank 5\n");
6238 		crtc6 |= VBLANK_INTERRUPT_MASK;
6239 	}
6240 	if (rdev->irq.hpd[0]) {
6241 		DRM_DEBUG("cik_irq_set: hpd 1\n");
6242 		hpd1 |= DC_HPDx_INT_EN;
6243 	}
6244 	if (rdev->irq.hpd[1]) {
6245 		DRM_DEBUG("cik_irq_set: hpd 2\n");
6246 		hpd2 |= DC_HPDx_INT_EN;
6247 	}
6248 	if (rdev->irq.hpd[2]) {
6249 		DRM_DEBUG("cik_irq_set: hpd 3\n");
6250 		hpd3 |= DC_HPDx_INT_EN;
6251 	}
6252 	if (rdev->irq.hpd[3]) {
6253 		DRM_DEBUG("cik_irq_set: hpd 4\n");
6254 		hpd4 |= DC_HPDx_INT_EN;
6255 	}
6256 	if (rdev->irq.hpd[4]) {
6257 		DRM_DEBUG("cik_irq_set: hpd 5\n");
6258 		hpd5 |= DC_HPDx_INT_EN;
6259 	}
6260 	if (rdev->irq.hpd[5]) {
6261 		DRM_DEBUG("cik_irq_set: hpd 6\n");
6262 		hpd6 |= DC_HPDx_INT_EN;
6263 	}
6264 
6265 	if (rdev->irq.dpm_thermal) {
6266 		DRM_DEBUG("dpm thermal\n");
6267 		if (rdev->flags & RADEON_IS_IGP)
6268 			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6269 		else
6270 			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6271 	}
6272 
6273 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6274 
6275 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6276 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6277 
6278 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6279 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6280 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6281 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6282 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6283 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6284 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6285 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6286 
6287 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6288 
6289 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6290 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6291 	if (rdev->num_crtc >= 4) {
6292 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6293 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6294 	}
6295 	if (rdev->num_crtc >= 6) {
6296 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6297 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6298 	}
6299 
6300 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
6301 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
6302 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
6303 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
6304 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
6305 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
6306 
6307 	if (rdev->flags & RADEON_IS_IGP)
6308 		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6309 	else
6310 		WREG32_SMC(CG_THERMAL_INT, thermal_int);
6311 
6312 	return 0;
6313 }
6314 
6315 /**
6316  * cik_irq_ack - ack interrupt sources
6317  *
6318  * @rdev: radeon_device pointer
6319  *
6320  * Ack interrupt sources on the GPU (vblanks, hpd,
6321  * etc.) (CIK).  Certain interrupt sources are sw
6322  * generated and do not require an explicit ack.
6323  */
6324 static inline void cik_irq_ack(struct radeon_device *rdev)
6325 {
6326 	u32 tmp;
6327 
6328 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6329 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6330 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6331 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6332 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6333 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6334 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6335 
6336 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6337 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6338 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6339 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6340 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6341 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6342 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6343 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6344 
6345 	if (rdev->num_crtc >= 4) {
6346 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6347 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6348 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6349 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6350 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6351 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6352 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6353 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6354 	}
6355 
6356 	if (rdev->num_crtc >= 6) {
6357 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6358 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6359 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6360 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6361 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6362 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6363 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6364 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6365 	}
6366 
6367 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6368 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6369 		tmp |= DC_HPDx_INT_ACK;
6370 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6371 	}
6372 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6373 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6374 		tmp |= DC_HPDx_INT_ACK;
6375 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6376 	}
6377 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6378 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6379 		tmp |= DC_HPDx_INT_ACK;
6380 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6381 	}
6382 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6383 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6384 		tmp |= DC_HPDx_INT_ACK;
6385 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6386 	}
6387 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6388 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6389 		tmp |= DC_HPDx_INT_ACK;
6390 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6391 	}
6392 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6393 		tmp = RREG32(DC_HPD6_INT_CONTROL);
6394 		tmp |= DC_HPDx_INT_ACK;
6395 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6396 	}
6397 }
6398 
6399 /**
6400  * cik_irq_disable - disable interrupts
6401  *
6402  * @rdev: radeon_device pointer
6403  *
6404  * Disable interrupts on the hw (CIK).
6405  */
6406 static void cik_irq_disable(struct radeon_device *rdev)
6407 {
6408 	cik_disable_interrupts(rdev);
6409 	/* Wait and acknowledge irq */
6410 	mdelay(1);
6411 	cik_irq_ack(rdev);
6412 	cik_disable_interrupt_state(rdev);
6413 }
6414 
6415 /**
6416  * cik_irq_suspend - disable interrupts for suspend
6417  *
6418  * @rdev: radeon_device pointer
6419  *
6420  * Disable interrupts and stop the RLC (CIK).
6421  * Used for suspend.
6422  */
6423 static void cik_irq_suspend(struct radeon_device *rdev)
6424 {
6425 	cik_irq_disable(rdev);
6426 	cik_rlc_stop(rdev);
6427 }
6428 
6429 /**
6430  * cik_irq_fini - tear down interrupt support
6431  *
6432  * @rdev: radeon_device pointer
6433  *
6434  * Disable interrupts on the hw and free the IH ring
6435  * buffer (CIK).
6436  * Used for driver unload.
6437  */
6438 static void cik_irq_fini(struct radeon_device *rdev)
6439 {
6440 	cik_irq_suspend(rdev);
6441 	r600_ih_ring_fini(rdev);
6442 }
6443 
6444 /**
6445  * cik_get_ih_wptr - get the IH ring buffer wptr
6446  *
6447  * @rdev: radeon_device pointer
6448  *
6449  * Get the IH ring buffer wptr from either the register
6450  * or the writeback memory buffer (CIK).  Also check for
6451  * ring buffer overflow and deal with it.
6452  * Used by cik_irq_process().
6453  * Returns the value of the wptr.
6454  */
6455 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
6456 {
6457 	u32 wptr, tmp;
6458 
6459 	if (rdev->wb.enabled)
6460 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6461 	else
6462 		wptr = RREG32(IH_RB_WPTR);
6463 
6464 	if (wptr & RB_OVERFLOW) {
6465 		/* When a ring buffer overflow happens, start parsing interrupts
6466 		 * from the last vector that was not overwritten (wptr + 16).
6467 		 * Hopefully this allows us to catch up.
6468 		 */
6469 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6470 			wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6471 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6472 		tmp = RREG32(IH_RB_CNTL);
6473 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6474 		WREG32(IH_RB_CNTL, tmp);
6475 	}
6476 	return (wptr & rdev->ih.ptr_mask);
6477 }
6478 
6479 /* CIK IV Ring
6480  * Each IV ring entry is 128 bits:
6481  * [7:0]    - interrupt source id
6482  * [31:8]   - reserved
6483  * [59:32]  - interrupt source data
6484  * [63:60]  - reserved
6485  * [71:64]  - RINGID
6486  *            CP:
6487  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
6488  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
6489  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
6490  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
6491  *            PIPE_ID - ME0 0=3D
6492  *                    - ME1&2 compute dispatcher (4 pipes each)
6493  *            SDMA:
6494  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
6495  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
6496  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
6497  * [79:72]  - VMID
6498  * [95:80]  - PASID
6499  * [127:96] - reserved
6500  */
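/* A minimal decode of one entry under this layout (dw[] holds the entry's
 * four little-endian dwords; the variable names are illustrative):
 *
 *	src_id   = le32_to_cpu(dw[0]) & 0xff;
 *	src_data = le32_to_cpu(dw[1]) & 0xfffffff;
 *	ring_id  = le32_to_cpu(dw[2]) & 0xff;
 *	vm_id    = (le32_to_cpu(dw[2]) >> 8) & 0xff;
 *	pasid    = le32_to_cpu(dw[2]) >> 16;
 *
 * cik_irq_process() below extracts src_id/src_data/ring_id exactly this way.
 */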
6501 /**
6502  * cik_irq_process - interrupt handler
6503  *
6504  * @rdev: radeon_device pointer
6505  *
6506  * Interrupt handler (CIK).  Walk the IH ring,
6507  * ack interrupts and schedule work to handle
6508  * interrupt events.
6509  * Returns irq process return code.
6510  */
6511 int cik_irq_process(struct radeon_device *rdev)
6512 {
6513 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6514 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6515 	u32 wptr;
6516 	u32 rptr;
6517 	u32 src_id, src_data, ring_id;
6518 	u8 me_id, pipe_id, queue_id;
6519 	u32 ring_index;
6520 	bool queue_hotplug = false;
6521 	bool queue_reset = false;
6522 	u32 addr, status, mc_client;
6523 	bool queue_thermal = false;
6524 
6525 	if (!rdev->ih.enabled || rdev->shutdown)
6526 		return IRQ_NONE;
6527 
6528 	wptr = cik_get_ih_wptr(rdev);
6529 
6530 restart_ih:
6531 	/* is somebody else already processing irqs? */
6532 	if (atomic_xchg(&rdev->ih.lock, 1))
6533 		return IRQ_NONE;
6534 
6535 	rptr = rdev->ih.rptr;
6536 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6537 
6538 	/* Order reading of wptr vs. reading of IH ring data */
6539 	rmb();
6540 
6541 	/* display interrupts */
6542 	cik_irq_ack(rdev);
6543 
6544 	while (rptr != wptr) {
6545 		/* wptr/rptr are in bytes! */
6546 		ring_index = rptr / 4;
6547 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6548 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6549 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6550 
6551 		switch (src_id) {
6552 		case 1: /* D1 vblank/vline */
6553 			switch (src_data) {
6554 			case 0: /* D1 vblank */
6555 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
6556 					if (rdev->irq.crtc_vblank_int[0]) {
6557 						drm_handle_vblank(rdev->ddev, 0);
6558 						rdev->pm.vblank_sync = true;
6559 						wake_up(&rdev->irq.vblank_queue);
6560 					}
6561 					if (atomic_read(&rdev->irq.pflip[0]))
6562 						radeon_crtc_handle_flip(rdev, 0);
6563 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6564 					DRM_DEBUG("IH: D1 vblank\n");
6565 				}
6566 				break;
6567 			case 1: /* D1 vline */
6568 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
6569 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6570 					DRM_DEBUG("IH: D1 vline\n");
6571 				}
6572 				break;
6573 			default:
6574 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6575 				break;
6576 			}
6577 			break;
6578 		case 2: /* D2 vblank/vline */
6579 			switch (src_data) {
6580 			case 0: /* D2 vblank */
6581 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6582 					if (rdev->irq.crtc_vblank_int[1]) {
6583 						drm_handle_vblank(rdev->ddev, 1);
6584 						rdev->pm.vblank_sync = true;
6585 						wake_up(&rdev->irq.vblank_queue);
6586 					}
6587 					if (atomic_read(&rdev->irq.pflip[1]))
6588 						radeon_crtc_handle_flip(rdev, 1);
6589 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6590 					DRM_DEBUG("IH: D2 vblank\n");
6591 				}
6592 				break;
6593 			case 1: /* D2 vline */
6594 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6595 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6596 					DRM_DEBUG("IH: D2 vline\n");
6597 				}
6598 				break;
6599 			default:
6600 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6601 				break;
6602 			}
6603 			break;
6604 		case 3: /* D3 vblank/vline */
6605 			switch (src_data) {
6606 			case 0: /* D3 vblank */
6607 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6608 					if (rdev->irq.crtc_vblank_int[2]) {
6609 						drm_handle_vblank(rdev->ddev, 2);
6610 						rdev->pm.vblank_sync = true;
6611 						wake_up(&rdev->irq.vblank_queue);
6612 					}
6613 					if (atomic_read(&rdev->irq.pflip[2]))
6614 						radeon_crtc_handle_flip(rdev, 2);
6615 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6616 					DRM_DEBUG("IH: D3 vblank\n");
6617 				}
6618 				break;
6619 			case 1: /* D3 vline */
6620 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6621 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6622 					DRM_DEBUG("IH: D3 vline\n");
6623 				}
6624 				break;
6625 			default:
6626 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6627 				break;
6628 			}
6629 			break;
6630 		case 4: /* D4 vblank/vline */
6631 			switch (src_data) {
6632 			case 0: /* D4 vblank */
6633 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6634 					if (rdev->irq.crtc_vblank_int[3]) {
6635 						drm_handle_vblank(rdev->ddev, 3);
6636 						rdev->pm.vblank_sync = true;
6637 						wake_up(&rdev->irq.vblank_queue);
6638 					}
6639 					if (atomic_read(&rdev->irq.pflip[3]))
6640 						radeon_crtc_handle_flip(rdev, 3);
6641 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6642 					DRM_DEBUG("IH: D4 vblank\n");
6643 				}
6644 				break;
6645 			case 1: /* D4 vline */
6646 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6647 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6648 					DRM_DEBUG("IH: D4 vline\n");
6649 				}
6650 				break;
6651 			default:
6652 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6653 				break;
6654 			}
6655 			break;
6656 		case 5: /* D5 vblank/vline */
6657 			switch (src_data) {
6658 			case 0: /* D5 vblank */
6659 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6660 					if (rdev->irq.crtc_vblank_int[4]) {
6661 						drm_handle_vblank(rdev->ddev, 4);
6662 						rdev->pm.vblank_sync = true;
6663 						wake_up(&rdev->irq.vblank_queue);
6664 					}
6665 					if (atomic_read(&rdev->irq.pflip[4]))
6666 						radeon_crtc_handle_flip(rdev, 4);
6667 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6668 					DRM_DEBUG("IH: D5 vblank\n");
6669 				}
6670 				break;
6671 			case 1: /* D5 vline */
6672 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6673 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6674 					DRM_DEBUG("IH: D5 vline\n");
6675 				}
6676 				break;
6677 			default:
6678 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6679 				break;
6680 			}
6681 			break;
6682 		case 6: /* D6 vblank/vline */
6683 			switch (src_data) {
6684 			case 0: /* D6 vblank */
6685 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6686 					if (rdev->irq.crtc_vblank_int[5]) {
6687 						drm_handle_vblank(rdev->ddev, 5);
6688 						rdev->pm.vblank_sync = true;
6689 						wake_up(&rdev->irq.vblank_queue);
6690 					}
6691 					if (atomic_read(&rdev->irq.pflip[5]))
6692 						radeon_crtc_handle_flip(rdev, 5);
6693 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6694 					DRM_DEBUG("IH: D6 vblank\n");
6695 				}
6696 				break;
6697 			case 1: /* D6 vline */
6698 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6699 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6700 					DRM_DEBUG("IH: D6 vline\n");
6701 				}
6702 				break;
6703 			default:
6704 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6705 				break;
6706 			}
6707 			break;
6708 		case 42: /* HPD hotplug */
6709 			switch (src_data) {
6710 			case 0:
6711 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6712 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
6713 					queue_hotplug = true;
6714 					DRM_DEBUG("IH: HPD1\n");
6715 				}
6716 				break;
6717 			case 1:
6718 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6719 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6720 					queue_hotplug = true;
6721 					DRM_DEBUG("IH: HPD2\n");
6722 				}
6723 				break;
6724 			case 2:
6725 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6726 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6727 					queue_hotplug = true;
6728 					DRM_DEBUG("IH: HPD3\n");
6729 				}
6730 				break;
6731 			case 3:
6732 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6733 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6734 					queue_hotplug = true;
6735 					DRM_DEBUG("IH: HPD4\n");
6736 				}
6737 				break;
6738 			case 4:
6739 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6740 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6741 					queue_hotplug = true;
6742 					DRM_DEBUG("IH: HPD5\n");
6743 				}
6744 				break;
6745 			case 5:
6746 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6747 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6748 					queue_hotplug = true;
6749 					DRM_DEBUG("IH: HPD6\n");
6750 				}
6751 				break;
6752 			default:
6753 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6754 				break;
6755 			}
6756 			break;
6757 		case 124: /* UVD */
6758 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6759 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6760 			break;
6761 		case 146:
6762 		case 147:
6763 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6764 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6765 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
6766 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6767 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6768 				addr);
6769 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6770 				status);
6771 			cik_vm_decode_fault(rdev, status, addr, mc_client);
6772 			/* reset addr and status */
6773 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6774 			break;
6775 		case 176: /* GFX RB CP_INT */
6776 		case 177: /* GFX IB CP_INT */
6777 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6778 			break;
6779 		case 181: /* CP EOP event */
6780 			DRM_DEBUG("IH: CP EOP\n");
6781 			/* XXX check the bitfield order! */
6782 			me_id = (ring_id & 0x60) >> 5;
6783 			pipe_id = (ring_id & 0x18) >> 3;
6784 			queue_id = (ring_id & 0x7) >> 0;
6785 			switch (me_id) {
6786 			case 0:
6787 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6788 				break;
6789 			case 1:
6790 			case 2:
6791 				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
6792 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6793 				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
6794 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6795 				break;
6796 			}
6797 			break;
6798 		case 184: /* CP Privileged reg access */
6799 			DRM_ERROR("Illegal register access in command stream\n");
6800 			/* XXX check the bitfield order! */
6801 			me_id = (ring_id & 0x60) >> 5;
6802 			pipe_id = (ring_id & 0x18) >> 3;
6803 			queue_id = (ring_id & 0x7) >> 0;
6804 			switch (me_id) {
6805 			case 0:
6806 				/* This results in a full GPU reset, but all we need to do is soft
6807 				 * reset the CP for gfx
6808 				 */
6809 				queue_reset = true;
6810 				break;
6811 			case 1:
6812 				/* XXX compute */
6813 				queue_reset = true;
6814 				break;
6815 			case 2:
6816 				/* XXX compute */
6817 				queue_reset = true;
6818 				break;
6819 			}
6820 			break;
6821 		case 185: /* CP Privileged inst */
6822 			DRM_ERROR("Illegal instruction in command stream\n");
6823 			/* XXX check the bitfield order! */
6824 			me_id = (ring_id & 0x60) >> 5;
6825 			pipe_id = (ring_id & 0x18) >> 3;
6826 			queue_id = (ring_id & 0x7) >> 0;
6827 			switch (me_id) {
6828 			case 0:
6829 				/* This results in a full GPU reset, but all we need to do is soft
6830 				 * reset the CP for gfx
6831 				 */
6832 				queue_reset = true;
6833 				break;
6834 			case 1:
6835 				/* XXX compute */
6836 				queue_reset = true;
6837 				break;
6838 			case 2:
6839 				/* XXX compute */
6840 				queue_reset = true;
6841 				break;
6842 			}
6843 			break;
6844 		case 224: /* SDMA trap event */
6845 			/* XXX check the bitfield order! */
6846 			me_id = (ring_id & 0x3) >> 0;
6847 			queue_id = (ring_id & 0xc) >> 2;
6848 			DRM_DEBUG("IH: SDMA trap\n");
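			/* assumed ring_id layout for SDMA vectors: bits [1:0]
			 * select the SDMA engine, bits [3:2] the queue on that
			 * engine; e.g. ring_id 0x5 decodes to engine 1, queue 1.
			 */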
6849 			switch (me_id) {
6850 			case 0:
6851 				switch (queue_id) {
6852 				case 0:
6853 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6854 					break;
6855 				case 1:
6856 					/* XXX compute */
6857 					break;
6858 				case 2:
6859 					/* XXX compute */
6860 					break;
6861 				}
6862 				break;
6863 			case 1:
6864 				switch (queue_id) {
6865 				case 0:
6866 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6867 					break;
6868 				case 1:
6869 					/* XXX compute */
6870 					break;
6871 				case 2:
6872 					/* XXX compute */
6873 					break;
6874 				}
6875 				break;
6876 			}
6877 			break;
6878 		case 230: /* thermal low to high */
6879 			DRM_DEBUG("IH: thermal low to high\n");
6880 			rdev->pm.dpm.thermal.high_to_low = false;
6881 			queue_thermal = true;
6882 			break;
6883 		case 231: /* thermal high to low */
6884 			DRM_DEBUG("IH: thermal high to low\n");
6885 			rdev->pm.dpm.thermal.high_to_low = true;
6886 			queue_thermal = true;
6887 			break;
6888 		case 233: /* GUI IDLE */
6889 			DRM_DEBUG("IH: GUI idle\n");
6890 			break;
6891 		case 241: /* SDMA Privileged inst */
6892 		case 247: /* SDMA Privileged inst */
6893 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
6894 			/* XXX check the bitfield order! */
6895 			me_id = (ring_id & 0x3) >> 0;
6896 			queue_id = (ring_id & 0xc) >> 2;
6897 			switch (me_id) {
6898 			case 0:
6899 				switch (queue_id) {
6900 				case 0:
6901 					queue_reset = true;
6902 					break;
6903 				case 1:
6904 					/* XXX compute */
6905 					queue_reset = true;
6906 					break;
6907 				case 2:
6908 					/* XXX compute */
6909 					queue_reset = true;
6910 					break;
6911 				}
6912 				break;
6913 			case 1:
6914 				switch (queue_id) {
6915 				case 0:
6916 					queue_reset = true;
6917 					break;
6918 				case 1:
6919 					/* XXX compute */
6920 					queue_reset = true;
6921 					break;
6922 				case 2:
6923 					/* XXX compute */
6924 					queue_reset = true;
6925 					break;
6926 				}
6927 				break;
6928 			}
6929 			break;
6930 		default:
6931 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6932 			break;
6933 		}
6934 
6935 		/* wptr/rptr are in bytes! */
6936 		rptr += 16;
6937 		rptr &= rdev->ih.ptr_mask;
6938 	}
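	/* heavier follow-up work (hotplug probing, GPU reset, thermal
	 * throttling) is deferred to process context via work items rather
	 * than handled here in interrupt context.
	 */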
6939 	if (queue_hotplug)
6940 		schedule_work(&rdev->hotplug_work);
6941 	if (queue_reset)
6942 		schedule_work(&rdev->reset_work);
6943 	if (queue_thermal)
6944 		schedule_work(&rdev->pm.dpm.thermal.work);
6945 	rdev->ih.rptr = rptr;
6946 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
6947 	atomic_set(&rdev->ih.lock, 0);
6948 
6949 	/* make sure wptr hasn't changed while processing */
6950 	wptr = cik_get_ih_wptr(rdev);
6951 	if (wptr != rptr)
6952 		goto restart_ih;
6953 
6954 	return IRQ_HANDLED;
6955 }
6956 
6957 /*
6958  * startup/shutdown callbacks
6959  */
6960 /**
6961  * cik_startup - program the asic to a functional state
6962  *
6963  * @rdev: radeon_device pointer
6964  *
6965  * Programs the asic to a functional state (CIK).
6966  * Called by cik_init() and cik_resume().
6967  * Returns 0 for success, error for failure.
6968  */
6969 static int cik_startup(struct radeon_device *rdev)
6970 {
6971 	struct radeon_ring *ring;
6972 	int r;
6973 
6974 	/* enable pcie gen2/3 link */
6975 	cik_pcie_gen3_enable(rdev);
6976 	/* enable aspm */
6977 	cik_program_aspm(rdev);
6978 
6979 	/* scratch needs to be initialized before MC */
6980 	r = r600_vram_scratch_init(rdev);
6981 	if (r)
6982 		return r;
6983 
6984 	cik_mc_program(rdev);
6985 
6986 	if (rdev->flags & RADEON_IS_IGP) {
6987 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6988 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
6989 			r = cik_init_microcode(rdev);
6990 			if (r) {
6991 				DRM_ERROR("Failed to load firmware!\n");
6992 				return r;
6993 			}
6994 		}
6995 	} else {
6996 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6997 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
6998 		    !rdev->mc_fw) {
6999 			r = cik_init_microcode(rdev);
7000 			if (r) {
7001 				DRM_ERROR("Failed to load firmware!\n");
7002 				return r;
7003 			}
7004 		}
7005 
7006 		r = ci_mc_load_microcode(rdev);
7007 		if (r) {
7008 			DRM_ERROR("Failed to load MC firmware!\n");
7009 			return r;
7010 		}
7011 	}
7012 
7013 	r = cik_pcie_gart_enable(rdev);
7014 	if (r)
7015 		return r;
7016 	cik_gpu_init(rdev);
7017 
7018 	/* allocate rlc buffers */
7019 	if (rdev->flags & RADEON_IS_IGP) {
7020 		if (rdev->family == CHIP_KAVERI) {
7021 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7022 			rdev->rlc.reg_list_size =
7023 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7024 		} else {
7025 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7026 			rdev->rlc.reg_list_size =
7027 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7028 		}
7029 	}
7030 	rdev->rlc.cs_data = ci_cs_data;
7031 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7032 	r = sumo_rlc_init(rdev);
7033 	if (r) {
7034 		DRM_ERROR("Failed to init rlc BOs!\n");
7035 		return r;
7036 	}
7037 
7038 	/* allocate wb buffer */
7039 	r = radeon_wb_init(rdev);
7040 	if (r)
7041 		return r;
7042 
7043 	/* allocate mec buffers */
7044 	r = cik_mec_init(rdev);
7045 	if (r) {
7046 		DRM_ERROR("Failed to init MEC BOs!\n");
7047 		return r;
7048 	}
7049 
7050 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7051 	if (r) {
7052 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7053 		return r;
7054 	}
7055 
7056 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7057 	if (r) {
7058 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7059 		return r;
7060 	}
7061 
7062 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7063 	if (r) {
7064 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7065 		return r;
7066 	}
7067 
7068 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7069 	if (r) {
7070 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7071 		return r;
7072 	}
7073 
7074 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7075 	if (r) {
7076 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7077 		return r;
7078 	}
7079 
7080 	r = radeon_uvd_resume(rdev);
7081 	if (!r) {
7082 		r = uvd_v4_2_resume(rdev);
7083 		if (!r) {
7084 			r = radeon_fence_driver_start_ring(rdev,
7085 							   R600_RING_TYPE_UVD_INDEX);
7086 			if (r)
7087 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7088 		}
7089 	}
7090 	if (r)
7091 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7092 
7093 	/* Enable IRQ */
7094 	if (!rdev->irq.installed) {
7095 		r = radeon_irq_kms_init(rdev);
7096 		if (r)
7097 			return r;
7098 	}
7099 
7100 	r = cik_irq_init(rdev);
7101 	if (r) {
7102 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
7103 		radeon_irq_kms_fini(rdev);
7104 		return r;
7105 	}
7106 	cik_irq_set(rdev);
7107 
7108 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7109 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7110 			     CP_RB0_RPTR, CP_RB0_WPTR,
7111 			     RADEON_CP_PACKET2);
7112 	if (r)
7113 		return r;
7114 
7115 	/* set up the compute queues */
7116 	/* type-2 packets are deprecated on MEC, use type-3 instead */
7117 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7118 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7119 			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7120 			     PACKET3(PACKET3_NOP, 0x3FFF));
7121 	if (r)
7122 		return r;
7123 	ring->me = 1; /* first MEC */
7124 	ring->pipe = 0; /* first pipe */
7125 	ring->queue = 0; /* first queue */
7126 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7127 
7128 	/* type-2 packets are deprecated on MEC, use type-3 instead */
7129 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7130 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7131 			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7132 			     PACKET3(PACKET3_NOP, 0x3FFF));
7133 	if (r)
7134 		return r;
7135 	/* dGPUs only have 1 MEC */
7136 	ring->me = 1; /* first MEC */
7137 	ring->pipe = 0; /* first pipe */
7138 	ring->queue = 1; /* second queue */
7139 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
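	/* to summarize the compute setup above: both compute rings sit on
	 * MEC 1, pipe 0, and differ only in their hardware queue (0 vs 1)
	 * and write-back wptr slot.
	 */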
7140 
7141 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7142 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7143 			     SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
7144 			     SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
7145 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7146 	if (r)
7147 		return r;
7148 
7149 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7150 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7151 			     SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
7152 			     SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
7153 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7154 	if (r)
7155 		return r;
7156 
7157 	r = cik_cp_resume(rdev);
7158 	if (r)
7159 		return r;
7160 
7161 	r = cik_sdma_resume(rdev);
7162 	if (r)
7163 		return r;
7164 
7165 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7166 	if (ring->ring_size) {
7167 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7168 				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
7169 				     RADEON_CP_PACKET2);
7170 		if (!r)
7171 			r = uvd_v1_0_init(rdev);
7172 		if (r)
7173 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7174 	}
7175 
7176 	r = radeon_ib_pool_init(rdev);
7177 	if (r) {
7178 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7179 		return r;
7180 	}
7181 
7182 	r = radeon_vm_manager_init(rdev);
7183 	if (r) {
7184 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7185 		return r;
7186 	}
7187 
7188 	r = dce6_audio_init(rdev);
7189 	if (r)
7190 		return r;
7191 
7192 	return 0;
7193 }
7194 
7195 /**
7196  * cik_resume - resume the asic to a functional state
7197  *
7198  * @rdev: radeon_device pointer
7199  *
7200  * Programs the asic to a functional state (CIK).
7201  * Called at resume.
7202  * Returns 0 for success, error for failure.
7203  */
7204 int cik_resume(struct radeon_device *rdev)
7205 {
7206 	int r;
7207 
7208 	/* post card */
7209 	atom_asic_init(rdev->mode_info.atom_context);
7210 
7211 	/* init golden registers */
7212 	cik_init_golden_registers(rdev);
7213 
7214 	rdev->accel_working = true;
7215 	r = cik_startup(rdev);
7216 	if (r) {
7217 		DRM_ERROR("cik startup failed on resume\n");
7218 		rdev->accel_working = false;
7219 		return r;
7220 	}
7221 
7222 	return r;
7223 
7224 }
7225 
7226 /**
7227  * cik_suspend - suspend the asic
7228  *
7229  * @rdev: radeon_device pointer
7230  *
7231  * Bring the chip into a state suitable for suspend (CIK).
7232  * Called at suspend.
7233  * Returns 0 for success.
7234  */
7235 int cik_suspend(struct radeon_device *rdev)
7236 {
7237 	dce6_audio_fini(rdev);
7238 	radeon_vm_manager_fini(rdev);
7239 	cik_cp_enable(rdev, false);
7240 	cik_sdma_enable(rdev, false);
7241 	uvd_v1_0_fini(rdev);
7242 	radeon_uvd_suspend(rdev);
7243 	cik_fini_pg(rdev);
7244 	cik_fini_cg(rdev);
7245 	cik_irq_suspend(rdev);
7246 	radeon_wb_disable(rdev);
7247 	cik_pcie_gart_disable(rdev);
7248 	return 0;
7249 }
7250 
7251 /* The plan is to move initialization into this function and to use
7252  * helper functions so that radeon_device_init does pretty much
7253  * nothing more than call asic specific functions. This
7254  * should also allow us to remove a bunch of callback functions
7255  * like vram_info.
7256  */
7257 /**
7258  * cik_init - asic specific driver and hw init
7259  *
7260  * @rdev: radeon_device pointer
7261  *
7262  * Setup asic specific driver variables and program the hw
7263  * to a functional state (CIK).
7264  * Called at driver startup.
7265  * Returns 0 for success, errors for failure.
7266  */
7267 int cik_init(struct radeon_device *rdev)
7268 {
7269 	struct radeon_ring *ring;
7270 	int r;
7271 
7272 	/* Read BIOS */
7273 	if (!radeon_get_bios(rdev)) {
7274 		if (ASIC_IS_AVIVO(rdev))
7275 			return -EINVAL;
7276 	}
7277 	/* Must be an ATOMBIOS */
7278 	if (!rdev->is_atom_bios) {
7279 		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
7280 		return -EINVAL;
7281 	}
7282 	r = radeon_atombios_init(rdev);
7283 	if (r)
7284 		return r;
7285 
7286 	/* Post card if necessary */
7287 	if (!radeon_card_posted(rdev)) {
7288 		if (!rdev->bios) {
7289 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7290 			return -EINVAL;
7291 		}
7292 		DRM_INFO("GPU not posted. posting now...\n");
7293 		atom_asic_init(rdev->mode_info.atom_context);
7294 	}
7295 	/* init golden registers */
7296 	cik_init_golden_registers(rdev);
7297 	/* Initialize scratch registers */
7298 	cik_scratch_init(rdev);
7299 	/* Initialize surface registers */
7300 	radeon_surface_init(rdev);
7301 	/* Initialize clocks */
7302 	radeon_get_clock_info(rdev->ddev);
7303 
7304 	/* Fence driver */
7305 	r = radeon_fence_driver_init(rdev);
7306 	if (r)
7307 		return r;
7308 
7309 	/* initialize memory controller */
7310 	r = cik_mc_init(rdev);
7311 	if (r)
7312 		return r;
7313 	/* Memory manager */
7314 	r = radeon_bo_init(rdev);
7315 	if (r)
7316 		return r;
7317 
7318 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7319 	ring->ring_obj = NULL;
7320 	r600_ring_init(rdev, ring, 1024 * 1024);
7321 
7322 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7323 	ring->ring_obj = NULL;
7324 	r600_ring_init(rdev, ring, 1024 * 1024);
7325 	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7326 	if (r)
7327 		return r;
7328 
7329 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7330 	ring->ring_obj = NULL;
7331 	r600_ring_init(rdev, ring, 1024 * 1024);
7332 	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7333 	if (r)
7334 		return r;
7335 
7336 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7337 	ring->ring_obj = NULL;
7338 	r600_ring_init(rdev, ring, 256 * 1024);
7339 
7340 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7341 	ring->ring_obj = NULL;
7342 	r600_ring_init(rdev, ring, 256 * 1024);
7343 
7344 	r = radeon_uvd_init(rdev);
7345 	if (!r) {
7346 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7347 		ring->ring_obj = NULL;
7348 		r600_ring_init(rdev, ring, 4096);
7349 	}
7350 
7351 	rdev->ih.ring_obj = NULL;
7352 	r600_ih_ring_init(rdev, 64 * 1024);
7353 
7354 	r = r600_pcie_gart_init(rdev);
7355 	if (r)
7356 		return r;
7357 
7358 	rdev->accel_working = true;
7359 	r = cik_startup(rdev);
7360 	if (r) {
7361 		dev_err(rdev->dev, "disabling GPU acceleration\n");
7362 		cik_cp_fini(rdev);
7363 		cik_sdma_fini(rdev);
7364 		cik_irq_fini(rdev);
7365 		sumo_rlc_fini(rdev);
7366 		cik_mec_fini(rdev);
7367 		radeon_wb_fini(rdev);
7368 		radeon_ib_pool_fini(rdev);
7369 		radeon_vm_manager_fini(rdev);
7370 		radeon_irq_kms_fini(rdev);
7371 		cik_pcie_gart_fini(rdev);
7372 		rdev->accel_working = false;
7373 	}
7374 
7375 	/* Don't start up if the MC ucode is missing.
7376 	 * The default clocks and voltages before the MC ucode
7377 	 * is loaded are not sufficient for advanced operations.
7378 	 */
7379 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7380 		DRM_ERROR("radeon: MC ucode required for CIK parts.\n");
7381 		return -EINVAL;
7382 	}
7383 
7384 	return 0;
7385 }
7386 
7387 /**
7388  * cik_fini - asic specific driver and hw fini
7389  *
7390  * @rdev: radeon_device pointer
7391  *
7392  * Tear down the asic specific driver variables and program the hw
7393  * to an idle state (CIK).
7394  * Called at driver unload.
7395  */
7396 void cik_fini(struct radeon_device *rdev)
7397 {
7398 	cik_cp_fini(rdev);
7399 	cik_sdma_fini(rdev);
7400 	cik_fini_pg(rdev);
7401 	cik_fini_cg(rdev);
7402 	cik_irq_fini(rdev);
7403 	sumo_rlc_fini(rdev);
7404 	cik_mec_fini(rdev);
7405 	radeon_wb_fini(rdev);
7406 	radeon_vm_manager_fini(rdev);
7407 	radeon_ib_pool_fini(rdev);
7408 	radeon_irq_kms_fini(rdev);
7409 	uvd_v1_0_fini(rdev);
7410 	radeon_uvd_fini(rdev);
7411 	cik_pcie_gart_fini(rdev);
7412 	r600_vram_scratch_fini(rdev);
7413 	radeon_gem_fini(rdev);
7414 	radeon_fence_driver_fini(rdev);
7415 	radeon_bo_fini(rdev);
7416 	radeon_atombios_fini(rdev);
7417 	kfree(rdev->bios);
7418 	rdev->bios = NULL;
7419 }
7420 
7421 /* display watermark setup */
7422 /**
7423  * dce8_line_buffer_adjust - Set up the line buffer
7424  *
7425  * @rdev: radeon_device pointer
7426  * @radeon_crtc: the selected display controller
7427  * @mode: the current display mode on the selected display
7428  * controller
7429  *
7430  * Set up the line buffer allocation for
7431  * the selected display controller (CIK).
7432  * Returns the line buffer size in pixels.
7433  */
7434 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
7435 				   struct radeon_crtc *radeon_crtc,
7436 				   struct drm_display_mode *mode)
7437 {
7438 	u32 tmp, buffer_alloc, i;
7439 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
7440 	/*
7441 	 * Line Buffer Setup
7442 	 * There are 6 line buffers, one for each display controller.
7443 	 * There are 3 partitions per LB. Select the number of partitions
7444 	 * to enable based on the display width.  For display widths larger
7445 	 * than 4096, you need to use 2 display controllers and combine
7446 	 * them using the stereo blender.
7447 	 */
7448 	if (radeon_crtc->base.enabled && mode) {
7449 		if (mode->crtc_hdisplay < 1920) {
7450 			tmp = 1;
7451 			buffer_alloc = 2;
7452 		} else if (mode->crtc_hdisplay < 2560) {
7453 			tmp = 2;
7454 			buffer_alloc = 2;
7455 		} else if (mode->crtc_hdisplay < 4096) {
7456 			tmp = 0;
7457 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
7458 		} else {
7459 			DRM_DEBUG_KMS("Mode too big for LB!\n");
7460 			tmp = 0;
7461 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
7462 		}
7463 	} else {
7464 		tmp = 1;
7465 		buffer_alloc = 0;
7466 	}
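	/* e.g. a mode 1920 pixels wide lands in the second bucket above
	 * (tmp = 2, two DMIF buffers) and reports 2560 * 2 pixels of line
	 * buffer to the caller below.
	 */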
7467 
7468 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
7469 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
7470 
7471 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
7472 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
7473 	for (i = 0; i < rdev->usec_timeout; i++) {
7474 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
7475 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
7476 			break;
7477 		udelay(1);
7478 	}
7479 
7480 	if (radeon_crtc->base.enabled && mode) {
7481 		switch (tmp) {
7482 		case 0:
7483 		default:
7484 			return 4096 * 2;
7485 		case 1:
7486 			return 1920 * 2;
7487 		case 2:
7488 			return 2560 * 2;
7489 		}
7490 	}
7491 
7492 	/* controller not enabled, so no lb used */
7493 	return 0;
7494 }
7495 
7496 /**
7497  * cik_get_number_of_dram_channels - get the number of dram channels
7498  *
7499  * @rdev: radeon_device pointer
7500  *
7501  * Look up the number of video ram channels (CIK).
7502  * Used for display watermark bandwidth calculations
7503  * Returns the number of dram channels
7504  */
7505 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
7506 {
7507 	u32 tmp = RREG32(MC_SHARED_CHMAP);
7508 
7509 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
7510 	case 0:
7511 	default:
7512 		return 1;
7513 	case 1:
7514 		return 2;
7515 	case 2:
7516 		return 4;
7517 	case 3:
7518 		return 8;
7519 	case 4:
7520 		return 3;
7521 	case 5:
7522 		return 6;
7523 	case 6:
7524 		return 10;
7525 	case 7:
7526 		return 12;
7527 	case 8:
7528 		return 16;
7529 	}
7530 }
7531 
7532 struct dce8_wm_params {
7533 	u32 dram_channels; /* number of dram channels */
7534 	u32 yclk;          /* bandwidth per dram data pin in kHz */
7535 	u32 sclk;          /* engine clock in kHz */
7536 	u32 disp_clk;      /* display clock in kHz */
7537 	u32 src_width;     /* viewport width */
7538 	u32 active_time;   /* active display time in ns */
7539 	u32 blank_time;    /* blank time in ns */
7540 	bool interlaced;    /* mode is interlaced */
7541 	fixed20_12 vsc;    /* vertical scale ratio */
7542 	u32 num_heads;     /* number of active crtcs */
7543 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
7544 	u32 lb_size;       /* line buffer allocated to pipe */
7545 	u32 vtaps;         /* vertical scaler taps */
7546 };
7547 
7548 /**
7549  * dce8_dram_bandwidth - get the dram bandwidth
7550  *
7551  * @wm: watermark calculation data
7552  *
7553  * Calculate the raw dram bandwidth (CIK).
7554  * Used for display watermark bandwidth calculations
7555  * Returns the dram bandwidth in MBytes/s
7556  */
7557 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
7558 {
7559 	/* Calculate raw DRAM Bandwidth */
7560 	fixed20_12 dram_efficiency; /* 0.7 */
7561 	fixed20_12 yclk, dram_channels, bandwidth;
7562 	fixed20_12 a;
7563 
7564 	a.full = dfixed_const(1000);
7565 	yclk.full = dfixed_const(wm->yclk);
7566 	yclk.full = dfixed_div(yclk, a);
7567 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
7568 	a.full = dfixed_const(10);
7569 	dram_efficiency.full = dfixed_const(7);
7570 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
7571 	bandwidth.full = dfixed_mul(dram_channels, yclk);
7572 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
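	/* i.e. bandwidth = yclk / 1000 * (dram_channels * 4) * 0.7; e.g.
	 * wm->yclk == 1000000 with 2 channels gives 1000 * 8 * 0.7 =
	 * 5600 MBytes/s.
	 */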
7573 
7574 	return dfixed_trunc(bandwidth);
7575 }
7576 
7577 /**
7578  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
7579  *
7580  * @wm: watermark calculation data
7581  *
7582  * Calculate the dram bandwidth used for display (CIK).
7583  * Used for display watermark bandwidth calculations
7584  * Returns the dram bandwidth for display in MBytes/s
7585  */
7586 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7587 {
7588 	/* Calculate DRAM Bandwidth and the part allocated to display. */
7589 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
7590 	fixed20_12 yclk, dram_channels, bandwidth;
7591 	fixed20_12 a;
7592 
7593 	a.full = dfixed_const(1000);
7594 	yclk.full = dfixed_const(wm->yclk);
7595 	yclk.full = dfixed_div(yclk, a);
7596 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
7597 	a.full = dfixed_const(10);
7598 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
7599 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
7600 	bandwidth.full = dfixed_mul(dram_channels, yclk);
7601 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
7602 
7603 	return dfixed_trunc(bandwidth);
7604 }
7605 
7606 /**
7607  * dce8_data_return_bandwidth - get the data return bandwidth
7608  *
7609  * @wm: watermark calculation data
7610  *
7611  * Calculate the data return bandwidth used for display (CIK).
7612  * Used for display watermark bandwidth calculations
7613  * Returns the data return bandwidth in MBytes/s
7614  */
7615 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
7616 {
7617 	/* Calculate the display Data return Bandwidth */
7618 	fixed20_12 return_efficiency; /* 0.8 */
7619 	fixed20_12 sclk, bandwidth;
7620 	fixed20_12 a;
7621 
7622 	a.full = dfixed_const(1000);
7623 	sclk.full = dfixed_const(wm->sclk);
7624 	sclk.full = dfixed_div(sclk, a);
7625 	a.full = dfixed_const(10);
7626 	return_efficiency.full = dfixed_const(8);
7627 	return_efficiency.full = dfixed_div(return_efficiency, a);
7628 	a.full = dfixed_const(32);
7629 	bandwidth.full = dfixed_mul(a, sclk);
7630 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
7631 
7632 	return dfixed_trunc(bandwidth);
7633 }
7634 
7635 /**
7636  * dce8_dmif_request_bandwidth - get the dmif bandwidth
7637  *
7638  * @wm: watermark calculation data
7639  *
7640  * Calculate the dmif bandwidth used for display (CIK).
7641  * Used for display watermark bandwidth calculations
7642  * Returns the dmif bandwidth in MBytes/s
7643  */
7644 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
7645 {
7646 	/* Calculate the DMIF Request Bandwidth */
7647 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
7648 	fixed20_12 disp_clk, bandwidth;
7649 	fixed20_12 a, b;
7650 
7651 	a.full = dfixed_const(1000);
7652 	disp_clk.full = dfixed_const(wm->disp_clk);
7653 	disp_clk.full = dfixed_div(disp_clk, a);
7654 	a.full = dfixed_const(32);
7655 	b.full = dfixed_mul(a, disp_clk);
7656 
7657 	a.full = dfixed_const(10);
7658 	disp_clk_request_efficiency.full = dfixed_const(8);
7659 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
7660 
7661 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
7662 
7663 	return dfixed_trunc(bandwidth);
7664 }
7665 
7666 /**
7667  * dce8_available_bandwidth - get the min available bandwidth
7668  *
7669  * @wm: watermark calculation data
7670  *
7671  * Calculate the min available bandwidth used for display (CIK).
7672  * Used for display watermark bandwidth calculations
7673  * Returns the min available bandwidth in MBytes/s
7674  */
7675 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
7676 {
7677 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
7678 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
7679 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
7680 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
7681 
7682 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
7683 }
7684 
7685 /**
7686  * dce8_average_bandwidth - get the average available bandwidth
7687  *
7688  * @wm: watermark calculation data
7689  *
7690  * Calculate the average available bandwidth used for display (CIK).
7691  * Used for display watermark bandwidth calculations
7692  * Returns the average available bandwidth in MBytes/s
7693  */
7694 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
7695 {
7696 	/* Calculate the display mode Average Bandwidth
7697 	 * DisplayMode should contain the source and destination dimensions,
7698 	 * timing, etc.
7699 	 */
7700 	fixed20_12 bpp;
7701 	fixed20_12 line_time;
7702 	fixed20_12 src_width;
7703 	fixed20_12 bandwidth;
7704 	fixed20_12 a;
7705 
7706 	a.full = dfixed_const(1000);
7707 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
7708 	line_time.full = dfixed_div(line_time, a);
7709 	bpp.full = dfixed_const(wm->bytes_per_pixel);
7710 	src_width.full = dfixed_const(wm->src_width);
7711 	bandwidth.full = dfixed_mul(src_width, bpp);
7712 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
7713 	bandwidth.full = dfixed_div(bandwidth, line_time);
7714 
7715 	return dfixed_trunc(bandwidth);
7716 }
7717 
7718 /**
7719  * dce8_latency_watermark - get the latency watermark
7720  *
7721  * @wm: watermark calculation data
7722  *
7723  * Calculate the latency watermark (CIK).
7724  * Used for display watermark bandwidth calculations
7725  * Returns the latency watermark in ns
7726  */
7727 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
7728 {
7729 	/* First calculate the latency in ns */
7730 	u32 mc_latency = 2000; /* 2000 ns. */
7731 	u32 available_bandwidth = dce8_available_bandwidth(wm);
7732 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
7733 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
7734 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
7735 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
7736 		(wm->num_heads * cursor_line_pair_return_time);
7737 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
7738 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
7739 	u32 tmp, dmif_size = 12288;
7740 	fixed20_12 a, b, c;
7741 
7742 	if (wm->num_heads == 0)
7743 		return 0;
7744 
7745 	a.full = dfixed_const(2);
7746 	b.full = dfixed_const(1);
7747 	if ((wm->vsc.full > a.full) ||
7748 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
7749 	    (wm->vtaps >= 5) ||
7750 	    ((wm->vsc.full >= a.full) && wm->interlaced))
7751 		max_src_lines_per_dst_line = 4;
7752 	else
7753 		max_src_lines_per_dst_line = 2;
7754 
7755 	a.full = dfixed_const(available_bandwidth);
7756 	b.full = dfixed_const(wm->num_heads);
7757 	a.full = dfixed_div(a, b);
7758 
7759 	b.full = dfixed_const(mc_latency + 512);
7760 	c.full = dfixed_const(wm->disp_clk);
7761 	b.full = dfixed_div(b, c);
7762 
7763 	c.full = dfixed_const(dmif_size);
7764 	b.full = dfixed_div(c, b);
7765 
7766 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
7767 
7768 	b.full = dfixed_const(1000);
7769 	c.full = dfixed_const(wm->disp_clk);
7770 	b.full = dfixed_div(c, b);
7771 	c.full = dfixed_const(wm->bytes_per_pixel);
7772 	b.full = dfixed_mul(b, c);
7773 
7774 	lb_fill_bw = min(tmp, dfixed_trunc(b));
7775 
7776 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
7777 	b.full = dfixed_const(1000);
7778 	c.full = dfixed_const(lb_fill_bw);
7779 	b.full = dfixed_div(c, b);
7780 	a.full = dfixed_div(a, b);
7781 	line_fill_time = dfixed_trunc(a);
7782 
7783 	if (line_fill_time < wm->active_time)
7784 		return latency;
7785 	else
7786 		return latency + (line_fill_time - wm->active_time);
7787 
7788 }
7789 
7790 /**
7791  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
7792  * average and available dram bandwidth
7793  *
7794  * @wm: watermark calculation data
7795  *
7796  * Check if the display average bandwidth fits in the display
7797  * dram bandwidth (CIK).
7798  * Used for display watermark bandwidth calculations
7799  * Returns true if the display fits, false if not.
7800  */
7801 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7802 {
7803 	if (dce8_average_bandwidth(wm) <=
7804 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
7805 		return true;
7806 	else
7807 		return false;
7808 }
7809 
7810 /**
7811  * dce8_average_bandwidth_vs_available_bandwidth - check
7812  * average and available bandwidth
7813  *
7814  * @wm: watermark calculation data
7815  *
7816  * Check if the display average bandwidth fits in the display
7817  * available bandwidth (CIK).
7818  * Used for display watermark bandwidth calculations
7819  * Returns true if the display fits, false if not.
7820  */
7821 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
7822 {
7823 	if (dce8_average_bandwidth(wm) <=
7824 	    (dce8_available_bandwidth(wm) / wm->num_heads))
7825 		return true;
7826 	else
7827 		return false;
7828 }
7829 
7830 /**
7831  * dce8_check_latency_hiding - check latency hiding
7832  *
7833  * @wm: watermark calculation data
7834  *
7835  * Check latency hiding (CIK).
7836  * Used for display watermark bandwidth calculations
7837  * Returns true if the display fits, false if not.
7838  */
7839 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
7840 {
7841 	u32 lb_partitions = wm->lb_size / wm->src_width;
7842 	u32 line_time = wm->active_time + wm->blank_time;
7843 	u32 latency_tolerant_lines;
7844 	u32 latency_hiding;
7845 	fixed20_12 a;
7846 
7847 	a.full = dfixed_const(1);
7848 	if (wm->vsc.full > a.full)
7849 		latency_tolerant_lines = 1;
7850 	else {
7851 		if (lb_partitions <= (wm->vtaps + 1))
7852 			latency_tolerant_lines = 1;
7853 		else
7854 			latency_tolerant_lines = 2;
7855 	}
7856 
7857 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
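	/* latency_hiding is roughly how many ns of memory latency the line
	 * buffer can absorb before the display underflows.
	 */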
7858 
7859 	if (dce8_latency_watermark(wm) <= latency_hiding)
7860 		return true;
7861 	else
7862 		return false;
7863 }
7864 
7865 /**
7866  * dce8_program_watermarks - program display watermarks
7867  *
7868  * @rdev: radeon_device pointer
7869  * @radeon_crtc: the selected display controller
7870  * @lb_size: line buffer size
7871  * @num_heads: number of display controllers in use
7872  *
7873  * Calculate and program the display watermarks for the
7874  * selected display controller (CIK).
7875  */
7876 static void dce8_program_watermarks(struct radeon_device *rdev,
7877 				    struct radeon_crtc *radeon_crtc,
7878 				    u32 lb_size, u32 num_heads)
7879 {
7880 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
7881 	struct dce8_wm_params wm_low, wm_high;
7882 	u32 pixel_period;
7883 	u32 line_time = 0;
7884 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
7885 	u32 tmp, wm_mask;
7886 
7887 	if (radeon_crtc->base.enabled && num_heads && mode) {
7888 		pixel_period = 1000000 / (u32)mode->clock;
7889 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
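		/* e.g. a 148500 kHz 1080p mode (htotal 2200): pixel_period =
		 * 1000000 / 148500 = 6 ns, line_time = 2200 * 6 = 13200 ns.
		 */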
7890 
7891 		/* watermark for high clocks */
7892 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
7893 		    rdev->pm.dpm_enabled) {
7894 			wm_high.yclk =
7895 				radeon_dpm_get_mclk(rdev, false) * 10;
7896 			wm_high.sclk =
7897 				radeon_dpm_get_sclk(rdev, false) * 10;
7898 		} else {
7899 			wm_high.yclk = rdev->pm.current_mclk * 10;
7900 			wm_high.sclk = rdev->pm.current_sclk * 10;
7901 		}
7902 
7903 		wm_high.disp_clk = mode->clock;
7904 		wm_high.src_width = mode->crtc_hdisplay;
7905 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
7906 		wm_high.blank_time = line_time - wm_high.active_time;
7907 		wm_high.interlaced = false;
7908 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
7909 			wm_high.interlaced = true;
7910 		wm_high.vsc = radeon_crtc->vsc;
7911 		wm_high.vtaps = 1;
7912 		if (radeon_crtc->rmx_type != RMX_OFF)
7913 			wm_high.vtaps = 2;
7914 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
7915 		wm_high.lb_size = lb_size;
7916 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
7917 		wm_high.num_heads = num_heads;
7918 
7919 		/* set for high clocks */
7920 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
7921 
7922 		/* possibly force display priority to high */
7923 		/* should really do this at mode validation time... */
7924 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
7925 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
7926 		    !dce8_check_latency_hiding(&wm_high) ||
7927 		    (rdev->disp_priority == 2)) {
7928 			DRM_DEBUG_KMS("force priority to high\n");
7929 		}
7930 
7931 		/* watermark for low clocks */
7932 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
7933 		    rdev->pm.dpm_enabled) {
7934 			wm_low.yclk =
7935 				radeon_dpm_get_mclk(rdev, true) * 10;
7936 			wm_low.sclk =
7937 				radeon_dpm_get_sclk(rdev, true) * 10;
7938 		} else {
7939 			wm_low.yclk = rdev->pm.current_mclk * 10;
7940 			wm_low.sclk = rdev->pm.current_sclk * 10;
7941 		}
7942 
7943 		wm_low.disp_clk = mode->clock;
7944 		wm_low.src_width = mode->crtc_hdisplay;
7945 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
7946 		wm_low.blank_time = line_time - wm_low.active_time;
7947 		wm_low.interlaced = false;
7948 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
7949 			wm_low.interlaced = true;
7950 		wm_low.vsc = radeon_crtc->vsc;
7951 		wm_low.vtaps = 1;
7952 		if (radeon_crtc->rmx_type != RMX_OFF)
7953 			wm_low.vtaps = 2;
7954 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
7955 		wm_low.lb_size = lb_size;
7956 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
7957 		wm_low.num_heads = num_heads;
7958 
7959 		/* set for low clocks */
7960 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
7961 
7962 		/* possibly force display priority to high */
7963 		/* should really do this at mode validation time... */
7964 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
7965 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
7966 		    !dce8_check_latency_hiding(&wm_low) ||
7967 		    (rdev->disp_priority == 2)) {
7968 			DRM_DEBUG_KMS("force priority to high\n");
7969 		}
7970 	}
7971 
7972 	/* select wm A */
7973 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
7974 	tmp = wm_mask;
7975 	tmp &= ~LATENCY_WATERMARK_MASK(3);
7976 	tmp |= LATENCY_WATERMARK_MASK(1);
7977 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
7978 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
7979 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
7980 		LATENCY_HIGH_WATERMARK(line_time)));
7981 	/* select wm B */
7982 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
7983 	tmp &= ~LATENCY_WATERMARK_MASK(3);
7984 	tmp |= LATENCY_WATERMARK_MASK(2);
7985 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
7986 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
7987 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
7988 		LATENCY_HIGH_WATERMARK(line_time)));
7989 	/* restore original selection */
7990 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
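	/* each DPG_PIPE_LATENCY_CONTROL write above lands in whichever
	 * watermark set (A or B) the mask control register selects at the
	 * time, hence the select/program/reselect sequence.
	 */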
7991 
7992 	/* save values for DPM */
7993 	radeon_crtc->line_time = line_time;
7994 	radeon_crtc->wm_high = latency_watermark_a;
7995 	radeon_crtc->wm_low = latency_watermark_b;
7996 }
7997 
7998 /**
7999  * dce8_bandwidth_update - program display watermarks
8000  *
8001  * @rdev: radeon_device pointer
8002  *
8003  * Calculate and program the display watermarks and line
8004  * buffer allocation (CIK).
8005  */
8006 void dce8_bandwidth_update(struct radeon_device *rdev)
8007 {
8008 	struct drm_display_mode *mode = NULL;
8009 	u32 num_heads = 0, lb_size;
8010 	int i;
8011 
8012 	radeon_update_display_priority(rdev);
8013 
8014 	for (i = 0; i < rdev->num_crtc; i++) {
8015 		if (rdev->mode_info.crtcs[i]->base.enabled)
8016 			num_heads++;
8017 	}
8018 	for (i = 0; i < rdev->num_crtc; i++) {
8019 		mode = &rdev->mode_info.crtcs[i]->base.mode;
8020 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8021 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8022 	}
8023 }
8024 
8025 /**
8026  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8027  *
8028  * @rdev: radeon_device pointer
8029  *
8030  * Fetches a GPU clock counter snapshot (CIK).
8031  * Returns the 64 bit clock counter snapshot.
8032  */
8033 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8034 {
8035 	uint64_t clock;
8036 
8037 	mutex_lock(&rdev->gpu_clock_mutex);
8038 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
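	/* the capture write presumably latches the free-running counter so
	 * the LSB/MSB halves read back below form one consistent 64-bit
	 * snapshot; the mutex serializes concurrent captures.
	 */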
8039 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8040 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8041 	mutex_unlock(&rdev->gpu_clock_mutex);
8042 	return clock;
8043 }
8044 
8045 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8046                               u32 cntl_reg, u32 status_reg)
8047 {
8048 	int r, i;
8049 	struct atom_clock_dividers dividers;
8050 	uint32_t tmp;
8051 
8052 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8053 					   clock, false, &dividers);
8054 	if (r)
8055 		return r;
8056 
8057 	tmp = RREG32_SMC(cntl_reg);
8058 	tmp &= ~(DCLK_DIR_CNTL_EN | DCLK_DIVIDER_MASK);
8059 	tmp |= dividers.post_divider;
8060 	WREG32_SMC(cntl_reg, tmp);
8061 
8062 	for (i = 0; i < 100; i++) {
8063 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
8064 			break;
8065 		mdelay(10);
8066 	}
8067 	if (i == 100)
8068 		return -ETIMEDOUT;
8069 
8070 	return 0;
8071 }
8072 
8073 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8074 {
8075 	int r = 0;
8076 
8077 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8078 	if (r)
8079 		return r;
8080 
8081 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8082 	return r;
8083 }
8084 
8085 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
8086 {
8087 	struct pci_dev *root = rdev->pdev->bus->self;
8088 	int bridge_pos, gpu_pos;
8089 	u32 speed_cntl, mask, current_data_rate;
8090 	int ret, i;
8091 	u16 tmp16;
8092 
8093 	if (radeon_pcie_gen2 == 0)
8094 		return;
8095 
8096 	if (rdev->flags & RADEON_IS_IGP)
8097 		return;
8098 
8099 	if (!(rdev->flags & RADEON_IS_PCIE))
8100 		return;
8101 
8102 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
8103 	if (ret != 0)
8104 		return;
8105 
8106 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
8107 		return;
8108 
8109 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8110 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
8111 		LC_CURRENT_DATA_RATE_SHIFT;
8112 	if (mask & DRM_PCIE_SPEED_80) {
8113 		if (current_data_rate == 2) {
8114 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
8115 			return;
8116 		}
8117 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
8118 	} else if (mask & DRM_PCIE_SPEED_50) {
8119 		if (current_data_rate == 1) {
8120 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
8121 			return;
8122 		}
8123 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
8124 	}
8125 
8126 	bridge_pos = pci_pcie_cap(root);
8127 	if (!bridge_pos)
8128 		return;
8129 
8130 	gpu_pos = pci_pcie_cap(rdev->pdev);
8131 	if (!gpu_pos)
8132 		return;
8133 
8134 	if (mask & DRM_PCIE_SPEED_80) {
8135 		/* re-try equalization if gen3 is not already enabled */
8136 		if (current_data_rate != 2) {
8137 			u16 bridge_cfg, gpu_cfg;
8138 			u16 bridge_cfg2, gpu_cfg2;
8139 			u32 max_lw, current_lw, tmp;
8140 
8141 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8142 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8143 
8144 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
8145 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8146 
8147 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
8148 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8149 
8150 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8151 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
8152 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
8153 
8154 			if (current_lw < max_lw) {
8155 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8156 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
8157 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
8158 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
8159 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
8160 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
8161 				}
8162 			}
8163 
8164 			for (i = 0; i < 10; i++) {
8165 				/* check status */
8166 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
8167 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
8168 					break;
8169 
8170 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8171 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8172 
8173 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
8174 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
8175 
8176 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8177 				tmp |= LC_SET_QUIESCE;
8178 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8179 
8180 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8181 				tmp |= LC_REDO_EQ;
8182 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8183 
8184 				mdelay(100);
8185 
8186 				/* linkctl */
8187 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
8188 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8189 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
8190 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8191 
8192 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
8193 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8194 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
8195 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8196 
8197 				/* linkctl2 */
8198 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
8199 				tmp16 &= ~((1 << 4) | (7 << 9));
8200 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
8201 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
8202 
8203 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8204 				tmp16 &= ~((1 << 4) | (7 << 9));
8205 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
8206 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8207 
8208 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8209 				tmp &= ~LC_SET_QUIESCE;
8210 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8211 			}
8212 		}
8213 	}
8214 
8215 	/* set the link speed */
8216 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
8217 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
8218 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8219 
8220 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8221 	tmp16 &= ~0xf;
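	/* LNKCTL2 bits [3:0] hold the target link speed: 1, 2 and 3 select
	 * the 2.5, 5.0 and 8.0 GT/s generations respectively.
	 */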
8222 	if (mask & DRM_PCIE_SPEED_80)
8223 		tmp16 |= 3; /* gen3 */
8224 	else if (mask & DRM_PCIE_SPEED_50)
8225 		tmp16 |= 2; /* gen2 */
8226 	else
8227 		tmp16 |= 1; /* gen1 */
8228 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8229 
8230 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8231 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
8232 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8233 
8234 	for (i = 0; i < rdev->usec_timeout; i++) {
8235 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8236 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
8237 			break;
8238 		udelay(1);
8239 	}
8240 }
8241 
8242 static void cik_program_aspm(struct radeon_device *rdev)
8243 {
8244 	u32 data, orig;
8245 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
8246 	bool disable_clkreq = false;
8247 
8248 	if (radeon_aspm == 0)
8249 		return;
8250 
8251 	/* XXX double check IGPs */
8252 	if (rdev->flags & RADEON_IS_IGP)
8253 		return;
8254 
8255 	if (!(rdev->flags & RADEON_IS_PCIE))
8256 		return;
8257 
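	/* N_FTS is the number of fast training sequences the link partner
	 * needs when retraining from L0s; the override below forces it to
	 * 0x24 (36).
	 */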
8258 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8259 	data &= ~LC_XMIT_N_FTS_MASK;
8260 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
8261 	if (orig != data)
8262 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
8263 
8264 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
8265 	data |= LC_GO_TO_RECOVERY;
8266 	if (orig != data)
8267 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
8268 
8269 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
8270 	data |= P_IGNORE_EDB_ERR;
8271 	if (orig != data)
8272 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
8273 
8274 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8275 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
8276 	data |= LC_PMI_TO_L1_DIS;
8277 	if (!disable_l0s)
8278 		data |= LC_L0S_INACTIVITY(7);
8279 
8280 	if (!disable_l1) {
8281 		data |= LC_L1_INACTIVITY(7);
8282 		data &= ~LC_PMI_TO_L1_DIS;
8283 		if (orig != data)
8284 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8285 
8286 		if (!disable_plloff_in_l1) {
8287 			bool clk_req_support;
8288 
8289 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
8290 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8291 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8292 			if (orig != data)
8293 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
8294 
8295 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
8296 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8297 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8298 			if (orig != data)
8299 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
8300 
8301 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
8302 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8303 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8304 			if (orig != data)
8305 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
8306 
8307 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
8308 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8309 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8310 			if (orig != data)
8311 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
8312 
8313 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8314 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
8315 			data |= LC_DYN_LANES_PWR_STATE(3);
8316 			if (orig != data)
8317 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
8318 
8319 			if (!disable_clkreq) {
8320 				struct pci_dev *root = rdev->pdev->bus->self;
8321 				u32 lnkcap;
8322 
8323 				clk_req_support = false;
8324 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
8325 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
8326 					clk_req_support = true;
8327 			} else {
8328 				clk_req_support = false;
8329 			}
8330 
8331 			if (clk_req_support) {
8332 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
8333 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
8334 				if (orig != data)
8335 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
8336 
8337 				orig = data = RREG32_SMC(THM_CLK_CNTL);
8338 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
8339 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
8340 				if (orig != data)
8341 					WREG32_SMC(THM_CLK_CNTL, data);
8342 
8343 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
8344 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
8345 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
8346 				if (orig != data)
8347 					WREG32_SMC(MISC_CLK_CTRL, data);
8348 
8349 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
8350 				data &= ~BCLK_AS_XCLK;
8351 				if (orig != data)
8352 					WREG32_SMC(CG_CLKPIN_CNTL, data);
8353 
8354 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
8355 				data &= ~FORCE_BIF_REFCLK_EN;
8356 				if (orig != data)
8357 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
8358 
8359 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
8360 				data &= ~MPLL_CLKOUT_SEL_MASK;
8361 				data |= MPLL_CLKOUT_SEL(4);
8362 				if (orig != data)
8363 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
8364 			}
8365 		}
8366 	} else {
8367 		if (orig != data)
8368 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8369 	}
8370 
8371 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
8372 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
8373 	if (orig != data)
8374 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
8375 
8376 	if (!disable_l0s) {
8377 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8378 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
8379 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8380 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
8381 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8382 				data &= ~LC_L0S_INACTIVITY_MASK;
8383 				if (orig != data)
8384 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8385 			}
8386 		}
8387 	}
8388 }
8389