xref: /openbmc/linux/drivers/gpu/drm/radeon/cik.c (revision 5bd8e16d)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35 
/*
 * Declare the external firmware images this driver may request at runtime
 * (one set per supported CIK ASIC: Bonaire, Kaveri, Kabini), so userspace
 * tooling (e.g. initramfs generators) knows to bundle them.
 */
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
56 
/*
 * Helpers implemented in other radeon translation units (r600/evergreen/
 * sumo/si/cik_sdma) that this file reuses, followed by forward declarations
 * of local static functions defined later in this file.
 */
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void cik_sdma_vm_set_page(struct radeon_device *rdev,
				 struct radeon_ib *ib,
				 uint64_t pe,
				 uint64_t addr, unsigned count,
				 uint32_t incr, uint32_t flags);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);
82 
83 /* get temperature in millidegrees */
84 int ci_get_temp(struct radeon_device *rdev)
85 {
86 	u32 temp;
87 	int actual_temp = 0;
88 
89 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
90 		CTF_TEMP_SHIFT;
91 
92 	if (temp & 0x200)
93 		actual_temp = 255;
94 	else
95 		actual_temp = temp & 0x1ff;
96 
97 	actual_temp = actual_temp * 1000;
98 
99 	return actual_temp;
100 }
101 
102 /* get temperature in millidegrees */
103 int kv_get_temp(struct radeon_device *rdev)
104 {
105 	u32 temp;
106 	int actual_temp = 0;
107 
108 	temp = RREG32_SMC(0xC0300E0C);
109 
110 	if (temp)
111 		actual_temp = (temp / 8) - 49;
112 	else
113 		actual_temp = 0;
114 
115 	actual_temp = actual_temp * 1000;
116 
117 	return actual_temp;
118 }
119 
120 /*
121  * Indirect registers accessor
122  */
123 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
124 {
125 	unsigned long flags;
126 	u32 r;
127 
128 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
129 	WREG32(PCIE_INDEX, reg);
130 	(void)RREG32(PCIE_INDEX);
131 	r = RREG32(PCIE_DATA);
132 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
133 	return r;
134 }
135 
136 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
137 {
138 	unsigned long flags;
139 
140 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
141 	WREG32(PCIE_INDEX, reg);
142 	(void)RREG32(PCIE_INDEX);
143 	WREG32(PCIE_DATA, v);
144 	(void)RREG32(PCIE_DATA);
145 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
146 }
147 
148 static const u32 spectre_rlc_save_restore_register_list[] =
149 {
150 	(0x0e00 << 16) | (0xc12c >> 2),
151 	0x00000000,
152 	(0x0e00 << 16) | (0xc140 >> 2),
153 	0x00000000,
154 	(0x0e00 << 16) | (0xc150 >> 2),
155 	0x00000000,
156 	(0x0e00 << 16) | (0xc15c >> 2),
157 	0x00000000,
158 	(0x0e00 << 16) | (0xc168 >> 2),
159 	0x00000000,
160 	(0x0e00 << 16) | (0xc170 >> 2),
161 	0x00000000,
162 	(0x0e00 << 16) | (0xc178 >> 2),
163 	0x00000000,
164 	(0x0e00 << 16) | (0xc204 >> 2),
165 	0x00000000,
166 	(0x0e00 << 16) | (0xc2b4 >> 2),
167 	0x00000000,
168 	(0x0e00 << 16) | (0xc2b8 >> 2),
169 	0x00000000,
170 	(0x0e00 << 16) | (0xc2bc >> 2),
171 	0x00000000,
172 	(0x0e00 << 16) | (0xc2c0 >> 2),
173 	0x00000000,
174 	(0x0e00 << 16) | (0x8228 >> 2),
175 	0x00000000,
176 	(0x0e00 << 16) | (0x829c >> 2),
177 	0x00000000,
178 	(0x0e00 << 16) | (0x869c >> 2),
179 	0x00000000,
180 	(0x0600 << 16) | (0x98f4 >> 2),
181 	0x00000000,
182 	(0x0e00 << 16) | (0x98f8 >> 2),
183 	0x00000000,
184 	(0x0e00 << 16) | (0x9900 >> 2),
185 	0x00000000,
186 	(0x0e00 << 16) | (0xc260 >> 2),
187 	0x00000000,
188 	(0x0e00 << 16) | (0x90e8 >> 2),
189 	0x00000000,
190 	(0x0e00 << 16) | (0x3c000 >> 2),
191 	0x00000000,
192 	(0x0e00 << 16) | (0x3c00c >> 2),
193 	0x00000000,
194 	(0x0e00 << 16) | (0x8c1c >> 2),
195 	0x00000000,
196 	(0x0e00 << 16) | (0x9700 >> 2),
197 	0x00000000,
198 	(0x0e00 << 16) | (0xcd20 >> 2),
199 	0x00000000,
200 	(0x4e00 << 16) | (0xcd20 >> 2),
201 	0x00000000,
202 	(0x5e00 << 16) | (0xcd20 >> 2),
203 	0x00000000,
204 	(0x6e00 << 16) | (0xcd20 >> 2),
205 	0x00000000,
206 	(0x7e00 << 16) | (0xcd20 >> 2),
207 	0x00000000,
208 	(0x8e00 << 16) | (0xcd20 >> 2),
209 	0x00000000,
210 	(0x9e00 << 16) | (0xcd20 >> 2),
211 	0x00000000,
212 	(0xae00 << 16) | (0xcd20 >> 2),
213 	0x00000000,
214 	(0xbe00 << 16) | (0xcd20 >> 2),
215 	0x00000000,
216 	(0x0e00 << 16) | (0x89bc >> 2),
217 	0x00000000,
218 	(0x0e00 << 16) | (0x8900 >> 2),
219 	0x00000000,
220 	0x3,
221 	(0x0e00 << 16) | (0xc130 >> 2),
222 	0x00000000,
223 	(0x0e00 << 16) | (0xc134 >> 2),
224 	0x00000000,
225 	(0x0e00 << 16) | (0xc1fc >> 2),
226 	0x00000000,
227 	(0x0e00 << 16) | (0xc208 >> 2),
228 	0x00000000,
229 	(0x0e00 << 16) | (0xc264 >> 2),
230 	0x00000000,
231 	(0x0e00 << 16) | (0xc268 >> 2),
232 	0x00000000,
233 	(0x0e00 << 16) | (0xc26c >> 2),
234 	0x00000000,
235 	(0x0e00 << 16) | (0xc270 >> 2),
236 	0x00000000,
237 	(0x0e00 << 16) | (0xc274 >> 2),
238 	0x00000000,
239 	(0x0e00 << 16) | (0xc278 >> 2),
240 	0x00000000,
241 	(0x0e00 << 16) | (0xc27c >> 2),
242 	0x00000000,
243 	(0x0e00 << 16) | (0xc280 >> 2),
244 	0x00000000,
245 	(0x0e00 << 16) | (0xc284 >> 2),
246 	0x00000000,
247 	(0x0e00 << 16) | (0xc288 >> 2),
248 	0x00000000,
249 	(0x0e00 << 16) | (0xc28c >> 2),
250 	0x00000000,
251 	(0x0e00 << 16) | (0xc290 >> 2),
252 	0x00000000,
253 	(0x0e00 << 16) | (0xc294 >> 2),
254 	0x00000000,
255 	(0x0e00 << 16) | (0xc298 >> 2),
256 	0x00000000,
257 	(0x0e00 << 16) | (0xc29c >> 2),
258 	0x00000000,
259 	(0x0e00 << 16) | (0xc2a0 >> 2),
260 	0x00000000,
261 	(0x0e00 << 16) | (0xc2a4 >> 2),
262 	0x00000000,
263 	(0x0e00 << 16) | (0xc2a8 >> 2),
264 	0x00000000,
265 	(0x0e00 << 16) | (0xc2ac  >> 2),
266 	0x00000000,
267 	(0x0e00 << 16) | (0xc2b0 >> 2),
268 	0x00000000,
269 	(0x0e00 << 16) | (0x301d0 >> 2),
270 	0x00000000,
271 	(0x0e00 << 16) | (0x30238 >> 2),
272 	0x00000000,
273 	(0x0e00 << 16) | (0x30250 >> 2),
274 	0x00000000,
275 	(0x0e00 << 16) | (0x30254 >> 2),
276 	0x00000000,
277 	(0x0e00 << 16) | (0x30258 >> 2),
278 	0x00000000,
279 	(0x0e00 << 16) | (0x3025c >> 2),
280 	0x00000000,
281 	(0x4e00 << 16) | (0xc900 >> 2),
282 	0x00000000,
283 	(0x5e00 << 16) | (0xc900 >> 2),
284 	0x00000000,
285 	(0x6e00 << 16) | (0xc900 >> 2),
286 	0x00000000,
287 	(0x7e00 << 16) | (0xc900 >> 2),
288 	0x00000000,
289 	(0x8e00 << 16) | (0xc900 >> 2),
290 	0x00000000,
291 	(0x9e00 << 16) | (0xc900 >> 2),
292 	0x00000000,
293 	(0xae00 << 16) | (0xc900 >> 2),
294 	0x00000000,
295 	(0xbe00 << 16) | (0xc900 >> 2),
296 	0x00000000,
297 	(0x4e00 << 16) | (0xc904 >> 2),
298 	0x00000000,
299 	(0x5e00 << 16) | (0xc904 >> 2),
300 	0x00000000,
301 	(0x6e00 << 16) | (0xc904 >> 2),
302 	0x00000000,
303 	(0x7e00 << 16) | (0xc904 >> 2),
304 	0x00000000,
305 	(0x8e00 << 16) | (0xc904 >> 2),
306 	0x00000000,
307 	(0x9e00 << 16) | (0xc904 >> 2),
308 	0x00000000,
309 	(0xae00 << 16) | (0xc904 >> 2),
310 	0x00000000,
311 	(0xbe00 << 16) | (0xc904 >> 2),
312 	0x00000000,
313 	(0x4e00 << 16) | (0xc908 >> 2),
314 	0x00000000,
315 	(0x5e00 << 16) | (0xc908 >> 2),
316 	0x00000000,
317 	(0x6e00 << 16) | (0xc908 >> 2),
318 	0x00000000,
319 	(0x7e00 << 16) | (0xc908 >> 2),
320 	0x00000000,
321 	(0x8e00 << 16) | (0xc908 >> 2),
322 	0x00000000,
323 	(0x9e00 << 16) | (0xc908 >> 2),
324 	0x00000000,
325 	(0xae00 << 16) | (0xc908 >> 2),
326 	0x00000000,
327 	(0xbe00 << 16) | (0xc908 >> 2),
328 	0x00000000,
329 	(0x4e00 << 16) | (0xc90c >> 2),
330 	0x00000000,
331 	(0x5e00 << 16) | (0xc90c >> 2),
332 	0x00000000,
333 	(0x6e00 << 16) | (0xc90c >> 2),
334 	0x00000000,
335 	(0x7e00 << 16) | (0xc90c >> 2),
336 	0x00000000,
337 	(0x8e00 << 16) | (0xc90c >> 2),
338 	0x00000000,
339 	(0x9e00 << 16) | (0xc90c >> 2),
340 	0x00000000,
341 	(0xae00 << 16) | (0xc90c >> 2),
342 	0x00000000,
343 	(0xbe00 << 16) | (0xc90c >> 2),
344 	0x00000000,
345 	(0x4e00 << 16) | (0xc910 >> 2),
346 	0x00000000,
347 	(0x5e00 << 16) | (0xc910 >> 2),
348 	0x00000000,
349 	(0x6e00 << 16) | (0xc910 >> 2),
350 	0x00000000,
351 	(0x7e00 << 16) | (0xc910 >> 2),
352 	0x00000000,
353 	(0x8e00 << 16) | (0xc910 >> 2),
354 	0x00000000,
355 	(0x9e00 << 16) | (0xc910 >> 2),
356 	0x00000000,
357 	(0xae00 << 16) | (0xc910 >> 2),
358 	0x00000000,
359 	(0xbe00 << 16) | (0xc910 >> 2),
360 	0x00000000,
361 	(0x0e00 << 16) | (0xc99c >> 2),
362 	0x00000000,
363 	(0x0e00 << 16) | (0x9834 >> 2),
364 	0x00000000,
365 	(0x0000 << 16) | (0x30f00 >> 2),
366 	0x00000000,
367 	(0x0001 << 16) | (0x30f00 >> 2),
368 	0x00000000,
369 	(0x0000 << 16) | (0x30f04 >> 2),
370 	0x00000000,
371 	(0x0001 << 16) | (0x30f04 >> 2),
372 	0x00000000,
373 	(0x0000 << 16) | (0x30f08 >> 2),
374 	0x00000000,
375 	(0x0001 << 16) | (0x30f08 >> 2),
376 	0x00000000,
377 	(0x0000 << 16) | (0x30f0c >> 2),
378 	0x00000000,
379 	(0x0001 << 16) | (0x30f0c >> 2),
380 	0x00000000,
381 	(0x0600 << 16) | (0x9b7c >> 2),
382 	0x00000000,
383 	(0x0e00 << 16) | (0x8a14 >> 2),
384 	0x00000000,
385 	(0x0e00 << 16) | (0x8a18 >> 2),
386 	0x00000000,
387 	(0x0600 << 16) | (0x30a00 >> 2),
388 	0x00000000,
389 	(0x0e00 << 16) | (0x8bf0 >> 2),
390 	0x00000000,
391 	(0x0e00 << 16) | (0x8bcc >> 2),
392 	0x00000000,
393 	(0x0e00 << 16) | (0x8b24 >> 2),
394 	0x00000000,
395 	(0x0e00 << 16) | (0x30a04 >> 2),
396 	0x00000000,
397 	(0x0600 << 16) | (0x30a10 >> 2),
398 	0x00000000,
399 	(0x0600 << 16) | (0x30a14 >> 2),
400 	0x00000000,
401 	(0x0600 << 16) | (0x30a18 >> 2),
402 	0x00000000,
403 	(0x0600 << 16) | (0x30a2c >> 2),
404 	0x00000000,
405 	(0x0e00 << 16) | (0xc700 >> 2),
406 	0x00000000,
407 	(0x0e00 << 16) | (0xc704 >> 2),
408 	0x00000000,
409 	(0x0e00 << 16) | (0xc708 >> 2),
410 	0x00000000,
411 	(0x0e00 << 16) | (0xc768 >> 2),
412 	0x00000000,
413 	(0x0400 << 16) | (0xc770 >> 2),
414 	0x00000000,
415 	(0x0400 << 16) | (0xc774 >> 2),
416 	0x00000000,
417 	(0x0400 << 16) | (0xc778 >> 2),
418 	0x00000000,
419 	(0x0400 << 16) | (0xc77c >> 2),
420 	0x00000000,
421 	(0x0400 << 16) | (0xc780 >> 2),
422 	0x00000000,
423 	(0x0400 << 16) | (0xc784 >> 2),
424 	0x00000000,
425 	(0x0400 << 16) | (0xc788 >> 2),
426 	0x00000000,
427 	(0x0400 << 16) | (0xc78c >> 2),
428 	0x00000000,
429 	(0x0400 << 16) | (0xc798 >> 2),
430 	0x00000000,
431 	(0x0400 << 16) | (0xc79c >> 2),
432 	0x00000000,
433 	(0x0400 << 16) | (0xc7a0 >> 2),
434 	0x00000000,
435 	(0x0400 << 16) | (0xc7a4 >> 2),
436 	0x00000000,
437 	(0x0400 << 16) | (0xc7a8 >> 2),
438 	0x00000000,
439 	(0x0400 << 16) | (0xc7ac >> 2),
440 	0x00000000,
441 	(0x0400 << 16) | (0xc7b0 >> 2),
442 	0x00000000,
443 	(0x0400 << 16) | (0xc7b4 >> 2),
444 	0x00000000,
445 	(0x0e00 << 16) | (0x9100 >> 2),
446 	0x00000000,
447 	(0x0e00 << 16) | (0x3c010 >> 2),
448 	0x00000000,
449 	(0x0e00 << 16) | (0x92a8 >> 2),
450 	0x00000000,
451 	(0x0e00 << 16) | (0x92ac >> 2),
452 	0x00000000,
453 	(0x0e00 << 16) | (0x92b4 >> 2),
454 	0x00000000,
455 	(0x0e00 << 16) | (0x92b8 >> 2),
456 	0x00000000,
457 	(0x0e00 << 16) | (0x92bc >> 2),
458 	0x00000000,
459 	(0x0e00 << 16) | (0x92c0 >> 2),
460 	0x00000000,
461 	(0x0e00 << 16) | (0x92c4 >> 2),
462 	0x00000000,
463 	(0x0e00 << 16) | (0x92c8 >> 2),
464 	0x00000000,
465 	(0x0e00 << 16) | (0x92cc >> 2),
466 	0x00000000,
467 	(0x0e00 << 16) | (0x92d0 >> 2),
468 	0x00000000,
469 	(0x0e00 << 16) | (0x8c00 >> 2),
470 	0x00000000,
471 	(0x0e00 << 16) | (0x8c04 >> 2),
472 	0x00000000,
473 	(0x0e00 << 16) | (0x8c20 >> 2),
474 	0x00000000,
475 	(0x0e00 << 16) | (0x8c38 >> 2),
476 	0x00000000,
477 	(0x0e00 << 16) | (0x8c3c >> 2),
478 	0x00000000,
479 	(0x0e00 << 16) | (0xae00 >> 2),
480 	0x00000000,
481 	(0x0e00 << 16) | (0x9604 >> 2),
482 	0x00000000,
483 	(0x0e00 << 16) | (0xac08 >> 2),
484 	0x00000000,
485 	(0x0e00 << 16) | (0xac0c >> 2),
486 	0x00000000,
487 	(0x0e00 << 16) | (0xac10 >> 2),
488 	0x00000000,
489 	(0x0e00 << 16) | (0xac14 >> 2),
490 	0x00000000,
491 	(0x0e00 << 16) | (0xac58 >> 2),
492 	0x00000000,
493 	(0x0e00 << 16) | (0xac68 >> 2),
494 	0x00000000,
495 	(0x0e00 << 16) | (0xac6c >> 2),
496 	0x00000000,
497 	(0x0e00 << 16) | (0xac70 >> 2),
498 	0x00000000,
499 	(0x0e00 << 16) | (0xac74 >> 2),
500 	0x00000000,
501 	(0x0e00 << 16) | (0xac78 >> 2),
502 	0x00000000,
503 	(0x0e00 << 16) | (0xac7c >> 2),
504 	0x00000000,
505 	(0x0e00 << 16) | (0xac80 >> 2),
506 	0x00000000,
507 	(0x0e00 << 16) | (0xac84 >> 2),
508 	0x00000000,
509 	(0x0e00 << 16) | (0xac88 >> 2),
510 	0x00000000,
511 	(0x0e00 << 16) | (0xac8c >> 2),
512 	0x00000000,
513 	(0x0e00 << 16) | (0x970c >> 2),
514 	0x00000000,
515 	(0x0e00 << 16) | (0x9714 >> 2),
516 	0x00000000,
517 	(0x0e00 << 16) | (0x9718 >> 2),
518 	0x00000000,
519 	(0x0e00 << 16) | (0x971c >> 2),
520 	0x00000000,
521 	(0x0e00 << 16) | (0x31068 >> 2),
522 	0x00000000,
523 	(0x4e00 << 16) | (0x31068 >> 2),
524 	0x00000000,
525 	(0x5e00 << 16) | (0x31068 >> 2),
526 	0x00000000,
527 	(0x6e00 << 16) | (0x31068 >> 2),
528 	0x00000000,
529 	(0x7e00 << 16) | (0x31068 >> 2),
530 	0x00000000,
531 	(0x8e00 << 16) | (0x31068 >> 2),
532 	0x00000000,
533 	(0x9e00 << 16) | (0x31068 >> 2),
534 	0x00000000,
535 	(0xae00 << 16) | (0x31068 >> 2),
536 	0x00000000,
537 	(0xbe00 << 16) | (0x31068 >> 2),
538 	0x00000000,
539 	(0x0e00 << 16) | (0xcd10 >> 2),
540 	0x00000000,
541 	(0x0e00 << 16) | (0xcd14 >> 2),
542 	0x00000000,
543 	(0x0e00 << 16) | (0x88b0 >> 2),
544 	0x00000000,
545 	(0x0e00 << 16) | (0x88b4 >> 2),
546 	0x00000000,
547 	(0x0e00 << 16) | (0x88b8 >> 2),
548 	0x00000000,
549 	(0x0e00 << 16) | (0x88bc >> 2),
550 	0x00000000,
551 	(0x0400 << 16) | (0x89c0 >> 2),
552 	0x00000000,
553 	(0x0e00 << 16) | (0x88c4 >> 2),
554 	0x00000000,
555 	(0x0e00 << 16) | (0x88c8 >> 2),
556 	0x00000000,
557 	(0x0e00 << 16) | (0x88d0 >> 2),
558 	0x00000000,
559 	(0x0e00 << 16) | (0x88d4 >> 2),
560 	0x00000000,
561 	(0x0e00 << 16) | (0x88d8 >> 2),
562 	0x00000000,
563 	(0x0e00 << 16) | (0x8980 >> 2),
564 	0x00000000,
565 	(0x0e00 << 16) | (0x30938 >> 2),
566 	0x00000000,
567 	(0x0e00 << 16) | (0x3093c >> 2),
568 	0x00000000,
569 	(0x0e00 << 16) | (0x30940 >> 2),
570 	0x00000000,
571 	(0x0e00 << 16) | (0x89a0 >> 2),
572 	0x00000000,
573 	(0x0e00 << 16) | (0x30900 >> 2),
574 	0x00000000,
575 	(0x0e00 << 16) | (0x30904 >> 2),
576 	0x00000000,
577 	(0x0e00 << 16) | (0x89b4 >> 2),
578 	0x00000000,
579 	(0x0e00 << 16) | (0x3c210 >> 2),
580 	0x00000000,
581 	(0x0e00 << 16) | (0x3c214 >> 2),
582 	0x00000000,
583 	(0x0e00 << 16) | (0x3c218 >> 2),
584 	0x00000000,
585 	(0x0e00 << 16) | (0x8904 >> 2),
586 	0x00000000,
587 	0x5,
588 	(0x0e00 << 16) | (0x8c28 >> 2),
589 	(0x0e00 << 16) | (0x8c2c >> 2),
590 	(0x0e00 << 16) | (0x8c30 >> 2),
591 	(0x0e00 << 16) | (0x8c34 >> 2),
592 	(0x0e00 << 16) | (0x9600 >> 2),
593 };
594 
/*
 * RLC save/restore register list for Kalindi (Kabini) parts.
 *
 * NOTE(review): same layout as the Spectre list above — pairs of
 *   ((GRBM_GFX_INDEX select value << 16) | (register byte offset >> 2))
 * followed by a zero placeholder dword, with bare 0x3 / 0x5 words
 * introducing trailing sections; confirm against the RLC microcode
 * documentation before modifying.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
919 
/*
 * "Golden" SPM register settings for Bonaire.
 * Each row is { register offset, AND mask, OR value } — presumably applied
 * via radeon_program_register_sequence(); confirm at the call site.
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
924 
/*
 * "Golden" common register settings for Bonaire.
 * Each row is { register offset, AND mask, OR value } — presumably applied
 * via radeon_program_register_sequence(); confirm at the call site.
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
932 
/*
 * "Golden" register settings for Bonaire (hardware-recommended defaults).
 * Each row is { register offset, AND mask, OR value } — presumably applied
 * via radeon_program_register_sequence(); confirm at the call site.
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
977 
/*
 * Medium-grain / coarse-grain clockgating init sequence for Bonaire.
 * Each row is { register offset, AND mask, OR value } — presumably applied
 * via radeon_program_register_sequence(); confirm at the call site.
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1063 
/*
 * "Golden" SPM register settings for Spectre (Kaveri).
 * Each row is { register offset, AND mask, OR value } — presumably applied
 * via radeon_program_register_sequence(); confirm at the call site.
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1068 
/*
 * "Golden" common register settings for Spectre (Kaveri).
 * Each row is { register offset, AND mask, OR value } — presumably applied
 * via radeon_program_register_sequence(); confirm at the call site.
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1076 
/*
 * "Golden" register settings for Spectre (Kaveri).
 * Each row is { register offset, AND mask, OR value } — presumably applied
 * via radeon_program_register_sequence(); confirm at the call site.
 * NOTE(review): offset 0x28355 is not dword-aligned, unlike every other
 * entry — looks suspicious; verify against the register spec.
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28355, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1105 
/* Spectre (Kaveri) clockgating init sequence; dword triples
 * (offset, mask, value), programmed first for CHIP_KAVERI in
 * cik_init_golden_registers().
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1196 
/* Kalindi (Kabini) SPM golden settings; dword triples (offset, mask, value)
 * programmed via radeon_program_register_sequence() in
 * cik_init_golden_registers().
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1201 
/* Kalindi (Kabini) common golden settings; dword triples (offset, mask, value)
 * applied after the per-ASIC golden registers in cik_init_golden_registers().
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1209 
/* Kalindi (Kabini) golden register overrides; dword triples
 * (offset, mask, value) consumed by radeon_program_register_sequence()
 * from cik_init_golden_registers().
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1243 
/* Kalindi (Kabini) clockgating init sequence; dword triples
 * (offset, mask, value), programmed first for CHIP_KABINI in
 * cik_init_golden_registers().
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1302 
1303 static void cik_init_golden_registers(struct radeon_device *rdev)
1304 {
1305 	switch (rdev->family) {
1306 	case CHIP_BONAIRE:
1307 		radeon_program_register_sequence(rdev,
1308 						 bonaire_mgcg_cgcg_init,
1309 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1310 		radeon_program_register_sequence(rdev,
1311 						 bonaire_golden_registers,
1312 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1313 		radeon_program_register_sequence(rdev,
1314 						 bonaire_golden_common_registers,
1315 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1316 		radeon_program_register_sequence(rdev,
1317 						 bonaire_golden_spm_registers,
1318 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1319 		break;
1320 	case CHIP_KABINI:
1321 		radeon_program_register_sequence(rdev,
1322 						 kalindi_mgcg_cgcg_init,
1323 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1324 		radeon_program_register_sequence(rdev,
1325 						 kalindi_golden_registers,
1326 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1327 		radeon_program_register_sequence(rdev,
1328 						 kalindi_golden_common_registers,
1329 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1330 		radeon_program_register_sequence(rdev,
1331 						 kalindi_golden_spm_registers,
1332 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1333 		break;
1334 	case CHIP_KAVERI:
1335 		radeon_program_register_sequence(rdev,
1336 						 spectre_mgcg_cgcg_init,
1337 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1338 		radeon_program_register_sequence(rdev,
1339 						 spectre_golden_registers,
1340 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1341 		radeon_program_register_sequence(rdev,
1342 						 spectre_golden_common_registers,
1343 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1344 		radeon_program_register_sequence(rdev,
1345 						 spectre_golden_spm_registers,
1346 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1347 		break;
1348 	default:
1349 		break;
1350 	}
1351 }
1352 
1353 /**
1354  * cik_get_xclk - get the xclk
1355  *
1356  * @rdev: radeon_device pointer
1357  *
1358  * Returns the reference clock used by the gfx engine
1359  * (CIK).
1360  */
1361 u32 cik_get_xclk(struct radeon_device *rdev)
1362 {
1363         u32 reference_clock = rdev->clock.spll.reference_freq;
1364 
1365 	if (rdev->flags & RADEON_IS_IGP) {
1366 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1367 			return reference_clock / 2;
1368 	} else {
1369 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1370 			return reference_clock / 4;
1371 	}
1372 	return reference_clock;
1373 }
1374 
1375 /**
1376  * cik_mm_rdoorbell - read a doorbell dword
1377  *
1378  * @rdev: radeon_device pointer
1379  * @offset: byte offset into the aperture
1380  *
1381  * Returns the value in the doorbell aperture at the
1382  * requested offset (CIK).
1383  */
1384 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
1385 {
1386 	if (offset < rdev->doorbell.size) {
1387 		return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
1388 	} else {
1389 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
1390 		return 0;
1391 	}
1392 }
1393 
1394 /**
1395  * cik_mm_wdoorbell - write a doorbell dword
1396  *
1397  * @rdev: radeon_device pointer
1398  * @offset: byte offset into the aperture
1399  * @v: value to write
1400  *
1401  * Writes @v to the doorbell aperture at the
1402  * requested offset (CIK).
1403  */
1404 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
1405 {
1406 	if (offset < rdev->doorbell.size) {
1407 		writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
1408 	} else {
1409 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
1410 	}
1411 }
1412 
#define BONAIRE_IO_MC_REGS_SIZE 36

/* MC IO debug register setup for Bonaire: {index, data} pairs written to
 * MC_SEQ_IO_DEBUG_INDEX / MC_SEQ_IO_DEBUG_DATA by ci_mc_load_microcode()
 * before the MC ucode is loaded.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1454 
1455 /**
1456  * cik_srbm_select - select specific register instances
1457  *
1458  * @rdev: radeon_device pointer
1459  * @me: selected ME (micro engine)
1460  * @pipe: pipe
1461  * @queue: queue
1462  * @vmid: VMID
1463  *
1464  * Switches the currently active registers instances.  Some
1465  * registers are instanced per VMID, others are instanced per
1466  * me/pipe/queue combination.
1467  */
1468 static void cik_srbm_select(struct radeon_device *rdev,
1469 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1470 {
1471 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1472 			     MEID(me & 0x3) |
1473 			     VMID(vmid & 0xf) |
1474 			     QUEUEID(queue & 0x7));
1475 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1476 }
1477 
1478 /* ucode loading */
1479 /**
1480  * ci_mc_load_microcode - load MC ucode into the hw
1481  *
1482  * @rdev: radeon_device pointer
1483  *
1484  * Load the GDDR MC ucode into the hw (CIK).
1485  * Returns 0 on success, error on failure.
1486  */
1487 static int ci_mc_load_microcode(struct radeon_device *rdev)
1488 {
1489 	const __be32 *fw_data;
1490 	u32 running, blackout = 0;
1491 	u32 *io_mc_regs;
1492 	int i, ucode_size, regs_size;
1493 
1494 	if (!rdev->mc_fw)
1495 		return -EINVAL;
1496 
1497 	switch (rdev->family) {
1498 	case CHIP_BONAIRE:
1499 	default:
1500 		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1501 		ucode_size = CIK_MC_UCODE_SIZE;
1502 		regs_size = BONAIRE_IO_MC_REGS_SIZE;
1503 		break;
1504 	}
1505 
1506 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1507 
1508 	if (running == 0) {
1509 		if (running) {
1510 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1511 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1512 		}
1513 
1514 		/* reset the engine and set to writable */
1515 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1516 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1517 
1518 		/* load mc io regs */
1519 		for (i = 0; i < regs_size; i++) {
1520 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1521 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1522 		}
1523 		/* load the MC ucode */
1524 		fw_data = (const __be32 *)rdev->mc_fw->data;
1525 		for (i = 0; i < ucode_size; i++)
1526 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1527 
1528 		/* put the engine back into the active state */
1529 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1530 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1531 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1532 
1533 		/* wait for training to complete */
1534 		for (i = 0; i < rdev->usec_timeout; i++) {
1535 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1536 				break;
1537 			udelay(1);
1538 		}
1539 		for (i = 0; i < rdev->usec_timeout; i++) {
1540 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1541 				break;
1542 			udelay(1);
1543 		}
1544 
1545 		if (running)
1546 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1547 	}
1548 
1549 	return 0;
1550 }
1551 
1552 /**
1553  * cik_init_microcode - load ucode images from disk
1554  *
1555  * @rdev: radeon_device pointer
1556  *
1557  * Use the firmware interface to load the ucode images into
1558  * the driver (not loaded into hw).
1559  * Returns 0 on success, error on failure.
1560  */
1561 static int cik_init_microcode(struct radeon_device *rdev)
1562 {
1563 	const char *chip_name;
1564 	size_t pfp_req_size, me_req_size, ce_req_size,
1565 		mec_req_size, rlc_req_size, mc_req_size,
1566 		sdma_req_size, smc_req_size;
1567 	char fw_name[30];
1568 	int err;
1569 
1570 	DRM_DEBUG("\n");
1571 
1572 	switch (rdev->family) {
1573 	case CHIP_BONAIRE:
1574 		chip_name = "BONAIRE";
1575 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1576 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1577 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1578 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1579 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1580 		mc_req_size = CIK_MC_UCODE_SIZE * 4;
1581 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1582 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1583 		break;
1584 	case CHIP_KAVERI:
1585 		chip_name = "KAVERI";
1586 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1587 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1588 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1589 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1590 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1591 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1592 		break;
1593 	case CHIP_KABINI:
1594 		chip_name = "KABINI";
1595 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1596 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1597 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1598 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1599 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1600 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1601 		break;
1602 	default: BUG();
1603 	}
1604 
1605 	DRM_INFO("Loading %s Microcode\n", chip_name);
1606 
1607 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1608 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1609 	if (err)
1610 		goto out;
1611 	if (rdev->pfp_fw->size != pfp_req_size) {
1612 		printk(KERN_ERR
1613 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1614 		       rdev->pfp_fw->size, fw_name);
1615 		err = -EINVAL;
1616 		goto out;
1617 	}
1618 
1619 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1620 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1621 	if (err)
1622 		goto out;
1623 	if (rdev->me_fw->size != me_req_size) {
1624 		printk(KERN_ERR
1625 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1626 		       rdev->me_fw->size, fw_name);
1627 		err = -EINVAL;
1628 	}
1629 
1630 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1631 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1632 	if (err)
1633 		goto out;
1634 	if (rdev->ce_fw->size != ce_req_size) {
1635 		printk(KERN_ERR
1636 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1637 		       rdev->ce_fw->size, fw_name);
1638 		err = -EINVAL;
1639 	}
1640 
1641 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1642 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1643 	if (err)
1644 		goto out;
1645 	if (rdev->mec_fw->size != mec_req_size) {
1646 		printk(KERN_ERR
1647 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1648 		       rdev->mec_fw->size, fw_name);
1649 		err = -EINVAL;
1650 	}
1651 
1652 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1653 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1654 	if (err)
1655 		goto out;
1656 	if (rdev->rlc_fw->size != rlc_req_size) {
1657 		printk(KERN_ERR
1658 		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1659 		       rdev->rlc_fw->size, fw_name);
1660 		err = -EINVAL;
1661 	}
1662 
1663 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1664 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1665 	if (err)
1666 		goto out;
1667 	if (rdev->sdma_fw->size != sdma_req_size) {
1668 		printk(KERN_ERR
1669 		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1670 		       rdev->sdma_fw->size, fw_name);
1671 		err = -EINVAL;
1672 	}
1673 
1674 	/* No SMC, MC ucode on APUs */
1675 	if (!(rdev->flags & RADEON_IS_IGP)) {
1676 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1677 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1678 		if (err)
1679 			goto out;
1680 		if (rdev->mc_fw->size != mc_req_size) {
1681 			printk(KERN_ERR
1682 			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1683 			       rdev->mc_fw->size, fw_name);
1684 			err = -EINVAL;
1685 		}
1686 
1687 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1688 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1689 		if (err) {
1690 			printk(KERN_ERR
1691 			       "smc: error loading firmware \"%s\"\n",
1692 			       fw_name);
1693 			release_firmware(rdev->smc_fw);
1694 			rdev->smc_fw = NULL;
1695 		} else if (rdev->smc_fw->size != smc_req_size) {
1696 			printk(KERN_ERR
1697 			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1698 			       rdev->smc_fw->size, fw_name);
1699 			err = -EINVAL;
1700 		}
1701 	}
1702 
1703 out:
1704 	if (err) {
1705 		if (err != -EINVAL)
1706 			printk(KERN_ERR
1707 			       "cik_cp: Failed to load firmware \"%s\"\n",
1708 			       fw_name);
1709 		release_firmware(rdev->pfp_fw);
1710 		rdev->pfp_fw = NULL;
1711 		release_firmware(rdev->me_fw);
1712 		rdev->me_fw = NULL;
1713 		release_firmware(rdev->ce_fw);
1714 		rdev->ce_fw = NULL;
1715 		release_firmware(rdev->rlc_fw);
1716 		rdev->rlc_fw = NULL;
1717 		release_firmware(rdev->mc_fw);
1718 		rdev->mc_fw = NULL;
1719 		release_firmware(rdev->smc_fw);
1720 		rdev->smc_fw = NULL;
1721 	}
1722 	return err;
1723 }
1724 
1725 /*
1726  * Core functions
1727  */
1728 /**
1729  * cik_tiling_mode_table_init - init the hw tiling table
1730  *
1731  * @rdev: radeon_device pointer
1732  *
1733  * Starting with SI, the tiling setup is done globally in a
1734  * set of 32 tiling modes.  Rather than selecting each set of
1735  * parameters per surface as on older asics, we just select
1736  * which index in the tiling table we want to use, and the
1737  * surface uses those parameters (CIK).
1738  */
1739 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1740 {
1741 	const u32 num_tile_mode_states = 32;
1742 	const u32 num_secondary_tile_mode_states = 16;
1743 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1744 	u32 num_pipe_configs;
1745 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
1746 		rdev->config.cik.max_shader_engines;
1747 
1748 	switch (rdev->config.cik.mem_row_size_in_kb) {
1749 	case 1:
1750 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1751 		break;
1752 	case 2:
1753 	default:
1754 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1755 		break;
1756 	case 4:
1757 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1758 		break;
1759 	}
1760 
1761 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
1762 	if (num_pipe_configs > 8)
1763 		num_pipe_configs = 8; /* ??? */
1764 
1765 	if (num_pipe_configs == 8) {
1766 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1767 			switch (reg_offset) {
1768 			case 0:
1769 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1770 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1771 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1772 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1773 				break;
1774 			case 1:
1775 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1776 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1777 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1778 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1779 				break;
1780 			case 2:
1781 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1782 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1783 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1784 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1785 				break;
1786 			case 3:
1787 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1788 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1789 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1790 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1791 				break;
1792 			case 4:
1793 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1794 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1795 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1796 						 TILE_SPLIT(split_equal_to_row_size));
1797 				break;
1798 			case 5:
1799 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1800 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1801 				break;
1802 			case 6:
1803 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1804 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1805 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1806 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1807 				break;
1808 			case 7:
1809 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1810 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1811 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1812 						 TILE_SPLIT(split_equal_to_row_size));
1813 				break;
1814 			case 8:
1815 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1816 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
1817 				break;
1818 			case 9:
1819 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1820 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1821 				break;
1822 			case 10:
1823 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1824 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1825 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1826 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1827 				break;
1828 			case 11:
1829 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1830 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1831 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1832 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1833 				break;
1834 			case 12:
1835 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1836 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1837 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1838 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1839 				break;
1840 			case 13:
1841 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1842 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1843 				break;
1844 			case 14:
1845 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1846 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1847 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1848 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1849 				break;
1850 			case 16:
1851 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1852 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1853 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1854 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1855 				break;
1856 			case 17:
1857 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1858 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1859 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1860 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1861 				break;
1862 			case 27:
1863 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1864 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1865 				break;
1866 			case 28:
1867 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1868 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1869 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1870 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1871 				break;
1872 			case 29:
1873 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1874 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1875 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1876 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1877 				break;
1878 			case 30:
1879 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1880 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1881 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1882 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1883 				break;
1884 			default:
1885 				gb_tile_moden = 0;
1886 				break;
1887 			}
1888 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1889 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1890 		}
1891 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1892 			switch (reg_offset) {
1893 			case 0:
1894 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1895 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1896 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1897 						 NUM_BANKS(ADDR_SURF_16_BANK));
1898 				break;
1899 			case 1:
1900 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1901 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1902 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1903 						 NUM_BANKS(ADDR_SURF_16_BANK));
1904 				break;
1905 			case 2:
1906 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1907 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1908 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1909 						 NUM_BANKS(ADDR_SURF_16_BANK));
1910 				break;
1911 			case 3:
1912 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1913 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1914 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1915 						 NUM_BANKS(ADDR_SURF_16_BANK));
1916 				break;
1917 			case 4:
1918 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1919 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1920 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1921 						 NUM_BANKS(ADDR_SURF_8_BANK));
1922 				break;
1923 			case 5:
1924 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1925 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1926 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1927 						 NUM_BANKS(ADDR_SURF_4_BANK));
1928 				break;
1929 			case 6:
1930 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1931 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1932 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1933 						 NUM_BANKS(ADDR_SURF_2_BANK));
1934 				break;
1935 			case 8:
1936 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1937 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1938 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1939 						 NUM_BANKS(ADDR_SURF_16_BANK));
1940 				break;
1941 			case 9:
1942 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1943 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1944 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1945 						 NUM_BANKS(ADDR_SURF_16_BANK));
1946 				break;
1947 			case 10:
1948 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1949 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1950 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1951 						 NUM_BANKS(ADDR_SURF_16_BANK));
1952 				break;
1953 			case 11:
1954 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1955 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1956 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1957 						 NUM_BANKS(ADDR_SURF_16_BANK));
1958 				break;
1959 			case 12:
1960 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1961 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1962 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1963 						 NUM_BANKS(ADDR_SURF_8_BANK));
1964 				break;
1965 			case 13:
1966 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1967 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1968 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1969 						 NUM_BANKS(ADDR_SURF_4_BANK));
1970 				break;
1971 			case 14:
1972 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1973 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1974 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1975 						 NUM_BANKS(ADDR_SURF_2_BANK));
1976 				break;
1977 			default:
1978 				gb_tile_moden = 0;
1979 				break;
1980 			}
1981 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1982 		}
1983 	} else if (num_pipe_configs == 4) {
1984 		if (num_rbs == 4) {
1985 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1986 				switch (reg_offset) {
1987 				case 0:
1988 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1989 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1990 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1991 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1992 					break;
1993 				case 1:
1994 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1995 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1996 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1997 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1998 					break;
1999 				case 2:
2000 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2001 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2002 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2003 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2004 					break;
2005 				case 3:
2006 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2007 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2008 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2009 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2010 					break;
2011 				case 4:
2012 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2013 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2014 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2015 							 TILE_SPLIT(split_equal_to_row_size));
2016 					break;
2017 				case 5:
2018 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2019 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2020 					break;
2021 				case 6:
2022 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2023 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2024 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2025 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2026 					break;
2027 				case 7:
2028 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2029 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2030 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2031 							 TILE_SPLIT(split_equal_to_row_size));
2032 					break;
2033 				case 8:
2034 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2035 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2036 					break;
2037 				case 9:
2038 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2039 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2040 					break;
2041 				case 10:
2042 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2043 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2044 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2045 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2046 					break;
2047 				case 11:
2048 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2049 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2050 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2051 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2052 					break;
2053 				case 12:
2054 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2055 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2056 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2057 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2058 					break;
2059 				case 13:
2060 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2061 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2062 					break;
2063 				case 14:
2064 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2065 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2066 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2067 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2068 					break;
2069 				case 16:
2070 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2071 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2072 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2073 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2074 					break;
2075 				case 17:
2076 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2077 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2078 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2079 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2080 					break;
2081 				case 27:
2082 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2083 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2084 					break;
2085 				case 28:
2086 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2087 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2088 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2089 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2090 					break;
2091 				case 29:
2092 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2093 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2094 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2095 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2096 					break;
2097 				case 30:
2098 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2099 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2100 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2101 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2102 					break;
2103 				default:
2104 					gb_tile_moden = 0;
2105 					break;
2106 				}
2107 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2108 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2109 			}
2110 		} else if (num_rbs < 4) {
2111 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2112 				switch (reg_offset) {
2113 				case 0:
2114 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2115 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2116 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2117 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2118 					break;
2119 				case 1:
2120 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2121 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2122 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2123 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2124 					break;
2125 				case 2:
2126 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2127 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2128 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2129 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2130 					break;
2131 				case 3:
2132 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2133 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2134 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2135 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2136 					break;
2137 				case 4:
2138 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2139 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2140 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2141 							 TILE_SPLIT(split_equal_to_row_size));
2142 					break;
2143 				case 5:
2144 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2145 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2146 					break;
2147 				case 6:
2148 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2149 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2150 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2151 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2152 					break;
2153 				case 7:
2154 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2155 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2156 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2157 							 TILE_SPLIT(split_equal_to_row_size));
2158 					break;
2159 				case 8:
2160 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2161 						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2162 					break;
2163 				case 9:
2164 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2165 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2166 					break;
2167 				case 10:
2168 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2169 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2170 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2171 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2172 					break;
2173 				case 11:
2174 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2175 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2176 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2177 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2178 					break;
2179 				case 12:
2180 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2181 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2182 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2183 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2184 					break;
2185 				case 13:
2186 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2187 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2188 					break;
2189 				case 14:
2190 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2191 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2192 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2193 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2194 					break;
2195 				case 16:
2196 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2197 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2198 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2199 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2200 					break;
2201 				case 17:
2202 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2203 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2204 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2205 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2206 					break;
2207 				case 27:
2208 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2209 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2210 					break;
2211 				case 28:
2212 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2213 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2214 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2215 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2216 					break;
2217 				case 29:
2218 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2219 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2220 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2221 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2222 					break;
2223 				case 30:
2224 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2225 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2226 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2227 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2228 					break;
2229 				default:
2230 					gb_tile_moden = 0;
2231 					break;
2232 				}
2233 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2234 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2235 			}
2236 		}
2237 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2238 			switch (reg_offset) {
2239 			case 0:
2240 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2241 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2242 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2243 						 NUM_BANKS(ADDR_SURF_16_BANK));
2244 				break;
2245 			case 1:
2246 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2247 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2248 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2249 						 NUM_BANKS(ADDR_SURF_16_BANK));
2250 				break;
2251 			case 2:
2252 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2253 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2254 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2255 						 NUM_BANKS(ADDR_SURF_16_BANK));
2256 				break;
2257 			case 3:
2258 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2259 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2260 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2261 						 NUM_BANKS(ADDR_SURF_16_BANK));
2262 				break;
2263 			case 4:
2264 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2265 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2266 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2267 						 NUM_BANKS(ADDR_SURF_16_BANK));
2268 				break;
2269 			case 5:
2270 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2271 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2272 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2273 						 NUM_BANKS(ADDR_SURF_8_BANK));
2274 				break;
2275 			case 6:
2276 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2277 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2278 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2279 						 NUM_BANKS(ADDR_SURF_4_BANK));
2280 				break;
2281 			case 8:
2282 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2283 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2284 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2285 						 NUM_BANKS(ADDR_SURF_16_BANK));
2286 				break;
2287 			case 9:
2288 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2289 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2290 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2291 						 NUM_BANKS(ADDR_SURF_16_BANK));
2292 				break;
2293 			case 10:
2294 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2295 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2296 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2297 						 NUM_BANKS(ADDR_SURF_16_BANK));
2298 				break;
2299 			case 11:
2300 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2301 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2302 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2303 						 NUM_BANKS(ADDR_SURF_16_BANK));
2304 				break;
2305 			case 12:
2306 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2307 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2308 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2309 						 NUM_BANKS(ADDR_SURF_16_BANK));
2310 				break;
2311 			case 13:
2312 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2313 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2314 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2315 						 NUM_BANKS(ADDR_SURF_8_BANK));
2316 				break;
2317 			case 14:
2318 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2319 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2320 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2321 						 NUM_BANKS(ADDR_SURF_4_BANK));
2322 				break;
2323 			default:
2324 				gb_tile_moden = 0;
2325 				break;
2326 			}
2327 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2328 		}
2329 	} else if (num_pipe_configs == 2) {
2330 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2331 			switch (reg_offset) {
2332 			case 0:
2333 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2334 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2335 						 PIPE_CONFIG(ADDR_SURF_P2) |
2336 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2337 				break;
2338 			case 1:
2339 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2340 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2341 						 PIPE_CONFIG(ADDR_SURF_P2) |
2342 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2343 				break;
2344 			case 2:
2345 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2346 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2347 						 PIPE_CONFIG(ADDR_SURF_P2) |
2348 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2349 				break;
2350 			case 3:
2351 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2352 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2353 						 PIPE_CONFIG(ADDR_SURF_P2) |
2354 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2355 				break;
2356 			case 4:
2357 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2358 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2359 						 PIPE_CONFIG(ADDR_SURF_P2) |
2360 						 TILE_SPLIT(split_equal_to_row_size));
2361 				break;
2362 			case 5:
2363 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2364 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2365 				break;
2366 			case 6:
2367 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2368 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2369 						 PIPE_CONFIG(ADDR_SURF_P2) |
2370 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2371 				break;
2372 			case 7:
2373 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2374 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2375 						 PIPE_CONFIG(ADDR_SURF_P2) |
2376 						 TILE_SPLIT(split_equal_to_row_size));
2377 				break;
2378 			case 8:
2379 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2380 				break;
2381 			case 9:
2382 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2383 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2384 				break;
2385 			case 10:
2386 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2388 						 PIPE_CONFIG(ADDR_SURF_P2) |
2389 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2390 				break;
2391 			case 11:
2392 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2393 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2394 						 PIPE_CONFIG(ADDR_SURF_P2) |
2395 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2396 				break;
2397 			case 12:
2398 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2399 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2400 						 PIPE_CONFIG(ADDR_SURF_P2) |
2401 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2402 				break;
2403 			case 13:
2404 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2405 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2406 				break;
2407 			case 14:
2408 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2409 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2410 						 PIPE_CONFIG(ADDR_SURF_P2) |
2411 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2412 				break;
2413 			case 16:
2414 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2415 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2416 						 PIPE_CONFIG(ADDR_SURF_P2) |
2417 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2418 				break;
2419 			case 17:
2420 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2421 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2422 						 PIPE_CONFIG(ADDR_SURF_P2) |
2423 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2424 				break;
2425 			case 27:
2426 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2427 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2428 				break;
2429 			case 28:
2430 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2431 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2432 						 PIPE_CONFIG(ADDR_SURF_P2) |
2433 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434 				break;
2435 			case 29:
2436 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2438 						 PIPE_CONFIG(ADDR_SURF_P2) |
2439 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440 				break;
2441 			case 30:
2442 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2443 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2444 						 PIPE_CONFIG(ADDR_SURF_P2) |
2445 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2446 				break;
2447 			default:
2448 				gb_tile_moden = 0;
2449 				break;
2450 			}
2451 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2452 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2453 		}
2454 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2455 			switch (reg_offset) {
2456 			case 0:
2457 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2458 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2459 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2460 						 NUM_BANKS(ADDR_SURF_16_BANK));
2461 				break;
2462 			case 1:
2463 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2464 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2465 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2466 						 NUM_BANKS(ADDR_SURF_16_BANK));
2467 				break;
2468 			case 2:
2469 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2471 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2472 						 NUM_BANKS(ADDR_SURF_16_BANK));
2473 				break;
2474 			case 3:
2475 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2476 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2477 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2478 						 NUM_BANKS(ADDR_SURF_16_BANK));
2479 				break;
2480 			case 4:
2481 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2483 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2484 						 NUM_BANKS(ADDR_SURF_16_BANK));
2485 				break;
2486 			case 5:
2487 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2488 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2489 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2490 						 NUM_BANKS(ADDR_SURF_16_BANK));
2491 				break;
2492 			case 6:
2493 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2495 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2496 						 NUM_BANKS(ADDR_SURF_8_BANK));
2497 				break;
2498 			case 8:
2499 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2500 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2501 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2502 						 NUM_BANKS(ADDR_SURF_16_BANK));
2503 				break;
2504 			case 9:
2505 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2506 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2507 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2508 						 NUM_BANKS(ADDR_SURF_16_BANK));
2509 				break;
2510 			case 10:
2511 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2512 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2513 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2514 						 NUM_BANKS(ADDR_SURF_16_BANK));
2515 				break;
2516 			case 11:
2517 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2518 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2519 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2520 						 NUM_BANKS(ADDR_SURF_16_BANK));
2521 				break;
2522 			case 12:
2523 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2525 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2526 						 NUM_BANKS(ADDR_SURF_16_BANK));
2527 				break;
2528 			case 13:
2529 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2530 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2531 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2532 						 NUM_BANKS(ADDR_SURF_16_BANK));
2533 				break;
2534 			case 14:
2535 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2536 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2537 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2538 						 NUM_BANKS(ADDR_SURF_8_BANK));
2539 				break;
2540 			default:
2541 				gb_tile_moden = 0;
2542 				break;
2543 			}
2544 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2545 		}
2546 	} else
2547 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2548 }
2549 
2550 /**
2551  * cik_select_se_sh - select which SE, SH to address
2552  *
2553  * @rdev: radeon_device pointer
2554  * @se_num: shader engine to address
2555  * @sh_num: sh block to address
2556  *
2557  * Select which SE, SH combinations to address. Certain
2558  * registers are instanced per SE or SH.  0xffffffff means
2559  * broadcast to all SEs or SHs (CIK).
2560  */
2561 static void cik_select_se_sh(struct radeon_device *rdev,
2562 			     u32 se_num, u32 sh_num)
2563 {
2564 	u32 data = INSTANCE_BROADCAST_WRITES;
2565 
2566 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2567 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2568 	else if (se_num == 0xffffffff)
2569 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2570 	else if (sh_num == 0xffffffff)
2571 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2572 	else
2573 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2574 	WREG32(GRBM_GFX_INDEX, data);
2575 }
2576 
2577 /**
2578  * cik_create_bitmask - create a bitmask
2579  *
2580  * @bit_width: length of the mask
2581  *
2582  * create a variable length bit mask (CIK).
2583  * Returns the bitmask.
2584  */
2585 static u32 cik_create_bitmask(u32 bit_width)
2586 {
2587 	u32 i, mask = 0;
2588 
2589 	for (i = 0; i < bit_width; i++) {
2590 		mask <<= 1;
2591 		mask |= 1;
2592 	}
2593 	return mask;
2594 }
2595 
2596 /**
2597  * cik_select_se_sh - select which SE, SH to address
2598  *
2599  * @rdev: radeon_device pointer
2600  * @max_rb_num: max RBs (render backends) for the asic
2601  * @se_num: number of SEs (shader engines) for the asic
2602  * @sh_per_se: number of SH blocks per SE for the asic
2603  *
2604  * Calculates the bitmask of disabled RBs (CIK).
2605  * Returns the disabled RB bitmask.
2606  */
2607 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
2608 			      u32 max_rb_num, u32 se_num,
2609 			      u32 sh_per_se)
2610 {
2611 	u32 data, mask;
2612 
2613 	data = RREG32(CC_RB_BACKEND_DISABLE);
2614 	if (data & 1)
2615 		data &= BACKEND_DISABLE_MASK;
2616 	else
2617 		data = 0;
2618 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2619 
2620 	data >>= BACKEND_DISABLE_SHIFT;
2621 
2622 	mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
2623 
2624 	return data & mask;
2625 }
2626 
2627 /**
2628  * cik_setup_rb - setup the RBs on the asic
2629  *
2630  * @rdev: radeon_device pointer
2631  * @se_num: number of SEs (shader engines) for the asic
2632  * @sh_per_se: number of SH blocks per SE for the asic
2633  * @max_rb_num: max RBs (render backends) for the asic
2634  *
2635  * Configures per-SE/SH RB registers (CIK).
2636  */
2637 static void cik_setup_rb(struct radeon_device *rdev,
2638 			 u32 se_num, u32 sh_per_se,
2639 			 u32 max_rb_num)
2640 {
2641 	int i, j;
2642 	u32 data, mask;
2643 	u32 disabled_rbs = 0;
2644 	u32 enabled_rbs = 0;
2645 
2646 	for (i = 0; i < se_num; i++) {
2647 		for (j = 0; j < sh_per_se; j++) {
2648 			cik_select_se_sh(rdev, i, j);
2649 			data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
2650 			disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
2651 		}
2652 	}
2653 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2654 
2655 	mask = 1;
2656 	for (i = 0; i < max_rb_num; i++) {
2657 		if (!(disabled_rbs & mask))
2658 			enabled_rbs |= mask;
2659 		mask <<= 1;
2660 	}
2661 
2662 	for (i = 0; i < se_num; i++) {
2663 		cik_select_se_sh(rdev, i, 0xffffffff);
2664 		data = 0;
2665 		for (j = 0; j < sh_per_se; j++) {
2666 			switch (enabled_rbs & 3) {
2667 			case 1:
2668 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2669 				break;
2670 			case 2:
2671 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2672 				break;
2673 			case 3:
2674 			default:
2675 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2676 				break;
2677 			}
2678 			enabled_rbs >>= 2;
2679 		}
2680 		WREG32(PA_SC_RASTER_CONFIG, data);
2681 	}
2682 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2683 }
2684 
2685 /**
2686  * cik_gpu_init - setup the 3D engine
2687  *
2688  * @rdev: radeon_device pointer
2689  *
2690  * Configures the 3D engine and tiling configuration
2691  * registers so that the 3D engine is usable.
2692  */
2693 static void cik_gpu_init(struct radeon_device *rdev)
2694 {
2695 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
2696 	u32 mc_shared_chmap, mc_arb_ramcfg;
2697 	u32 hdp_host_path_cntl;
2698 	u32 tmp;
2699 	int i, j;
2700 
2701 	switch (rdev->family) {
2702 	case CHIP_BONAIRE:
2703 		rdev->config.cik.max_shader_engines = 2;
2704 		rdev->config.cik.max_tile_pipes = 4;
2705 		rdev->config.cik.max_cu_per_sh = 7;
2706 		rdev->config.cik.max_sh_per_se = 1;
2707 		rdev->config.cik.max_backends_per_se = 2;
2708 		rdev->config.cik.max_texture_channel_caches = 4;
2709 		rdev->config.cik.max_gprs = 256;
2710 		rdev->config.cik.max_gs_threads = 32;
2711 		rdev->config.cik.max_hw_contexts = 8;
2712 
2713 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2714 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2715 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2716 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2717 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2718 		break;
2719 	case CHIP_KAVERI:
2720 		rdev->config.cik.max_shader_engines = 1;
2721 		rdev->config.cik.max_tile_pipes = 4;
2722 		if ((rdev->pdev->device == 0x1304) ||
2723 		    (rdev->pdev->device == 0x1305) ||
2724 		    (rdev->pdev->device == 0x130C) ||
2725 		    (rdev->pdev->device == 0x130F) ||
2726 		    (rdev->pdev->device == 0x1310) ||
2727 		    (rdev->pdev->device == 0x1311) ||
2728 		    (rdev->pdev->device == 0x131C)) {
2729 			rdev->config.cik.max_cu_per_sh = 8;
2730 			rdev->config.cik.max_backends_per_se = 2;
2731 		} else if ((rdev->pdev->device == 0x1309) ||
2732 			   (rdev->pdev->device == 0x130A) ||
2733 			   (rdev->pdev->device == 0x130D) ||
2734 			   (rdev->pdev->device == 0x1313) ||
2735 			   (rdev->pdev->device == 0x131D)) {
2736 			rdev->config.cik.max_cu_per_sh = 6;
2737 			rdev->config.cik.max_backends_per_se = 2;
2738 		} else if ((rdev->pdev->device == 0x1306) ||
2739 			   (rdev->pdev->device == 0x1307) ||
2740 			   (rdev->pdev->device == 0x130B) ||
2741 			   (rdev->pdev->device == 0x130E) ||
2742 			   (rdev->pdev->device == 0x1315) ||
2743 			   (rdev->pdev->device == 0x131B)) {
2744 			rdev->config.cik.max_cu_per_sh = 4;
2745 			rdev->config.cik.max_backends_per_se = 1;
2746 		} else {
2747 			rdev->config.cik.max_cu_per_sh = 3;
2748 			rdev->config.cik.max_backends_per_se = 1;
2749 		}
2750 		rdev->config.cik.max_sh_per_se = 1;
2751 		rdev->config.cik.max_texture_channel_caches = 4;
2752 		rdev->config.cik.max_gprs = 256;
2753 		rdev->config.cik.max_gs_threads = 16;
2754 		rdev->config.cik.max_hw_contexts = 8;
2755 
2756 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2757 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2758 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2759 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2760 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2761 		break;
2762 	case CHIP_KABINI:
2763 	default:
2764 		rdev->config.cik.max_shader_engines = 1;
2765 		rdev->config.cik.max_tile_pipes = 2;
2766 		rdev->config.cik.max_cu_per_sh = 2;
2767 		rdev->config.cik.max_sh_per_se = 1;
2768 		rdev->config.cik.max_backends_per_se = 1;
2769 		rdev->config.cik.max_texture_channel_caches = 2;
2770 		rdev->config.cik.max_gprs = 256;
2771 		rdev->config.cik.max_gs_threads = 16;
2772 		rdev->config.cik.max_hw_contexts = 8;
2773 
2774 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2775 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2776 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2777 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2778 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2779 		break;
2780 	}
2781 
2782 	/* Initialize HDP */
2783 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2784 		WREG32((0x2c14 + j), 0x00000000);
2785 		WREG32((0x2c18 + j), 0x00000000);
2786 		WREG32((0x2c1c + j), 0x00000000);
2787 		WREG32((0x2c20 + j), 0x00000000);
2788 		WREG32((0x2c24 + j), 0x00000000);
2789 	}
2790 
2791 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2792 
2793 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2794 
2795 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2796 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2797 
2798 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
2799 	rdev->config.cik.mem_max_burst_length_bytes = 256;
2800 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
2801 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2802 	if (rdev->config.cik.mem_row_size_in_kb > 4)
2803 		rdev->config.cik.mem_row_size_in_kb = 4;
2804 	/* XXX use MC settings? */
2805 	rdev->config.cik.shader_engine_tile_size = 32;
2806 	rdev->config.cik.num_gpus = 1;
2807 	rdev->config.cik.multi_gpu_tile_size = 64;
2808 
2809 	/* fix up row size */
2810 	gb_addr_config &= ~ROW_SIZE_MASK;
2811 	switch (rdev->config.cik.mem_row_size_in_kb) {
2812 	case 1:
2813 	default:
2814 		gb_addr_config |= ROW_SIZE(0);
2815 		break;
2816 	case 2:
2817 		gb_addr_config |= ROW_SIZE(1);
2818 		break;
2819 	case 4:
2820 		gb_addr_config |= ROW_SIZE(2);
2821 		break;
2822 	}
2823 
2824 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
2825 	 * not have bank info, so create a custom tiling dword.
2826 	 * bits 3:0   num_pipes
2827 	 * bits 7:4   num_banks
2828 	 * bits 11:8  group_size
2829 	 * bits 15:12 row_size
2830 	 */
2831 	rdev->config.cik.tile_config = 0;
2832 	switch (rdev->config.cik.num_tile_pipes) {
2833 	case 1:
2834 		rdev->config.cik.tile_config |= (0 << 0);
2835 		break;
2836 	case 2:
2837 		rdev->config.cik.tile_config |= (1 << 0);
2838 		break;
2839 	case 4:
2840 		rdev->config.cik.tile_config |= (2 << 0);
2841 		break;
2842 	case 8:
2843 	default:
2844 		/* XXX what about 12? */
2845 		rdev->config.cik.tile_config |= (3 << 0);
2846 		break;
2847 	}
2848 	if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
2849 		rdev->config.cik.tile_config |= 1 << 4;
2850 	else
2851 		rdev->config.cik.tile_config |= 0 << 4;
2852 	rdev->config.cik.tile_config |=
2853 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
2854 	rdev->config.cik.tile_config |=
2855 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
2856 
2857 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
2858 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2859 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
2860 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
2861 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
2862 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
2863 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2864 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2865 
2866 	cik_tiling_mode_table_init(rdev);
2867 
2868 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
2869 		     rdev->config.cik.max_sh_per_se,
2870 		     rdev->config.cik.max_backends_per_se);
2871 
2872 	/* set HW defaults for 3D engine */
2873 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2874 
2875 	WREG32(SX_DEBUG_1, 0x20);
2876 
2877 	WREG32(TA_CNTL_AUX, 0x00010000);
2878 
2879 	tmp = RREG32(SPI_CONFIG_CNTL);
2880 	tmp |= 0x03000000;
2881 	WREG32(SPI_CONFIG_CNTL, tmp);
2882 
2883 	WREG32(SQ_CONFIG, 1);
2884 
2885 	WREG32(DB_DEBUG, 0);
2886 
2887 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2888 	tmp |= 0x00000400;
2889 	WREG32(DB_DEBUG2, tmp);
2890 
2891 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2892 	tmp |= 0x00020200;
2893 	WREG32(DB_DEBUG3, tmp);
2894 
2895 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2896 	tmp |= 0x00018208;
2897 	WREG32(CB_HW_CONTROL, tmp);
2898 
2899 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2900 
2901 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2902 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2903 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2904 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2905 
2906 	WREG32(VGT_NUM_INSTANCES, 1);
2907 
2908 	WREG32(CP_PERFMON_CNTL, 0);
2909 
2910 	WREG32(SQ_CONFIG, 0);
2911 
2912 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2913 					  FORCE_EOV_MAX_REZ_CNT(255)));
2914 
2915 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2916 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
2917 
2918 	WREG32(VGT_GS_VERTEX_REUSE, 16);
2919 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2920 
2921 	tmp = RREG32(HDP_MISC_CNTL);
2922 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2923 	WREG32(HDP_MISC_CNTL, tmp);
2924 
2925 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2926 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2927 
2928 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2929 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2930 
2931 	udelay(50);
2932 }
2933 
2934 /*
2935  * GPU scratch registers helpers function.
2936  */
2937 /**
2938  * cik_scratch_init - setup driver info for CP scratch regs
2939  *
2940  * @rdev: radeon_device pointer
2941  *
2942  * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
2944  * is not used by default on newer asics (r6xx+).  On newer asics,
2945  * memory buffers are used for fences rather than scratch regs.
2946  */
2947 static void cik_scratch_init(struct radeon_device *rdev)
2948 {
2949 	int i;
2950 
2951 	rdev->scratch.num_reg = 7;
2952 	rdev->scratch.reg_base = SCRATCH_REG0;
2953 	for (i = 0; i < rdev->scratch.num_reg; i++) {
2954 		rdev->scratch.free[i] = true;
2955 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2956 	}
2957 }
2958 
2959 /**
2960  * cik_ring_test - basic gfx ring test
2961  *
2962  * @rdev: radeon_device pointer
2963  * @ring: radeon_ring structure holding ring information
2964  *
2965  * Allocate a scratch register and write to it using the gfx ring (CIK).
2966  * Provides a basic gfx ring test to verify that the ring is working.
2967  * Used by cik_cp_gfx_resume();
2968  * Returns 0 on success, error on failure.
2969  */
2970 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2971 {
2972 	uint32_t scratch;
2973 	uint32_t tmp = 0;
2974 	unsigned i;
2975 	int r;
2976 
2977 	r = radeon_scratch_get(rdev, &scratch);
2978 	if (r) {
2979 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2980 		return r;
2981 	}
2982 	WREG32(scratch, 0xCAFEDEAD);
2983 	r = radeon_ring_lock(rdev, ring, 3);
2984 	if (r) {
2985 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2986 		radeon_scratch_free(rdev, scratch);
2987 		return r;
2988 	}
2989 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2990 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2991 	radeon_ring_write(ring, 0xDEADBEEF);
2992 	radeon_ring_unlock_commit(rdev, ring);
2993 
2994 	for (i = 0; i < rdev->usec_timeout; i++) {
2995 		tmp = RREG32(scratch);
2996 		if (tmp == 0xDEADBEEF)
2997 			break;
2998 		DRM_UDELAY(1);
2999 	}
3000 	if (i < rdev->usec_timeout) {
3001 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3002 	} else {
3003 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3004 			  ring->idx, scratch, tmp);
3005 		r = -EINVAL;
3006 	}
3007 	radeon_scratch_free(rdev, scratch);
3008 	return r;
3009 }
3010 
3011 /**
3012  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3013  *
3014  * @rdev: radeon_device pointer
3015  * @fence: radeon fence object
3016  *
 * Emits a fence sequence number on the gfx ring and flushes
3018  * GPU caches.
3019  */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address where the fence sequence number will be written */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* low address bits; bits 1:0 dropped (dword alignment) */
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* high address bits plus data/interrupt selects for the EOP event */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	/* HDP_MEM_COHERENCY_FLUSH_CNTL as a dword register offset */
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}
3047 
3048 /**
3049  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3050  *
3051  * @rdev: radeon_device pointer
3052  * @fence: radeon fence object
3053  *
 * Emits a fence sequence number on the compute ring and flushes
3055  * GPU caches.
3056  */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address where the fence sequence number will be written */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* unlike the gfx EOP packet, the selects come before the address here */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	/* HDP_MEM_COHERENCY_FLUSH_CNTL as a dword register offset */
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}
3085 
3086 void cik_semaphore_ring_emit(struct radeon_device *rdev,
3087 			     struct radeon_ring *ring,
3088 			     struct radeon_semaphore *semaphore,
3089 			     bool emit_wait)
3090 {
3091 	uint64_t addr = semaphore->gpu_addr;
3092 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3093 
3094 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3095 	radeon_ring_write(ring, addr & 0xffffffff);
3096 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3097 }
3098 
3099 /*
3100  * IB stuff
3101  */
3102 /**
3103  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3104  *
3105  * @rdev: radeon_device pointer
3106  * @ib: radeon indirect buffer object
3107  *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
3112  * on the gfx ring for execution by the GPU.
3113  */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for the SET_UCONFIG_REG write below plus
			 * 4 dwords for the INDIRECT_BUFFER packet emitted at
			 * the end of this function */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for this WRITE_DATA packet plus 4 for the
			 * trailing INDIRECT_BUFFER packet */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			/* note: the 0xffffffff mask is a no-op on a 32-bit value */
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB length in dwords plus the VM id to execute under (0 if no VM) */
	control |= ib->length_dw |
		(ib->vm ? (ib->vm->id << 24) : 0);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
3157 
3158 /**
3159  * cik_ib_test - basic gfx ring IB test
3160  *
3161  * @rdev: radeon_device pointer
3162  * @ring: radeon_ring structure holding ring information
3163  *
3164  * Allocate an IB and execute it on the gfx ring (CIK).
3165  * Provides a basic gfx ring test to verify that IBs are working.
3166  * Returns 0 on success, error on failure.
3167  */
3168 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3169 {
3170 	struct radeon_ib ib;
3171 	uint32_t scratch;
3172 	uint32_t tmp = 0;
3173 	unsigned i;
3174 	int r;
3175 
3176 	r = radeon_scratch_get(rdev, &scratch);
3177 	if (r) {
3178 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3179 		return r;
3180 	}
3181 	WREG32(scratch, 0xCAFEDEAD);
3182 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3183 	if (r) {
3184 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3185 		return r;
3186 	}
3187 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3188 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3189 	ib.ptr[2] = 0xDEADBEEF;
3190 	ib.length_dw = 3;
3191 	r = radeon_ib_schedule(rdev, &ib, NULL);
3192 	if (r) {
3193 		radeon_scratch_free(rdev, scratch);
3194 		radeon_ib_free(rdev, &ib);
3195 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3196 		return r;
3197 	}
3198 	r = radeon_fence_wait(ib.fence, false);
3199 	if (r) {
3200 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3201 		return r;
3202 	}
3203 	for (i = 0; i < rdev->usec_timeout; i++) {
3204 		tmp = RREG32(scratch);
3205 		if (tmp == 0xDEADBEEF)
3206 			break;
3207 		DRM_UDELAY(1);
3208 	}
3209 	if (i < rdev->usec_timeout) {
3210 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3211 	} else {
3212 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3213 			  scratch, tmp);
3214 		r = -EINVAL;
3215 	}
3216 	radeon_scratch_free(rdev, scratch);
3217 	radeon_ib_free(rdev, &ib);
3218 	return r;
3219 }
3220 
3221 /*
3222  * CP.
 * On CIK, gfx and compute now have independent command processors.
3224  *
3225  * GFX
3226  * Gfx consists of a single ring and can process both gfx jobs and
3227  * compute jobs.  The gfx CP consists of three microengines (ME):
3228  * PFP - Pre-Fetch Parser
3229  * ME - Micro Engine
3230  * CE - Constant Engine
3231  * The PFP and ME make up what is considered the Drawing Engine (DE).
3232  * The CE is an asynchronous engine used for updating buffer desciptors
3233  * used by the DE so that they can be loaded into cache in parallel
3234  * while the DE is processing state update packets.
3235  *
3236  * Compute
3237  * The compute CP consists of two microengines (ME):
3238  * MEC1 - Compute MicroEngine 1
3239  * MEC2 - Compute MicroEngine 2
3240  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3241  * The queues are exposed to userspace and are programmed directly
3242  * by the compute runtime.
3243  */
3244 /**
3245  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3246  *
3247  * @rdev: radeon_device pointer
3248  * @enable: enable or disable the MEs
3249  *
3250  * Halts or unhalts the gfx MEs.
3251  */
3252 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3253 {
3254 	if (enable)
3255 		WREG32(CP_ME_CNTL, 0);
3256 	else {
3257 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3258 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3259 	}
3260 	udelay(50);
3261 }
3262 
3263 /**
3264  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3265  *
3266  * @rdev: radeon_device pointer
3267  *
3268  * Loads the gfx PFP, ME, and CE ucode.
3269  * Returns 0 for success, -EINVAL if the ucode is not available.
3270  */
3271 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3272 {
3273 	const __be32 *fw_data;
3274 	int i;
3275 
3276 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3277 		return -EINVAL;
3278 
3279 	cik_cp_gfx_enable(rdev, false);
3280 
3281 	/* PFP */
3282 	fw_data = (const __be32 *)rdev->pfp_fw->data;
3283 	WREG32(CP_PFP_UCODE_ADDR, 0);
3284 	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3285 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3286 	WREG32(CP_PFP_UCODE_ADDR, 0);
3287 
3288 	/* CE */
3289 	fw_data = (const __be32 *)rdev->ce_fw->data;
3290 	WREG32(CP_CE_UCODE_ADDR, 0);
3291 	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3292 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3293 	WREG32(CP_CE_UCODE_ADDR, 0);
3294 
3295 	/* ME */
3296 	fw_data = (const __be32 *)rdev->me_fw->data;
3297 	WREG32(CP_ME_RAM_WADDR, 0);
3298 	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3299 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3300 	WREG32(CP_ME_RAM_WADDR, 0);
3301 
3302 	WREG32(CP_PFP_UCODE_ADDR, 0);
3303 	WREG32(CP_CE_UCODE_ADDR, 0);
3304 	WREG32(CP_ME_RAM_WADDR, 0);
3305 	WREG32(CP_ME_RAM_RADDR, 0);
3306 	return 0;
3307 }
3308 
3309 /**
3310  * cik_cp_gfx_start - start the gfx ring
3311  *
3312  * @rdev: radeon_device pointer
3313  *
3314  * Enables the ring and loads the clear state context and other
3315  * packets required to init the ring.
3316  * Returns 0 for success, error for failure.
3317  */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* 17 = dwords for the fixed packets below (4+2+3+2+2+4),
	 * plus the clear state image itself */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0xc000);
	radeon_ring_write(ring, 0xc000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* stream the golden clear state image (clearstate_ci.h) */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring);

	return 0;
}
3369 
3370 /**
3371  * cik_cp_gfx_fini - stop the gfx ring
3372  *
3373  * @rdev: radeon_device pointer
3374  *
3375  * Stop the gfx ring and tear down the driver ring
3376  * info.
3377  */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	/* halt the gfx MEs before tearing down the ring */
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
3383 
3384 /**
3385  * cik_cp_gfx_resume - setup the gfx ring buffer registers
3386  *
3387  * @rdev: radeon_device pointer
3388  *
3389  * Program the location and size of the gfx ring buffer
3390  * and test it to make sure it's working.
3391  * Returns 0 for success, error for failure.
3392  */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size (log2-encoded via order_base_2) */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address wether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	/* without writeback, disable rptr updates through memory */
	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	/* final CNTL value (clears RB_RPTR_WR_ENA set above) */
	WREG32(CP_RB0_CNTL, tmp);

	/* ring base is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}
	return 0;
}
3456 
3457 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3458 			      struct radeon_ring *ring)
3459 {
3460 	u32 rptr;
3461 
3462 
3463 
3464 	if (rdev->wb.enabled) {
3465 		rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3466 	} else {
3467 		mutex_lock(&rdev->srbm_mutex);
3468 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3469 		rptr = RREG32(CP_HQD_PQ_RPTR);
3470 		cik_srbm_select(rdev, 0, 0, 0, 0);
3471 		mutex_unlock(&rdev->srbm_mutex);
3472 	}
3473 
3474 	return rptr;
3475 }
3476 
3477 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3478 			      struct radeon_ring *ring)
3479 {
3480 	u32 wptr;
3481 
3482 	if (rdev->wb.enabled) {
3483 		wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3484 	} else {
3485 		mutex_lock(&rdev->srbm_mutex);
3486 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3487 		wptr = RREG32(CP_HQD_PQ_WPTR);
3488 		cik_srbm_select(rdev, 0, 0, 0, 0);
3489 		mutex_unlock(&rdev->srbm_mutex);
3490 	}
3491 
3492 	return wptr;
3493 }
3494 
void cik_compute_ring_set_wptr(struct radeon_device *rdev,
			       struct radeon_ring *ring)
{
	/* publish the new wptr in the writeback shadow ... */
	rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
	/* ... and ring the queue's doorbell so the CP picks it up */
	WDOORBELL32(ring->doorbell_offset, ring->wptr);
}
3501 
3502 /**
3503  * cik_cp_compute_enable - enable/disable the compute CP MEs
3504  *
3505  * @rdev: radeon_device pointer
3506  * @enable: enable or disable the MEs
3507  *
3508  * Halts or unhalts the compute MEs.
3509  */
3510 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
3511 {
3512 	if (enable)
3513 		WREG32(CP_MEC_CNTL, 0);
3514 	else
3515 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3516 	udelay(50);
3517 }
3518 
3519 /**
3520  * cik_cp_compute_load_microcode - load the compute CP ME ucode
3521  *
3522  * @rdev: radeon_device pointer
3523  *
3524  * Loads the compute MEC1&2 ucode.
3525  * Returns 0 for success, -EINVAL if the ucode is not available.
3526  */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* halt the compute MEs while loading new ucode */
	cik_cp_compute_enable(rdev, false);

	/* MEC1 */
	fw_data = (const __be32 *)rdev->mec_fw->data;
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

	if (rdev->family == CHIP_KAVERI) {
		/* MEC2 - only KAVERI has a second MEC; it is loaded from
		 * the same ucode image as MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
	}

	return 0;
}
3555 
3556 /**
3557  * cik_cp_compute_start - start the compute queues
3558  *
3559  * @rdev: radeon_device pointer
3560  *
3561  * Enable the compute queues.
3562  * Returns 0 for success, error for failure.
3563  */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	/* nothing to do here beyond unhalting the MEs; the queues
	 * themselves are programmed in cik_cp_compute_resume() */
	cik_cp_compute_enable(rdev, true);

	return 0;
}
3570 
3571 /**
3572  * cik_cp_compute_fini - stop the compute queues
3573  *
3574  * @rdev: radeon_device pointer
3575  *
3576  * Stop the compute queues and tear down the driver queue
3577  * info.
3578  */
3579 static void cik_cp_compute_fini(struct radeon_device *rdev)
3580 {
3581 	int i, idx, r;
3582 
3583 	cik_cp_compute_enable(rdev, false);
3584 
3585 	for (i = 0; i < 2; i++) {
3586 		if (i == 0)
3587 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
3588 		else
3589 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
3590 
3591 		if (rdev->ring[idx].mqd_obj) {
3592 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3593 			if (unlikely(r != 0))
3594 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
3595 
3596 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3597 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3598 
3599 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3600 			rdev->ring[idx].mqd_obj = NULL;
3601 		}
3602 	}
3603 }
3604 
3605 static void cik_mec_fini(struct radeon_device *rdev)
3606 {
3607 	int r;
3608 
3609 	if (rdev->mec.hpd_eop_obj) {
3610 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3611 		if (unlikely(r != 0))
3612 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
3613 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
3614 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3615 
3616 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
3617 		rdev->mec.hpd_eop_obj = NULL;
3618 	}
3619 }
3620 
3621 #define MEC_HPD_SIZE 2048
3622 
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 */
	if (rdev->family == CHIP_KAVERI)
		rdev->mec.num_mec = 2;
	else
		rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 4;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	/* allocate the EOP buffer in GTT: MEC_HPD_SIZE * 2 bytes per pipe */
	if (rdev->mec.hpd_eop_obj == NULL) {
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	/* NOTE(review): the error paths below call cik_mec_fini() while the
	 * bo is still reserved, and cik_mec_fini() reserves it again —
	 * confirm this cannot deadlock.
	 */
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
3678 
/* Image of the per-queue CP_MQD and CP_HQD registers as stored inside
 * the MQD (see struct bonaire_mqd below).
 * NOTE(review): the field order appears to mirror the hardware register
 * layout consumed by the CP — do not reorder without confirming.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
3717 
/* Bonaire MQD (memory queue descriptor).  One of these is written into a
 * GTT buffer object per compute queue (see cik_cp_compute_resume());
 * queue_state holds the HQD register image for the queue.
 * NOTE(review): the layout appears to be defined by the CP microcode —
 * do not reorder fields without confirming.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
3745 
3746 /**
3747  * cik_cp_compute_resume - setup the compute queue registers
3748  *
3749  * @rdev: radeon_device pointer
3750  *
3751  * Program the compute queues and test them to make sure they
3752  * are working.
3753  * Returns 0 for success, error for failure.
3754  */
3755 static int cik_cp_compute_resume(struct radeon_device *rdev)
3756 {
3757 	int r, i, idx;
3758 	u32 tmp;
3759 	bool use_doorbell = true;
3760 	u64 hqd_gpu_addr;
3761 	u64 mqd_gpu_addr;
3762 	u64 eop_gpu_addr;
3763 	u64 wb_gpu_addr;
3764 	u32 *buf;
3765 	struct bonaire_mqd *mqd;
3766 
3767 	r = cik_cp_compute_start(rdev);
3768 	if (r)
3769 		return r;
3770 
3771 	/* fix up chicken bits */
3772 	tmp = RREG32(CP_CPF_DEBUG);
3773 	tmp |= (1 << 23);
3774 	WREG32(CP_CPF_DEBUG, tmp);
3775 
3776 	/* init the pipes */
3777 	mutex_lock(&rdev->srbm_mutex);
3778 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
3779 		int me = (i < 4) ? 1 : 2;
3780 		int pipe = (i < 4) ? i : (i - 4);
3781 
3782 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
3783 
3784 		cik_srbm_select(rdev, me, pipe, 0, 0);
3785 
3786 		/* write the EOP addr */
3787 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
3788 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
3789 
3790 		/* set the VMID assigned */
3791 		WREG32(CP_HPD_EOP_VMID, 0);
3792 
3793 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3794 		tmp = RREG32(CP_HPD_EOP_CONTROL);
3795 		tmp &= ~EOP_SIZE_MASK;
3796 		tmp |= order_base_2(MEC_HPD_SIZE / 8);
3797 		WREG32(CP_HPD_EOP_CONTROL, tmp);
3798 	}
3799 	cik_srbm_select(rdev, 0, 0, 0, 0);
3800 	mutex_unlock(&rdev->srbm_mutex);
3801 
3802 	/* init the queues.  Just two for now. */
3803 	for (i = 0; i < 2; i++) {
3804 		if (i == 0)
3805 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
3806 		else
3807 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
3808 
3809 		if (rdev->ring[idx].mqd_obj == NULL) {
3810 			r = radeon_bo_create(rdev,
3811 					     sizeof(struct bonaire_mqd),
3812 					     PAGE_SIZE, true,
3813 					     RADEON_GEM_DOMAIN_GTT, NULL,
3814 					     &rdev->ring[idx].mqd_obj);
3815 			if (r) {
3816 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
3817 				return r;
3818 			}
3819 		}
3820 
3821 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3822 		if (unlikely(r != 0)) {
3823 			cik_cp_compute_fini(rdev);
3824 			return r;
3825 		}
3826 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
3827 				  &mqd_gpu_addr);
3828 		if (r) {
3829 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
3830 			cik_cp_compute_fini(rdev);
3831 			return r;
3832 		}
3833 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
3834 		if (r) {
3835 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
3836 			cik_cp_compute_fini(rdev);
3837 			return r;
3838 		}
3839 
3840 		/* doorbell offset */
3841 		rdev->ring[idx].doorbell_offset =
3842 			(rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
3843 
3844 		/* init the mqd struct */
3845 		memset(buf, 0, sizeof(struct bonaire_mqd));
3846 
3847 		mqd = (struct bonaire_mqd *)buf;
3848 		mqd->header = 0xC0310800;
3849 		mqd->static_thread_mgmt01[0] = 0xffffffff;
3850 		mqd->static_thread_mgmt01[1] = 0xffffffff;
3851 		mqd->static_thread_mgmt23[0] = 0xffffffff;
3852 		mqd->static_thread_mgmt23[1] = 0xffffffff;
3853 
3854 		mutex_lock(&rdev->srbm_mutex);
3855 		cik_srbm_select(rdev, rdev->ring[idx].me,
3856 				rdev->ring[idx].pipe,
3857 				rdev->ring[idx].queue, 0);
3858 
3859 		/* disable wptr polling */
3860 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
3861 		tmp &= ~WPTR_POLL_EN;
3862 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
3863 
3864 		/* enable doorbell? */
3865 		mqd->queue_state.cp_hqd_pq_doorbell_control =
3866 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3867 		if (use_doorbell)
3868 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3869 		else
3870 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3871 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3872 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
3873 
3874 		/* disable the queue if it's active */
3875 		mqd->queue_state.cp_hqd_dequeue_request = 0;
3876 		mqd->queue_state.cp_hqd_pq_rptr = 0;
3877 		mqd->queue_state.cp_hqd_pq_wptr= 0;
3878 		if (RREG32(CP_HQD_ACTIVE) & 1) {
3879 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3880 			for (i = 0; i < rdev->usec_timeout; i++) {
3881 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
3882 					break;
3883 				udelay(1);
3884 			}
3885 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3886 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3887 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3888 		}
3889 
3890 		/* set the pointer to the MQD */
3891 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3892 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3893 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3894 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3895 		/* set MQD vmid to 0 */
3896 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3897 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3898 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3899 
3900 		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3901 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3902 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3903 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3904 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3905 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3906 
3907 		/* set up the HQD, this is similar to CP_RB0_CNTL */
3908 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3909 		mqd->queue_state.cp_hqd_pq_control &=
3910 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3911 
3912 		mqd->queue_state.cp_hqd_pq_control |=
3913 			order_base_2(rdev->ring[idx].ring_size / 8);
3914 		mqd->queue_state.cp_hqd_pq_control |=
3915 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
3916 #ifdef __BIG_ENDIAN
3917 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3918 #endif
3919 		mqd->queue_state.cp_hqd_pq_control &=
3920 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3921 		mqd->queue_state.cp_hqd_pq_control |=
3922 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3923 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3924 
3925 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3926 		if (i == 0)
3927 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3928 		else
3929 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3930 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3931 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3932 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3933 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3934 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3935 
3936 		/* set the wb address wether it's enabled or not */
3937 		if (i == 0)
3938 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3939 		else
3940 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3941 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3942 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3943 			upper_32_bits(wb_gpu_addr) & 0xffff;
3944 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3945 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3946 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3947 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3948 
3949 		/* enable the doorbell if requested */
3950 		if (use_doorbell) {
3951 			mqd->queue_state.cp_hqd_pq_doorbell_control =
3952 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3953 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3954 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
3955 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3956 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3957 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
3958 				~(DOORBELL_SOURCE | DOORBELL_HIT);
3959 
3960 		} else {
3961 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3962 		}
3963 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3964 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
3965 
3966 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3967 		rdev->ring[idx].wptr = 0;
3968 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3969 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3970 		rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3971 		mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3972 
3973 		/* set the vmid for the queue */
3974 		mqd->queue_state.cp_hqd_vmid = 0;
3975 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3976 
3977 		/* activate the queue */
3978 		mqd->queue_state.cp_hqd_active = 1;
3979 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3980 
3981 		cik_srbm_select(rdev, 0, 0, 0, 0);
3982 		mutex_unlock(&rdev->srbm_mutex);
3983 
3984 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3985 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3986 
3987 		rdev->ring[idx].ready = true;
3988 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3989 		if (r)
3990 			rdev->ring[idx].ready = false;
3991 	}
3992 
3993 	return 0;
3994 }
3995 
/* Enable or disable both command processors: the gfx CP and the
 * compute (MEC) CP.
 */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4001 
/* Load the microcode for the gfx CP and then, if that succeeded, for
 * the compute (MEC) CP.  Returns 0 on success or the first error.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r == 0)
		r = cik_cp_compute_load_microcode(rdev);

	return r;
}
4015 
/* Tear down both the gfx and the compute (MEC) command processors. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4021 
4022 static int cik_cp_resume(struct radeon_device *rdev)
4023 {
4024 	int r;
4025 
4026 	cik_enable_gui_idle_interrupt(rdev, false);
4027 
4028 	r = cik_cp_load_microcode(rdev);
4029 	if (r)
4030 		return r;
4031 
4032 	r = cik_cp_gfx_resume(rdev);
4033 	if (r)
4034 		return r;
4035 	r = cik_cp_compute_resume(rdev);
4036 	if (r)
4037 		return r;
4038 
4039 	cik_enable_gui_idle_interrupt(rdev, true);
4040 
4041 	return 0;
4042 }
4043 
/* Dump the GRBM/SRBM, SDMA and CP status registers to the kernel log.
 * Used before and after a soft reset to aid debugging of GPU hangs.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	/* both SDMA engines share one register layout at different offsets */
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4083 
4084 /**
4085  * cik_gpu_check_soft_reset - check which blocks are busy
4086  *
4087  * @rdev: radeon_device pointer
4088  *
4089  * Check which blocks are busy and return the relevant reset
4090  * mask to be used by cik_gpu_soft_reset().
4091  * Returns a mask of the blocks to be reset.
4092  */
4093 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4094 {
4095 	u32 reset_mask = 0;
4096 	u32 tmp;
4097 
4098 	/* GRBM_STATUS */
4099 	tmp = RREG32(GRBM_STATUS);
4100 	if (tmp & (PA_BUSY | SC_BUSY |
4101 		   BCI_BUSY | SX_BUSY |
4102 		   TA_BUSY | VGT_BUSY |
4103 		   DB_BUSY | CB_BUSY |
4104 		   GDS_BUSY | SPI_BUSY |
4105 		   IA_BUSY | IA_BUSY_NO_DMA))
4106 		reset_mask |= RADEON_RESET_GFX;
4107 
4108 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4109 		reset_mask |= RADEON_RESET_CP;
4110 
4111 	/* GRBM_STATUS2 */
4112 	tmp = RREG32(GRBM_STATUS2);
4113 	if (tmp & RLC_BUSY)
4114 		reset_mask |= RADEON_RESET_RLC;
4115 
4116 	/* SDMA0_STATUS_REG */
4117 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4118 	if (!(tmp & SDMA_IDLE))
4119 		reset_mask |= RADEON_RESET_DMA;
4120 
4121 	/* SDMA1_STATUS_REG */
4122 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4123 	if (!(tmp & SDMA_IDLE))
4124 		reset_mask |= RADEON_RESET_DMA1;
4125 
4126 	/* SRBM_STATUS2 */
4127 	tmp = RREG32(SRBM_STATUS2);
4128 	if (tmp & SDMA_BUSY)
4129 		reset_mask |= RADEON_RESET_DMA;
4130 
4131 	if (tmp & SDMA1_BUSY)
4132 		reset_mask |= RADEON_RESET_DMA1;
4133 
4134 	/* SRBM_STATUS */
4135 	tmp = RREG32(SRBM_STATUS);
4136 
4137 	if (tmp & IH_BUSY)
4138 		reset_mask |= RADEON_RESET_IH;
4139 
4140 	if (tmp & SEM_BUSY)
4141 		reset_mask |= RADEON_RESET_SEM;
4142 
4143 	if (tmp & GRBM_RQ_PENDING)
4144 		reset_mask |= RADEON_RESET_GRBM;
4145 
4146 	if (tmp & VMC_BUSY)
4147 		reset_mask |= RADEON_RESET_VMC;
4148 
4149 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4150 		   MCC_BUSY | MCD_BUSY))
4151 		reset_mask |= RADEON_RESET_MC;
4152 
4153 	if (evergreen_is_display_hung(rdev))
4154 		reset_mask |= RADEON_RESET_DISPLAY;
4155 
4156 	/* Skip MC reset as it's mostly likely not hung, just busy */
4157 	if (reset_mask & RADEON_RESET_MC) {
4158 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4159 		reset_mask &= ~RADEON_RESET_MC;
4160 	}
4161 
4162 	return reset_mask;
4163 }
4164 
4165 /**
4166  * cik_gpu_soft_reset - soft reset GPU
4167  *
4168  * @rdev: radeon_device pointer
4169  * @reset_mask: mask of which blocks to reset
4170  *
4171  * Soft reset the blocks specified in @reset_mask.
4172  */
4173 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4174 {
4175 	struct evergreen_mc_save save;
4176 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4177 	u32 tmp;
4178 
4179 	if (reset_mask == 0)
4180 		return;
4181 
4182 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4183 
4184 	cik_print_gpu_status_regs(rdev);
4185 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4186 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4187 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4188 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4189 
4190 	/* stop the rlc */
4191 	cik_rlc_stop(rdev);
4192 
4193 	/* Disable GFX parsing/prefetching */
4194 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4195 
4196 	/* Disable MEC parsing/prefetching */
4197 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4198 
4199 	if (reset_mask & RADEON_RESET_DMA) {
4200 		/* sdma0 */
4201 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4202 		tmp |= SDMA_HALT;
4203 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4204 	}
4205 	if (reset_mask & RADEON_RESET_DMA1) {
4206 		/* sdma1 */
4207 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4208 		tmp |= SDMA_HALT;
4209 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4210 	}
4211 
4212 	evergreen_mc_stop(rdev, &save);
4213 	if (evergreen_mc_wait_for_idle(rdev)) {
4214 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4215 	}
4216 
4217 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4218 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4219 
4220 	if (reset_mask & RADEON_RESET_CP) {
4221 		grbm_soft_reset |= SOFT_RESET_CP;
4222 
4223 		srbm_soft_reset |= SOFT_RESET_GRBM;
4224 	}
4225 
4226 	if (reset_mask & RADEON_RESET_DMA)
4227 		srbm_soft_reset |= SOFT_RESET_SDMA;
4228 
4229 	if (reset_mask & RADEON_RESET_DMA1)
4230 		srbm_soft_reset |= SOFT_RESET_SDMA1;
4231 
4232 	if (reset_mask & RADEON_RESET_DISPLAY)
4233 		srbm_soft_reset |= SOFT_RESET_DC;
4234 
4235 	if (reset_mask & RADEON_RESET_RLC)
4236 		grbm_soft_reset |= SOFT_RESET_RLC;
4237 
4238 	if (reset_mask & RADEON_RESET_SEM)
4239 		srbm_soft_reset |= SOFT_RESET_SEM;
4240 
4241 	if (reset_mask & RADEON_RESET_IH)
4242 		srbm_soft_reset |= SOFT_RESET_IH;
4243 
4244 	if (reset_mask & RADEON_RESET_GRBM)
4245 		srbm_soft_reset |= SOFT_RESET_GRBM;
4246 
4247 	if (reset_mask & RADEON_RESET_VMC)
4248 		srbm_soft_reset |= SOFT_RESET_VMC;
4249 
4250 	if (!(rdev->flags & RADEON_IS_IGP)) {
4251 		if (reset_mask & RADEON_RESET_MC)
4252 			srbm_soft_reset |= SOFT_RESET_MC;
4253 	}
4254 
4255 	if (grbm_soft_reset) {
4256 		tmp = RREG32(GRBM_SOFT_RESET);
4257 		tmp |= grbm_soft_reset;
4258 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4259 		WREG32(GRBM_SOFT_RESET, tmp);
4260 		tmp = RREG32(GRBM_SOFT_RESET);
4261 
4262 		udelay(50);
4263 
4264 		tmp &= ~grbm_soft_reset;
4265 		WREG32(GRBM_SOFT_RESET, tmp);
4266 		tmp = RREG32(GRBM_SOFT_RESET);
4267 	}
4268 
4269 	if (srbm_soft_reset) {
4270 		tmp = RREG32(SRBM_SOFT_RESET);
4271 		tmp |= srbm_soft_reset;
4272 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4273 		WREG32(SRBM_SOFT_RESET, tmp);
4274 		tmp = RREG32(SRBM_SOFT_RESET);
4275 
4276 		udelay(50);
4277 
4278 		tmp &= ~srbm_soft_reset;
4279 		WREG32(SRBM_SOFT_RESET, tmp);
4280 		tmp = RREG32(SRBM_SOFT_RESET);
4281 	}
4282 
4283 	/* Wait a little for things to settle down */
4284 	udelay(50);
4285 
4286 	evergreen_mc_resume(rdev, &save);
4287 	udelay(50);
4288 
4289 	cik_print_gpu_status_regs(rdev);
4290 }
4291 
4292 /**
4293  * cik_asic_reset - soft reset GPU
4294  *
4295  * @rdev: radeon_device pointer
4296  *
4297  * Look up which blocks are hung and attempt
4298  * to reset them.
4299  * Returns 0 for success.
4300  */
4301 int cik_asic_reset(struct radeon_device *rdev)
4302 {
4303 	u32 reset_mask;
4304 
4305 	reset_mask = cik_gpu_check_soft_reset(rdev);
4306 
4307 	if (reset_mask)
4308 		r600_set_bios_scratch_engine_hung(rdev, true);
4309 
4310 	cik_gpu_soft_reset(rdev, reset_mask);
4311 
4312 	reset_mask = cik_gpu_check_soft_reset(rdev);
4313 
4314 	if (!reset_mask)
4315 		r600_set_bios_scratch_engine_hung(rdev, false);
4316 
4317 	return 0;
4318 }
4319 
4320 /**
4321  * cik_gfx_is_lockup - check if the 3D engine is locked up
4322  *
4323  * @rdev: radeon_device pointer
4324  * @ring: radeon_ring structure holding ring information
4325  *
4326  * Check if the 3D engine is locked up (CIK).
4327  * Returns true if the engine is locked, false if not.
4328  */
4329 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4330 {
4331 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4332 
4333 	if (!(reset_mask & (RADEON_RESET_GFX |
4334 			    RADEON_RESET_COMPUTE |
4335 			    RADEON_RESET_CP))) {
4336 		radeon_ring_lockup_update(ring);
4337 		return false;
4338 	}
4339 	/* force CP activities */
4340 	radeon_ring_force_activity(rdev, ring);
4341 	return radeon_ring_test_lockup(rdev, ring);
4342 }
4343 
/* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* stop memory traffic while we reprogram the aperture registers */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location register packs start (low 16 bits) and end (high 16) */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP aperture disabled: bottom > top */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
4400 
4401 /**
4402  * cik_mc_init - initialize the memory controller driver params
4403  *
4404  * @rdev: radeon_device pointer
4405  *
4406  * Look up the amount of vram, vram width, and decide how to place
4407  * vram and gart within the GPU's physical address space (CIK).
4408  * Returns 0 for success.
4409  */
4410 static int cik_mc_init(struct radeon_device *rdev)
4411 {
4412 	u32 tmp;
4413 	int chansize, numchan;
4414 
4415 	/* Get VRAM informations */
4416 	rdev->mc.vram_is_ddr = true;
4417 	tmp = RREG32(MC_ARB_RAMCFG);
4418 	if (tmp & CHANSIZE_MASK) {
4419 		chansize = 64;
4420 	} else {
4421 		chansize = 32;
4422 	}
4423 	tmp = RREG32(MC_SHARED_CHMAP);
4424 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4425 	case 0:
4426 	default:
4427 		numchan = 1;
4428 		break;
4429 	case 1:
4430 		numchan = 2;
4431 		break;
4432 	case 2:
4433 		numchan = 4;
4434 		break;
4435 	case 3:
4436 		numchan = 8;
4437 		break;
4438 	case 4:
4439 		numchan = 3;
4440 		break;
4441 	case 5:
4442 		numchan = 6;
4443 		break;
4444 	case 6:
4445 		numchan = 10;
4446 		break;
4447 	case 7:
4448 		numchan = 12;
4449 		break;
4450 	case 8:
4451 		numchan = 16;
4452 		break;
4453 	}
4454 	rdev->mc.vram_width = numchan * chansize;
4455 	/* Could aper size report 0 ? */
4456 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4457 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4458 	/* size in MB on si */
4459 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4460 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4461 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4462 	si_vram_gtt_location(rdev, &rdev->mc);
4463 	radeon_update_bandwidth_info(rdev);
4464 
4465 	return 0;
4466 }
4467 
4468 /*
4469  * GART
4470  * VMID 0 is the physical GPU addresses as used by the kernel.
4471  * VMIDs 1-15 are used for userspace clients and are handled
4472  * by the radeon vm/hsa code.
4473  */
4474 /**
4475  * cik_pcie_gart_tlb_flush - gart tlb flush callback
4476  *
4477  * @rdev: radeon_device pointer
4478  *
4479  * Flush the TLB for the VMID 0 page table (CIK).
4480  */
4481 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4482 {
4483 	/* flush hdp cache */
4484 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4485 
4486 	/* bits 0-15 are the VM contexts0-15 */
4487 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
4488 }
4489 
4490 /**
4491  * cik_pcie_gart_enable - gart enable
4492  *
4493  * @rdev: radeon_device pointer
4494  *
4495  * This sets up the TLBs, programs the page tables for VMID0,
4496  * sets up the hw for VMIDs 1-15 which are allocated on
4497  * demand, and sets up the global locations for the LDS, GDS,
4498  * and GPUVM for FSA64 clients (CIK).
4499  * Returns 0 for success, errors for failure.
4500  */
4501 static int cik_pcie_gart_enable(struct radeon_device *rdev)
4502 {
4503 	int r, i;
4504 
4505 	if (rdev->gart.robj == NULL) {
4506 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4507 		return -EINVAL;
4508 	}
4509 	r = radeon_gart_table_vram_pin(rdev);
4510 	if (r)
4511 		return r;
4512 	radeon_gart_restore(rdev);
4513 	/* Setup TLB control */
4514 	WREG32(MC_VM_MX_L1_TLB_CNTL,
4515 	       (0xA << 7) |
4516 	       ENABLE_L1_TLB |
4517 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4518 	       ENABLE_ADVANCED_DRIVER_MODEL |
4519 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4520 	/* Setup L2 cache */
4521 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4522 	       ENABLE_L2_FRAGMENT_PROCESSING |
4523 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4524 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4525 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4526 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4527 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4528 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4529 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4530 	/* setup context0 */
4531 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4532 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4533 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4534 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4535 			(u32)(rdev->dummy_page.addr >> 12));
4536 	WREG32(VM_CONTEXT0_CNTL2, 0);
4537 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4538 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4539 
4540 	WREG32(0x15D4, 0);
4541 	WREG32(0x15D8, 0);
4542 	WREG32(0x15DC, 0);
4543 
4544 	/* empty context1-15 */
4545 	/* FIXME start with 4G, once using 2 level pt switch to full
4546 	 * vm size space
4547 	 */
4548 	/* set vm size, must be a multiple of 4 */
4549 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4550 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4551 	for (i = 1; i < 16; i++) {
4552 		if (i < 8)
4553 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4554 			       rdev->gart.table_addr >> 12);
4555 		else
4556 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4557 			       rdev->gart.table_addr >> 12);
4558 	}
4559 
4560 	/* enable context1-15 */
4561 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4562 	       (u32)(rdev->dummy_page.addr >> 12));
4563 	WREG32(VM_CONTEXT1_CNTL2, 4);
4564 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4565 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4566 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4567 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4568 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4569 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4570 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4571 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4572 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4573 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4574 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4575 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4576 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4577 
4578 	/* TC cache setup ??? */
4579 	WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4580 	WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4581 	WREG32(TC_CFG_L1_STORE_POLICY, 0);
4582 
4583 	WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4584 	WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4585 	WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4586 	WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4587 	WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4588 
4589 	WREG32(TC_CFG_L1_VOLATILE, 0);
4590 	WREG32(TC_CFG_L2_VOLATILE, 0);
4591 
4592 	if (rdev->family == CHIP_KAVERI) {
4593 		u32 tmp = RREG32(CHUB_CONTROL);
4594 		tmp &= ~BYPASS_VM;
4595 		WREG32(CHUB_CONTROL, tmp);
4596 	}
4597 
4598 	/* XXX SH_MEM regs */
4599 	/* where to put LDS, scratch, GPUVM in FSA64 space */
4600 	mutex_lock(&rdev->srbm_mutex);
4601 	for (i = 0; i < 16; i++) {
4602 		cik_srbm_select(rdev, 0, 0, 0, i);
4603 		/* CP and shaders */
4604 		WREG32(SH_MEM_CONFIG, 0);
4605 		WREG32(SH_MEM_APE1_BASE, 1);
4606 		WREG32(SH_MEM_APE1_LIMIT, 0);
4607 		WREG32(SH_MEM_BASES, 0);
4608 		/* SDMA GFX */
4609 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4610 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4611 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4612 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4613 		/* XXX SDMA RLC - todo */
4614 	}
4615 	cik_srbm_select(rdev, 0, 0, 0, 0);
4616 	mutex_unlock(&rdev->srbm_mutex);
4617 
4618 	cik_pcie_gart_tlb_flush(rdev);
4619 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4620 		 (unsigned)(rdev->mc.gtt_size >> 20),
4621 		 (unsigned long long)rdev->gart.table_addr);
4622 	rdev->gart.ready = true;
4623 	return 0;
4624 }
4625 
4626 /**
4627  * cik_pcie_gart_disable - gart disable
4628  *
4629  * @rdev: radeon_device pointer
4630  *
4631  * This disables all VM page table (CIK).
4632  */
4633 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4634 {
4635 	/* Disable all tables */
4636 	WREG32(VM_CONTEXT0_CNTL, 0);
4637 	WREG32(VM_CONTEXT1_CNTL, 0);
4638 	/* Setup TLB control */
4639 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4640 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4641 	/* Setup L2 cache */
4642 	WREG32(VM_L2_CNTL,
4643 	       ENABLE_L2_FRAGMENT_PROCESSING |
4644 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4645 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4646 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4647 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4648 	WREG32(VM_L2_CNTL2, 0);
4649 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4650 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4651 	radeon_gart_table_vram_unpin(rdev);
4652 }
4653 
4654 /**
4655  * cik_pcie_gart_fini - vm fini callback
4656  *
4657  * @rdev: radeon_device pointer
4658  *
4659  * Tears down the driver GART/VM setup (CIK).
4660  */
4661 static void cik_pcie_gart_fini(struct radeon_device *rdev)
4662 {
4663 	cik_pcie_gart_disable(rdev);
4664 	radeon_gart_table_vram_free(rdev);
4665 	radeon_gart_fini(rdev);
4666 }
4667 
/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0 (success).
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
4681 
4682 /*
4683  * vm
4684  * VMID 0 is the physical GPU addresses as used by the kernel.
4685  * VMIDs 1-15 are used for userspace clients and are handled
4686  * by the radeon vm/hsa code.
4687  */
4688 /**
4689  * cik_vm_init - cik vm init callback
4690  *
4691  * @rdev: radeon_device pointer
4692  *
4693  * Inits cik specific vm parameters (number of VMs, base of vram for
4694  * VMIDs 1-15) (CIK).
4695  * Returns 0 for success.
4696  */
4697 int cik_vm_init(struct radeon_device *rdev)
4698 {
4699 	/* number of VMs */
4700 	rdev->vm_manager.nvm = 16;
4701 	/* base offset of vram pages */
4702 	if (rdev->flags & RADEON_IS_IGP) {
4703 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
4704 		tmp <<= 22;
4705 		rdev->vm_manager.vram_base_offset = tmp;
4706 	} else
4707 		rdev->vm_manager.vram_base_offset = 0;
4708 
4709 	return 0;
4710 }
4711 
4712 /**
4713  * cik_vm_fini - cik vm fini callback
4714  *
4715  * @rdev: radeon_device pointer
4716  *
4717  * Tear down any asic specific VM setup (CIK).
4718  */
4719 void cik_vm_fini(struct radeon_device *rdev)
4720 {
4721 }
4722 
4723 /**
4724  * cik_vm_decode_fault - print human readable fault info
4725  *
4726  * @rdev: radeon_device pointer
4727  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4728  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4729  *
4730  * Print human readable fault information (CIK).
4731  */
4732 static void cik_vm_decode_fault(struct radeon_device *rdev,
4733 				u32 status, u32 addr, u32 mc_client)
4734 {
4735 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4736 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4737 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4738 	char *block = (char *)&mc_client;
4739 
4740 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4741 	       protections, vmid, addr,
4742 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4743 	       block, mc_id);
4744 }
4745 
4746 /**
4747  * cik_vm_flush - cik vm flush using the CP
4748  *
4749  * @rdev: radeon_device pointer
4750  *
4751  * Update the page table base and flush the VM TLB
4752  * using the CP (CIK).
4753  */
4754 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4755 {
4756 	struct radeon_ring *ring = &rdev->ring[ridx];
4757 
4758 	if (vm == NULL)
4759 		return;
4760 
4761 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4762 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4763 				 WRITE_DATA_DST_SEL(0)));
4764 	if (vm->id < 8) {
4765 		radeon_ring_write(ring,
4766 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4767 	} else {
4768 		radeon_ring_write(ring,
4769 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4770 	}
4771 	radeon_ring_write(ring, 0);
4772 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4773 
4774 	/* update SH_MEM_* regs */
4775 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4776 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4777 				 WRITE_DATA_DST_SEL(0)));
4778 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4779 	radeon_ring_write(ring, 0);
4780 	radeon_ring_write(ring, VMID(vm->id));
4781 
4782 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4783 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4784 				 WRITE_DATA_DST_SEL(0)));
4785 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
4786 	radeon_ring_write(ring, 0);
4787 
4788 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4789 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4790 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4791 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4792 
4793 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4794 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4795 				 WRITE_DATA_DST_SEL(0)));
4796 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4797 	radeon_ring_write(ring, 0);
4798 	radeon_ring_write(ring, VMID(0));
4799 
4800 	/* HDP flush */
4801 	/* We should be using the WAIT_REG_MEM packet here like in
4802 	 * cik_fence_ring_emit(), but it causes the CP to hang in this
4803 	 * context...
4804 	 */
4805 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4806 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4807 				 WRITE_DATA_DST_SEL(0)));
4808 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4809 	radeon_ring_write(ring, 0);
4810 	radeon_ring_write(ring, 0);
4811 
4812 	/* bits 0-15 are the VM contexts0-15 */
4813 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4814 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4815 				 WRITE_DATA_DST_SEL(0)));
4816 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4817 	radeon_ring_write(ring, 0);
4818 	radeon_ring_write(ring, 1 << vm->id);
4819 
4820 	/* compute doesn't have PFP */
4821 	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4822 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
4823 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4824 		radeon_ring_write(ring, 0x0);
4825 	}
4826 }
4827 
4828 /**
4829  * cik_vm_set_page - update the page tables using sDMA
4830  *
4831  * @rdev: radeon_device pointer
4832  * @ib: indirect buffer to fill with commands
4833  * @pe: addr of the page entry
4834  * @addr: dst addr to write into pe
4835  * @count: number of page entries to update
4836  * @incr: increase next addr by incr bytes
4837  * @flags: access flags
4838  *
4839  * Update the page tables using CP or sDMA (CIK).
4840  */
4841 void cik_vm_set_page(struct radeon_device *rdev,
4842 		     struct radeon_ib *ib,
4843 		     uint64_t pe,
4844 		     uint64_t addr, unsigned count,
4845 		     uint32_t incr, uint32_t flags)
4846 {
4847 	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4848 	uint64_t value;
4849 	unsigned ndw;
4850 
4851 	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4852 		/* CP */
4853 		while (count) {
4854 			ndw = 2 + count * 2;
4855 			if (ndw > 0x3FFE)
4856 				ndw = 0x3FFE;
4857 
4858 			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4859 			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4860 						    WRITE_DATA_DST_SEL(1));
4861 			ib->ptr[ib->length_dw++] = pe;
4862 			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4863 			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4864 				if (flags & RADEON_VM_PAGE_SYSTEM) {
4865 					value = radeon_vm_map_gart(rdev, addr);
4866 					value &= 0xFFFFFFFFFFFFF000ULL;
4867 				} else if (flags & RADEON_VM_PAGE_VALID) {
4868 					value = addr;
4869 				} else {
4870 					value = 0;
4871 				}
4872 				addr += incr;
4873 				value |= r600_flags;
4874 				ib->ptr[ib->length_dw++] = value;
4875 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
4876 			}
4877 		}
4878 	} else {
4879 		/* DMA */
4880 		cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
4881 	}
4882 }
4883 
4884 /*
4885  * RLC
4886  * The RLC is a multi-purpose microengine that handles a
4887  * variety of functions, the most important of which is
4888  * the interrupt controller.
4889  */
4890 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
4891 					  bool enable)
4892 {
4893 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
4894 
4895 	if (enable)
4896 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4897 	else
4898 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4899 	WREG32(CP_INT_CNTL_RING0, tmp);
4900 }
4901 
4902 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
4903 {
4904 	u32 tmp;
4905 
4906 	tmp = RREG32(RLC_LB_CNTL);
4907 	if (enable)
4908 		tmp |= LOAD_BALANCE_ENABLE;
4909 	else
4910 		tmp &= ~LOAD_BALANCE_ENABLE;
4911 	WREG32(RLC_LB_CNTL, tmp);
4912 }
4913 
/* Poll until the RLC serdes masters report idle: first the per-CU
 * masters on every SE/SH, then the non-CU masters.  Each poll is
 * bounded by rdev->usec_timeout; on timeout we simply fall through. */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			/* target the (se, sh) pair before reading its
			 * busy status */
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast mode */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
4938 
4939 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
4940 {
4941 	u32 tmp;
4942 
4943 	tmp = RREG32(RLC_CNTL);
4944 	if (tmp != rlc)
4945 		WREG32(RLC_CNTL, rlc);
4946 }
4947 
/* Halt the RLC if it is running and wait for it to go idle.
 * Returns the original RLC_CNTL value so the caller can restore it
 * later via cik_update_rlc(). */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* wait (bounded) for the GPM block to drain */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
4971 
/**
 * cik_enter_rlc_safe_mode - request RLC safe mode
 *
 * @rdev: radeon_device pointer
 *
 * Ask the RLC to enter safe mode via RLC_GPR_REG2, then wait
 * (bounded by rdev->usec_timeout) for the GFX power/clock status
 * bits and for the RLC to acknowledge the request by clearing REQ.
 */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* REQ clears once the RLC has accepted the message */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
4992 
4993 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
4994 {
4995 	u32 tmp;
4996 
4997 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
4998 	WREG32(RLC_GPR_REG2, tmp);
4999 }
5000 
5001 /**
5002  * cik_rlc_stop - stop the RLC ME
5003  *
5004  * @rdev: radeon_device pointer
5005  *
5006  * Halt the RLC ME (MicroEngine) (CIK).
5007  */
5008 static void cik_rlc_stop(struct radeon_device *rdev)
5009 {
5010 	WREG32(RLC_CNTL, 0);
5011 
5012 	cik_enable_gui_idle_interrupt(rdev, false);
5013 
5014 	cik_wait_for_rlc_serdes(rdev);
5015 }
5016 
5017 /**
5018  * cik_rlc_start - start the RLC ME
5019  *
5020  * @rdev: radeon_device pointer
5021  *
5022  * Unhalt the RLC ME (MicroEngine) (CIK).
5023  */
5024 static void cik_rlc_start(struct radeon_device *rdev)
5025 {
5026 	WREG32(RLC_CNTL, RLC_ENABLE);
5027 
5028 	cik_enable_gui_idle_interrupt(rdev, true);
5029 
5030 	udelay(50);
5031 }
5032 
5033 /**
5034  * cik_rlc_resume - setup the RLC hw
5035  *
5036  * @rdev: radeon_device pointer
5037  *
5038  * Initialize the RLC registers, load the ucode,
5039  * and start the RLC (CIK).
5040  * Returns 0 for success, -EINVAL if the ucode is not available.
5041  */
5042 static int cik_rlc_resume(struct radeon_device *rdev)
5043 {
5044 	u32 i, size, tmp;
5045 	const __be32 *fw_data;
5046 
5047 	if (!rdev->rlc_fw)
5048 		return -EINVAL;
5049 
5050 	switch (rdev->family) {
5051 	case CHIP_BONAIRE:
5052 	default:
5053 		size = BONAIRE_RLC_UCODE_SIZE;
5054 		break;
5055 	case CHIP_KAVERI:
5056 		size = KV_RLC_UCODE_SIZE;
5057 		break;
5058 	case CHIP_KABINI:
5059 		size = KB_RLC_UCODE_SIZE;
5060 		break;
5061 	}
5062 
5063 	cik_rlc_stop(rdev);
5064 
5065 	/* disable CG */
5066 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5067 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5068 
5069 	si_rlc_reset(rdev);
5070 
5071 	cik_init_pg(rdev);
5072 
5073 	cik_init_cg(rdev);
5074 
5075 	WREG32(RLC_LB_CNTR_INIT, 0);
5076 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5077 
5078 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5079 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5080 	WREG32(RLC_LB_PARAMS, 0x00600408);
5081 	WREG32(RLC_LB_CNTL, 0x80000004);
5082 
5083 	WREG32(RLC_MC_CNTL, 0);
5084 	WREG32(RLC_UCODE_CNTL, 0);
5085 
5086 	fw_data = (const __be32 *)rdev->rlc_fw->data;
5087 		WREG32(RLC_GPM_UCODE_ADDR, 0);
5088 	for (i = 0; i < size; i++)
5089 		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5090 	WREG32(RLC_GPM_UCODE_ADDR, 0);
5091 
5092 	/* XXX - find out what chips support lbpw */
5093 	cik_enable_lbpw(rdev, false);
5094 
5095 	if (rdev->family == CHIP_BONAIRE)
5096 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
5097 
5098 	cik_rlc_start(rdev);
5099 
5100 	return 0;
5101 }
5102 
/* Enable/disable coarse-grain clock gating (CGCG/CGLS) for GFX.
 * The RLC is halted around the serdes programming and restored
 * afterwards via cik_update_rlc(). */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		tmp = cik_halt_rlc(rdev);

		/* broadcast the serdes write to all SE/SH */
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* read back to flush pending writes before disabling */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	/* only touch the register if something changed */
	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
5138 
/* Enable/disable medium-grain clock gating (MGCG) for GFX, plus the
 * related CP/RLC memory light sleep and CGTS shader gating.  The
 * enable and disable paths deliberately program things in opposite
 * order; the RLC is halted around each serdes update. */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* clear the MGCG override bit (bit 1) */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = cik_halt_rlc(rdev);

		/* broadcast serdes write to all SE/SH */
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			/* CGTS shader clock gating */
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* set the MGCG override bit */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000002;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* turn off RLC and CP memory light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}
5217 
/* Memory-controller clock-gating registers; each one is toggled by
 * cik_enable_mc_ls() (MC_LS_ENABLE) and cik_enable_mc_mgcg()
 * (MC_CG_ENABLE). */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5230 
5231 static void cik_enable_mc_ls(struct radeon_device *rdev,
5232 			     bool enable)
5233 {
5234 	int i;
5235 	u32 orig, data;
5236 
5237 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5238 		orig = data = RREG32(mc_cg_registers[i]);
5239 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5240 			data |= MC_LS_ENABLE;
5241 		else
5242 			data &= ~MC_LS_ENABLE;
5243 		if (data != orig)
5244 			WREG32(mc_cg_registers[i], data);
5245 	}
5246 }
5247 
5248 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5249 			       bool enable)
5250 {
5251 	int i;
5252 	u32 orig, data;
5253 
5254 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5255 		orig = data = RREG32(mc_cg_registers[i]);
5256 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5257 			data |= MC_CG_ENABLE;
5258 		else
5259 			data &= ~MC_CG_ENABLE;
5260 		if (data != orig)
5261 			WREG32(mc_cg_registers[i], data);
5262 	}
5263 }
5264 
5265 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5266 				 bool enable)
5267 {
5268 	u32 orig, data;
5269 
5270 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5271 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5272 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5273 	} else {
5274 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5275 		data |= 0xff000000;
5276 		if (data != orig)
5277 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5278 
5279 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5280 		data |= 0xff000000;
5281 		if (data != orig)
5282 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5283 	}
5284 }
5285 
5286 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5287 				 bool enable)
5288 {
5289 	u32 orig, data;
5290 
5291 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
5292 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5293 		data |= 0x100;
5294 		if (orig != data)
5295 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5296 
5297 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5298 		data |= 0x100;
5299 		if (orig != data)
5300 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5301 	} else {
5302 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5303 		data &= ~0x100;
5304 		if (orig != data)
5305 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5306 
5307 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5308 		data &= ~0x100;
5309 		if (orig != data)
5310 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5311 	}
5312 }
5313 
/* Enable/disable UVD medium-grain clock gating via the UVD context
 * registers and UVD_CGC_CTRL's DCM bit. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		/* NOTE(review): the value read above is immediately
		 * overwritten, so the read is dead unless the access
		 * itself has a hardware side effect — confirm against
		 * the UVD register docs */
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
5339 
5340 static void cik_enable_bif_mgls(struct radeon_device *rdev,
5341 			       bool enable)
5342 {
5343 	u32 orig, data;
5344 
5345 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
5346 
5347 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5348 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5349 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5350 	else
5351 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5352 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5353 
5354 	if (orig != data)
5355 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
5356 }
5357 
5358 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5359 				bool enable)
5360 {
5361 	u32 orig, data;
5362 
5363 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5364 
5365 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5366 		data &= ~CLOCK_GATING_DIS;
5367 	else
5368 		data |= CLOCK_GATING_DIS;
5369 
5370 	if (orig != data)
5371 		WREG32(HDP_HOST_PATH_CNTL, data);
5372 }
5373 
5374 static void cik_enable_hdp_ls(struct radeon_device *rdev,
5375 			      bool enable)
5376 {
5377 	u32 orig, data;
5378 
5379 	orig = data = RREG32(HDP_MEM_POWER_LS);
5380 
5381 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5382 		data |= HDP_LS_ENABLE;
5383 	else
5384 		data &= ~HDP_LS_ENABLE;
5385 
5386 	if (orig != data)
5387 		WREG32(HDP_MEM_POWER_LS, data);
5388 }
5389 
/**
 * cik_update_cg - enable/disable clock gating for a set of blocks
 *
 * @rdev: radeon_device pointer
 * @block: RADEON_CG_BLOCK_* bitmask selecting which blocks to touch
 * @enable: enable or disable gating
 *
 * Dispatches to the per-block helpers.  For GFX, MGCG must be
 * enabled before CGCG and disabled after it.
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC gating is only touched on discrete parts */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}
}
5433 
/* Turn on clock gating at init: GFX first, then UVD internal CG,
 * then the remaining blocks in one call. */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
5448 
/* Turn off clock gating at teardown, in the reverse order of
 * cik_init_cg(): the non-GFX blocks first, GFX last. */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
5459 
5460 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
5461 					  bool enable)
5462 {
5463 	u32 data, orig;
5464 
5465 	orig = data = RREG32(RLC_PG_CNTL);
5466 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5467 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5468 	else
5469 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5470 	if (orig != data)
5471 		WREG32(RLC_PG_CNTL, data);
5472 }
5473 
5474 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
5475 					  bool enable)
5476 {
5477 	u32 data, orig;
5478 
5479 	orig = data = RREG32(RLC_PG_CNTL);
5480 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5481 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5482 	else
5483 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5484 	if (orig != data)
5485 		WREG32(RLC_PG_CNTL, data);
5486 }
5487 
5488 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
5489 {
5490 	u32 data, orig;
5491 
5492 	orig = data = RREG32(RLC_PG_CNTL);
5493 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
5494 		data &= ~DISABLE_CP_PG;
5495 	else
5496 		data |= DISABLE_CP_PG;
5497 	if (orig != data)
5498 		WREG32(RLC_PG_CNTL, data);
5499 }
5500 
5501 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
5502 {
5503 	u32 data, orig;
5504 
5505 	orig = data = RREG32(RLC_PG_CNTL);
5506 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
5507 		data &= ~DISABLE_GDS_PG;
5508 	else
5509 		data |= DISABLE_GDS_PG;
5510 	if (orig != data)
5511 		WREG32(RLC_PG_CNTL, data);
5512 }
5513 
5514 #define CP_ME_TABLE_SIZE    96
5515 #define CP_ME_TABLE_OFFSET  2048
5516 #define CP_MEC_TABLE_OFFSET 4096
5517 
/* Fill the RLC's CP power-gating table buffer with per-microengine
 * ucode table data (CE, PFP, ME, then the MEC(s)); each engine
 * contributes CP_ME_TABLE_SIZE dwords, byte-swapped from the
 * big-endian firmware images. */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset;

	/* KAVERI has a second MEC */
	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		/* pick the firmware image and the table offset within it */
		if (me == 0) {
			fw_data = (const __be32 *)rdev->ce_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 1) {
			fw_data = (const __be32 *)rdev->pfp_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 2) {
			fw_data = (const __be32 *)rdev->me_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else {
			fw_data = (const __be32 *)rdev->mec_fw->data;
			table_offset = CP_MEC_TABLE_OFFSET;
		}

		for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
			dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
		}
		bo_offset += CP_ME_TABLE_SIZE;
	}
}
5555 
/* Enable/disable GFX coarse-grain power gating (GFX_PG_ENABLE plus
 * the RLC auto power-gate control). */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): result of this read is discarded;
		 * presumably the read itself kicks the hardware out of
		 * the gated state — confirm against register docs */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
5585 
5586 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5587 {
5588 	u32 mask = 0, tmp, tmp1;
5589 	int i;
5590 
5591 	cik_select_se_sh(rdev, se, sh);
5592 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5593 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5594 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5595 
5596 	tmp &= 0xffff0000;
5597 
5598 	tmp |= tmp1;
5599 	tmp >>= 16;
5600 
5601 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
5602 		mask <<= 1;
5603 		mask |= 1;
5604 	}
5605 
5606 	return (~tmp) & mask;
5607 }
5608 
5609 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
5610 {
5611 	u32 i, j, k, active_cu_number = 0;
5612 	u32 mask, counter, cu_bitmap;
5613 	u32 tmp = 0;
5614 
5615 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5616 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5617 			mask = 1;
5618 			cu_bitmap = 0;
5619 			counter = 0;
5620 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
5621 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
5622 					if (counter < 2)
5623 						cu_bitmap |= mask;
5624 					counter ++;
5625 				}
5626 				mask <<= 1;
5627 			}
5628 
5629 			active_cu_number += counter;
5630 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5631 		}
5632 	}
5633 
5634 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5635 
5636 	tmp = RREG32(RLC_MAX_PG_CU);
5637 	tmp &= ~MAX_PU_CU_MASK;
5638 	tmp |= MAX_PU_CU(active_cu_number);
5639 	WREG32(RLC_MAX_PG_CU, tmp);
5640 }
5641 
5642 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
5643 				       bool enable)
5644 {
5645 	u32 data, orig;
5646 
5647 	orig = data = RREG32(RLC_PG_CNTL);
5648 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
5649 		data |= STATIC_PER_CU_PG_ENABLE;
5650 	else
5651 		data &= ~STATIC_PER_CU_PG_ENABLE;
5652 	if (orig != data)
5653 		WREG32(RLC_PG_CNTL, data);
5654 }
5655 
5656 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
5657 					bool enable)
5658 {
5659 	u32 data, orig;
5660 
5661 	orig = data = RREG32(RLC_PG_CNTL);
5662 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
5663 		data |= DYN_PER_CU_PG_ENABLE;
5664 	else
5665 		data &= ~DYN_PER_CU_PG_ENABLE;
5666 	if (orig != data)
5667 		WREG32(RLC_PG_CNTL, data);
5668 }
5669 
5670 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
5671 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
5672 
/* One-time GFX power-gating setup: publish the clear-state
 * descriptor and save/restore list to the RLC scratch space, point
 * the RLC at the save/restore and CP PG table buffers, and tune the
 * power-gating delay/idle parameters. */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* clear-state descriptor: hi addr, lo addr, size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state: zero out the descriptor */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
5721 
/* Enable/disable all GFX power-gating features together: coarse
 * grain plus static and dynamic per-CU medium grain. */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
5728 
5729 u32 cik_get_csb_size(struct radeon_device *rdev)
5730 {
5731 	u32 count = 0;
5732 	const struct cs_section_def *sect = NULL;
5733 	const struct cs_extent_def *ext = NULL;
5734 
5735 	if (rdev->rlc.cs_data == NULL)
5736 		return 0;
5737 
5738 	/* begin clear state */
5739 	count += 2;
5740 	/* context control state */
5741 	count += 3;
5742 
5743 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5744 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5745 			if (sect->id == SECT_CONTEXT)
5746 				count += 2 + ext->reg_count;
5747 			else
5748 				return 0;
5749 		}
5750 	}
5751 	/* pa_sc_raster_config/pa_sc_raster_config1 */
5752 	count += 4;
5753 	/* end clear state */
5754 	count += 2;
5755 	/* clear state */
5756 	count += 2;
5757 
5758 	return count;
5759 }
5760 
/* Fill @buffer with the clear-state command stream: preamble begin,
 * context control, every SECT_CONTEXT register extent, the
 * family-specific raster config, preamble end, and a CLEAR_STATE
 * packet.  The layout must match cik_get_csb_size(). */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
	buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;

	buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
	buffer[count++] = 0x80000000;
	buffer[count++] = 0x80000000;

	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
				/* SET_CONTEXT_REG takes the offset from
				 * the context register base (0xa000) */
				buffer[count++] = ext->reg_index - 0xa000;
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = ext->extent[i];
			} else {
				/* only context sections are supported */
				return;
			}
		}
	}

	buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
	buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = 0x16000012;
		buffer[count++] = 0x00000000;
		break;
	case CHIP_KAVERI:
		buffer[count++] = 0x00000000; /* XXX */
		buffer[count++] = 0x00000000;
		break;
	case CHIP_KABINI:
		buffer[count++] = 0x00000000; /* XXX */
		buffer[count++] = 0x00000000;
		break;
	default:
		buffer[count++] = 0x00000000;
		buffer[count++] = 0x00000000;
		break;
	}

	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
	buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;

	buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
	buffer[count++] = 0;
}
5819 
/* Initialize power gating if any PG feature is enabled: clock
 * slowdown, GFX CG/CP/GDS power gating setup, the always-on CU
 * mask, and finally turn the GFX PG features on. */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
5834 
/* Tear down power gating in the reverse order of cik_init_pg():
 * disable GFX PG first, then CP/GDS power gating. */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
5845 
5846 /*
5847  * Interrupts
5848  * Starting with r6xx, interrupts are handled via a ring buffer.
5849  * Ring buffers are areas of GPU accessible memory that the GPU
5850  * writes interrupt vectors into and the host reads vectors out of.
5851  * There is a rptr (read pointer) that determines where the
5852  * host is currently reading, and a wptr (write pointer)
5853  * which determines where the GPU has written.  When the
5854  * pointers are equal, the ring is idle.  When the GPU
5855  * writes vectors to the ring buffer, it increments the
5856  * wptr.  When there is an interrupt, the host then starts
5857  * fetching commands and processing them until the pointers are
5858  * equal again at which point it updates the rptr.
5859  */
5860 
5861 /**
5862  * cik_enable_interrupts - Enable the interrupt ring buffer
5863  *
5864  * @rdev: radeon_device pointer
5865  *
5866  * Enable the interrupt ring buffer (CIK).
5867  */
5868 static void cik_enable_interrupts(struct radeon_device *rdev)
5869 {
5870 	u32 ih_cntl = RREG32(IH_CNTL);
5871 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5872 
5873 	ih_cntl |= ENABLE_INTR;
5874 	ih_rb_cntl |= IH_RB_ENABLE;
5875 	WREG32(IH_CNTL, ih_cntl);
5876 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5877 	rdev->ih.enabled = true;
5878 }
5879 
5880 /**
5881  * cik_disable_interrupts - Disable the interrupt ring buffer
5882  *
5883  * @rdev: radeon_device pointer
5884  *
5885  * Disable the interrupt ring buffer (CIK).
5886  */
5887 static void cik_disable_interrupts(struct radeon_device *rdev)
5888 {
5889 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5890 	u32 ih_cntl = RREG32(IH_CNTL);
5891 
5892 	ih_rb_cntl &= ~IH_RB_ENABLE;
5893 	ih_cntl &= ~ENABLE_INTR;
5894 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5895 	WREG32(IH_CNTL, ih_cntl);
5896 	/* set rptr, wptr to 0 */
5897 	WREG32(IH_RB_RPTR, 0);
5898 	WREG32(IH_RB_WPTR, 0);
5899 	rdev->ih.enabled = false;
5900 	rdev->ih.rptr = 0;
5901 }
5902 
5903 /**
5904  * cik_disable_interrupt_state - Disable all interrupt sources
5905  *
5906  * @rdev: radeon_device pointer
5907  *
5908  * Clear all interrupt enable bits used by the driver (CIK).
5909  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring - preserve only the context busy/empty enables */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma - clear trap enable on both sdma engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues - fully mask all 8 ME1/ME2 pipe interrupt controls */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* vline/vblank, etc. - mask per-crtc display interrupts; crtc 2-5
	 * registers only exist when the asic actually has those crtcs */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug - clear the enable bits but keep each pad's
	 * configured interrupt polarity */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
5964 
5965 /**
5966  * cik_irq_init - init and enable the interrupt ring
5967  *
5968  * @rdev: radeon_device pointer
5969  *
5970  * Allocate a ring buffer for the interrupt controller,
5971  * enable the RLC, disable interrupts, enable the IH
5972  * ring buffer and enable it (CIK).
 * Called at device load and resume.
5974  * Returns 0 for success, errors for failure.
5975  */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc - must be running before IH interrupts are delivered;
	 * tear down the ring again if it fails */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	/* bus mastering is required for MSI delivery and IH writeback */
	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
6046 
6047 /**
6048  * cik_irq_set - enable/disable interrupt sources
6049  *
6050  * @rdev: radeon_device pointer
6051  *
6052  * Enable interrupt sources on the GPU (vblanks, hpd,
6053  * etc.) (CIK).
6054  * Returns 0 for success, errors for failure.
6055  */
6056 int cik_irq_set(struct radeon_device *rdev)
6057 {
6058 	u32 cp_int_cntl;
6059 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6060 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6061 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6062 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6063 	u32 grbm_int_cntl = 0;
6064 	u32 dma_cntl, dma_cntl1;
6065 	u32 thermal_int;
6066 
6067 	if (!rdev->irq.installed) {
6068 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6069 		return -EINVAL;
6070 	}
6071 	/* don't enable anything if the ih is disabled */
6072 	if (!rdev->ih.enabled) {
6073 		cik_disable_interrupts(rdev);
6074 		/* force the active interrupt state to all disabled */
6075 		cik_disable_interrupt_state(rdev);
6076 		return 0;
6077 	}
6078 
6079 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6080 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6081 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6082 
6083 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6084 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6085 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6086 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6087 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6088 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6089 
6090 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6091 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6092 
6093 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6094 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6095 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6096 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6097 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6098 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6099 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6100 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6101 
6102 	if (rdev->flags & RADEON_IS_IGP)
6103 		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6104 			~(THERM_INTH_MASK | THERM_INTL_MASK);
6105 	else
6106 		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6107 			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6108 
6109 	/* enable CP interrupts on all rings */
6110 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6111 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
6112 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6113 	}
6114 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6115 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6116 		DRM_DEBUG("si_irq_set: sw int cp1\n");
6117 		if (ring->me == 1) {
6118 			switch (ring->pipe) {
6119 			case 0:
6120 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6121 				break;
6122 			case 1:
6123 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6124 				break;
6125 			case 2:
6126 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6127 				break;
6128 			case 3:
6129 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6130 				break;
6131 			default:
6132 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6133 				break;
6134 			}
6135 		} else if (ring->me == 2) {
6136 			switch (ring->pipe) {
6137 			case 0:
6138 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6139 				break;
6140 			case 1:
6141 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6142 				break;
6143 			case 2:
6144 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6145 				break;
6146 			case 3:
6147 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6148 				break;
6149 			default:
6150 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6151 				break;
6152 			}
6153 		} else {
6154 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6155 		}
6156 	}
6157 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6158 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6159 		DRM_DEBUG("si_irq_set: sw int cp2\n");
6160 		if (ring->me == 1) {
6161 			switch (ring->pipe) {
6162 			case 0:
6163 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6164 				break;
6165 			case 1:
6166 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6167 				break;
6168 			case 2:
6169 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6170 				break;
6171 			case 3:
6172 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6173 				break;
6174 			default:
6175 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6176 				break;
6177 			}
6178 		} else if (ring->me == 2) {
6179 			switch (ring->pipe) {
6180 			case 0:
6181 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6182 				break;
6183 			case 1:
6184 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6185 				break;
6186 			case 2:
6187 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6188 				break;
6189 			case 3:
6190 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6191 				break;
6192 			default:
6193 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6194 				break;
6195 			}
6196 		} else {
6197 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6198 		}
6199 	}
6200 
6201 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6202 		DRM_DEBUG("cik_irq_set: sw int dma\n");
6203 		dma_cntl |= TRAP_ENABLE;
6204 	}
6205 
6206 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6207 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
6208 		dma_cntl1 |= TRAP_ENABLE;
6209 	}
6210 
6211 	if (rdev->irq.crtc_vblank_int[0] ||
6212 	    atomic_read(&rdev->irq.pflip[0])) {
6213 		DRM_DEBUG("cik_irq_set: vblank 0\n");
6214 		crtc1 |= VBLANK_INTERRUPT_MASK;
6215 	}
6216 	if (rdev->irq.crtc_vblank_int[1] ||
6217 	    atomic_read(&rdev->irq.pflip[1])) {
6218 		DRM_DEBUG("cik_irq_set: vblank 1\n");
6219 		crtc2 |= VBLANK_INTERRUPT_MASK;
6220 	}
6221 	if (rdev->irq.crtc_vblank_int[2] ||
6222 	    atomic_read(&rdev->irq.pflip[2])) {
6223 		DRM_DEBUG("cik_irq_set: vblank 2\n");
6224 		crtc3 |= VBLANK_INTERRUPT_MASK;
6225 	}
6226 	if (rdev->irq.crtc_vblank_int[3] ||
6227 	    atomic_read(&rdev->irq.pflip[3])) {
6228 		DRM_DEBUG("cik_irq_set: vblank 3\n");
6229 		crtc4 |= VBLANK_INTERRUPT_MASK;
6230 	}
6231 	if (rdev->irq.crtc_vblank_int[4] ||
6232 	    atomic_read(&rdev->irq.pflip[4])) {
6233 		DRM_DEBUG("cik_irq_set: vblank 4\n");
6234 		crtc5 |= VBLANK_INTERRUPT_MASK;
6235 	}
6236 	if (rdev->irq.crtc_vblank_int[5] ||
6237 	    atomic_read(&rdev->irq.pflip[5])) {
6238 		DRM_DEBUG("cik_irq_set: vblank 5\n");
6239 		crtc6 |= VBLANK_INTERRUPT_MASK;
6240 	}
6241 	if (rdev->irq.hpd[0]) {
6242 		DRM_DEBUG("cik_irq_set: hpd 1\n");
6243 		hpd1 |= DC_HPDx_INT_EN;
6244 	}
6245 	if (rdev->irq.hpd[1]) {
6246 		DRM_DEBUG("cik_irq_set: hpd 2\n");
6247 		hpd2 |= DC_HPDx_INT_EN;
6248 	}
6249 	if (rdev->irq.hpd[2]) {
6250 		DRM_DEBUG("cik_irq_set: hpd 3\n");
6251 		hpd3 |= DC_HPDx_INT_EN;
6252 	}
6253 	if (rdev->irq.hpd[3]) {
6254 		DRM_DEBUG("cik_irq_set: hpd 4\n");
6255 		hpd4 |= DC_HPDx_INT_EN;
6256 	}
6257 	if (rdev->irq.hpd[4]) {
6258 		DRM_DEBUG("cik_irq_set: hpd 5\n");
6259 		hpd5 |= DC_HPDx_INT_EN;
6260 	}
6261 	if (rdev->irq.hpd[5]) {
6262 		DRM_DEBUG("cik_irq_set: hpd 6\n");
6263 		hpd6 |= DC_HPDx_INT_EN;
6264 	}
6265 
6266 	if (rdev->irq.dpm_thermal) {
6267 		DRM_DEBUG("dpm thermal\n");
6268 		if (rdev->flags & RADEON_IS_IGP)
6269 			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6270 		else
6271 			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6272 	}
6273 
6274 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6275 
6276 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6277 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6278 
6279 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6280 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6281 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6282 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6283 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6284 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6285 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6286 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6287 
6288 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6289 
6290 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6291 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6292 	if (rdev->num_crtc >= 4) {
6293 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6294 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6295 	}
6296 	if (rdev->num_crtc >= 6) {
6297 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6298 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6299 	}
6300 
6301 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
6302 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
6303 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
6304 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
6305 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
6306 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
6307 
6308 	if (rdev->flags & RADEON_IS_IGP)
6309 		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6310 	else
6311 		WREG32_SMC(CG_THERMAL_INT, thermal_int);
6312 
6313 	return 0;
6314 }
6315 
6316 /**
6317  * cik_irq_ack - ack interrupt sources
6318  *
6319  * @rdev: radeon_device pointer
6320  *
6321  * Ack interrupt sources on the GPU (vblanks, hpd,
6322  * etc.) (CIK).  Certain interrupts sources are sw
6323  * generated and do not require an explicit ack.
6324  */
6325 static inline void cik_irq_ack(struct radeon_device *rdev)
6326 {
6327 	u32 tmp;
6328 
6329 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6330 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6331 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6332 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6333 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6334 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6335 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6336 
6337 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6338 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6339 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6340 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6341 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6342 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6343 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6344 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6345 
6346 	if (rdev->num_crtc >= 4) {
6347 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6348 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6349 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6350 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6351 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6352 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6353 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6354 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6355 	}
6356 
6357 	if (rdev->num_crtc >= 6) {
6358 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6359 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6360 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6361 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6362 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6363 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6364 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6365 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6366 	}
6367 
6368 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6369 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6370 		tmp |= DC_HPDx_INT_ACK;
6371 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6372 	}
6373 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6374 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6375 		tmp |= DC_HPDx_INT_ACK;
6376 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6377 	}
6378 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6379 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6380 		tmp |= DC_HPDx_INT_ACK;
6381 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6382 	}
6383 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6384 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6385 		tmp |= DC_HPDx_INT_ACK;
6386 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6387 	}
6388 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6389 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6390 		tmp |= DC_HPDx_INT_ACK;
6391 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6392 	}
6393 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6394 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6395 		tmp |= DC_HPDx_INT_ACK;
6396 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6397 	}
6398 }
6399 
6400 /**
6401  * cik_irq_disable - disable interrupts
6402  *
6403  * @rdev: radeon_device pointer
6404  *
6405  * Disable interrupts on the hw (CIK).
6406  */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	/* then mask all individual interrupt sources */
	cik_disable_interrupt_state(rdev);
}
6415 
6416 /**
 * cik_irq_suspend - disable interrupts for suspend
6418  *
6419  * @rdev: radeon_device pointer
6420  *
6421  * Disable interrupts and stop the RLC (CIK).
6422  * Used for suspend.
6423  */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* disable interrupts first, then stop the RLC */
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
6429 
6430 /**
6431  * cik_irq_fini - tear down interrupt support
6432  *
6433  * @rdev: radeon_device pointer
6434  *
6435  * Disable interrupts on the hw and free the IH ring
6436  * buffer (CIK).
6437  * Used for driver unload.
6438  */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* quiesce the hw before freeing the IH ring it writes into */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6444 
6445 /**
6446  * cik_get_ih_wptr - get the IH ring buffer wptr
6447  *
6448  * @rdev: radeon_device pointer
6449  *
6450  * Get the IH ring buffer wptr from either the register
6451  * or the writeback memory buffer (CIK).  Also check for
6452  * ring buffer overflow and deal with it.
6453  * Used by cik_irq_process().
6454  * Returns the value of the wptr.
6455  */
6456 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
6457 {
6458 	u32 wptr, tmp;
6459 
6460 	if (rdev->wb.enabled)
6461 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6462 	else
6463 		wptr = RREG32(IH_RB_WPTR);
6464 
6465 	if (wptr & RB_OVERFLOW) {
6466 		/* When a ring buffer overflow happen start parsing interrupt
6467 		 * from the last not overwritten vector (wptr + 16). Hopefully
6468 		 * this should allow us to catchup.
6469 		 */
6470 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6471 			wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
6472 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6473 		tmp = RREG32(IH_RB_CNTL);
6474 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6475 		WREG32(IH_RB_CNTL, tmp);
6476 	}
6477 	return (wptr & rdev->ih.ptr_mask);
6478 }
6479 
6480 /*        CIK IV Ring
6481  * Each IV ring entry is 128 bits:
6482  * [7:0]    - interrupt source id
6483  * [31:8]   - reserved
6484  * [59:32]  - interrupt source data
6485  * [63:60]  - reserved
6486  * [71:64]  - RINGID
6487  *            CP:
6488  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
6489  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
6490  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
6491  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
6492  *            PIPE_ID - ME0 0=3D
6493  *                    - ME1&2 compute dispatcher (4 pipes each)
6494  *            SDMA:
6495  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
6496  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
6497  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
6498  * [79:72]  - VMID
6499  * [95:80]  - PASID
6500  * [127:96] - reserved
6501  */
6502 /**
6503  * cik_irq_process - interrupt handler
6504  *
6505  * @rdev: radeon_device pointer
6506  *
 * Interrupt handler (CIK).  Walk the IH ring,
6508  * ack interrupts and schedule work to handle
6509  * interrupt events.
6510  * Returns irq process return code.
6511  */
6512 int cik_irq_process(struct radeon_device *rdev)
6513 {
6514 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6515 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6516 	u32 wptr;
6517 	u32 rptr;
6518 	u32 src_id, src_data, ring_id;
6519 	u8 me_id, pipe_id, queue_id;
6520 	u32 ring_index;
6521 	bool queue_hotplug = false;
6522 	bool queue_reset = false;
6523 	u32 addr, status, mc_client;
6524 	bool queue_thermal = false;
6525 
6526 	if (!rdev->ih.enabled || rdev->shutdown)
6527 		return IRQ_NONE;
6528 
6529 	wptr = cik_get_ih_wptr(rdev);
6530 
6531 restart_ih:
6532 	/* is somebody else already processing irqs? */
6533 	if (atomic_xchg(&rdev->ih.lock, 1))
6534 		return IRQ_NONE;
6535 
6536 	rptr = rdev->ih.rptr;
6537 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6538 
6539 	/* Order reading of wptr vs. reading of IH ring data */
6540 	rmb();
6541 
6542 	/* display interrupts */
6543 	cik_irq_ack(rdev);
6544 
6545 	while (rptr != wptr) {
6546 		/* wptr/rptr are in bytes! */
6547 		ring_index = rptr / 4;
6548 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6549 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6550 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6551 
6552 		switch (src_id) {
6553 		case 1: /* D1 vblank/vline */
6554 			switch (src_data) {
6555 			case 0: /* D1 vblank */
6556 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
6557 					if (rdev->irq.crtc_vblank_int[0]) {
6558 						drm_handle_vblank(rdev->ddev, 0);
6559 						rdev->pm.vblank_sync = true;
6560 						wake_up(&rdev->irq.vblank_queue);
6561 					}
6562 					if (atomic_read(&rdev->irq.pflip[0]))
6563 						radeon_crtc_handle_flip(rdev, 0);
6564 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6565 					DRM_DEBUG("IH: D1 vblank\n");
6566 				}
6567 				break;
6568 			case 1: /* D1 vline */
6569 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
6570 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6571 					DRM_DEBUG("IH: D1 vline\n");
6572 				}
6573 				break;
6574 			default:
6575 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6576 				break;
6577 			}
6578 			break;
6579 		case 2: /* D2 vblank/vline */
6580 			switch (src_data) {
6581 			case 0: /* D2 vblank */
6582 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6583 					if (rdev->irq.crtc_vblank_int[1]) {
6584 						drm_handle_vblank(rdev->ddev, 1);
6585 						rdev->pm.vblank_sync = true;
6586 						wake_up(&rdev->irq.vblank_queue);
6587 					}
6588 					if (atomic_read(&rdev->irq.pflip[1]))
6589 						radeon_crtc_handle_flip(rdev, 1);
6590 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6591 					DRM_DEBUG("IH: D2 vblank\n");
6592 				}
6593 				break;
6594 			case 1: /* D2 vline */
6595 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6596 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6597 					DRM_DEBUG("IH: D2 vline\n");
6598 				}
6599 				break;
6600 			default:
6601 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6602 				break;
6603 			}
6604 			break;
6605 		case 3: /* D3 vblank/vline */
6606 			switch (src_data) {
6607 			case 0: /* D3 vblank */
6608 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6609 					if (rdev->irq.crtc_vblank_int[2]) {
6610 						drm_handle_vblank(rdev->ddev, 2);
6611 						rdev->pm.vblank_sync = true;
6612 						wake_up(&rdev->irq.vblank_queue);
6613 					}
6614 					if (atomic_read(&rdev->irq.pflip[2]))
6615 						radeon_crtc_handle_flip(rdev, 2);
6616 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6617 					DRM_DEBUG("IH: D3 vblank\n");
6618 				}
6619 				break;
6620 			case 1: /* D3 vline */
6621 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6622 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6623 					DRM_DEBUG("IH: D3 vline\n");
6624 				}
6625 				break;
6626 			default:
6627 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6628 				break;
6629 			}
6630 			break;
6631 		case 4: /* D4 vblank/vline */
6632 			switch (src_data) {
6633 			case 0: /* D4 vblank */
6634 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6635 					if (rdev->irq.crtc_vblank_int[3]) {
6636 						drm_handle_vblank(rdev->ddev, 3);
6637 						rdev->pm.vblank_sync = true;
6638 						wake_up(&rdev->irq.vblank_queue);
6639 					}
6640 					if (atomic_read(&rdev->irq.pflip[3]))
6641 						radeon_crtc_handle_flip(rdev, 3);
6642 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6643 					DRM_DEBUG("IH: D4 vblank\n");
6644 				}
6645 				break;
6646 			case 1: /* D4 vline */
6647 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6648 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6649 					DRM_DEBUG("IH: D4 vline\n");
6650 				}
6651 				break;
6652 			default:
6653 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6654 				break;
6655 			}
6656 			break;
6657 		case 5: /* D5 vblank/vline */
6658 			switch (src_data) {
6659 			case 0: /* D5 vblank */
6660 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6661 					if (rdev->irq.crtc_vblank_int[4]) {
6662 						drm_handle_vblank(rdev->ddev, 4);
6663 						rdev->pm.vblank_sync = true;
6664 						wake_up(&rdev->irq.vblank_queue);
6665 					}
6666 					if (atomic_read(&rdev->irq.pflip[4]))
6667 						radeon_crtc_handle_flip(rdev, 4);
6668 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6669 					DRM_DEBUG("IH: D5 vblank\n");
6670 				}
6671 				break;
6672 			case 1: /* D5 vline */
6673 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6674 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6675 					DRM_DEBUG("IH: D5 vline\n");
6676 				}
6677 				break;
6678 			default:
6679 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6680 				break;
6681 			}
6682 			break;
6683 		case 6: /* D6 vblank/vline */
6684 			switch (src_data) {
6685 			case 0: /* D6 vblank */
6686 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6687 					if (rdev->irq.crtc_vblank_int[5]) {
6688 						drm_handle_vblank(rdev->ddev, 5);
6689 						rdev->pm.vblank_sync = true;
6690 						wake_up(&rdev->irq.vblank_queue);
6691 					}
6692 					if (atomic_read(&rdev->irq.pflip[5]))
6693 						radeon_crtc_handle_flip(rdev, 5);
6694 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6695 					DRM_DEBUG("IH: D6 vblank\n");
6696 				}
6697 				break;
6698 			case 1: /* D6 vline */
6699 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6700 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6701 					DRM_DEBUG("IH: D6 vline\n");
6702 				}
6703 				break;
6704 			default:
6705 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6706 				break;
6707 			}
6708 			break;
6709 		case 42: /* HPD hotplug */
6710 			switch (src_data) {
6711 			case 0:
6712 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6713 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
6714 					queue_hotplug = true;
6715 					DRM_DEBUG("IH: HPD1\n");
6716 				}
6717 				break;
6718 			case 1:
6719 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6720 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6721 					queue_hotplug = true;
6722 					DRM_DEBUG("IH: HPD2\n");
6723 				}
6724 				break;
6725 			case 2:
6726 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6727 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6728 					queue_hotplug = true;
6729 					DRM_DEBUG("IH: HPD3\n");
6730 				}
6731 				break;
6732 			case 3:
6733 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6734 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6735 					queue_hotplug = true;
6736 					DRM_DEBUG("IH: HPD4\n");
6737 				}
6738 				break;
6739 			case 4:
6740 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6741 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6742 					queue_hotplug = true;
6743 					DRM_DEBUG("IH: HPD5\n");
6744 				}
6745 				break;
6746 			case 5:
6747 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6748 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6749 					queue_hotplug = true;
6750 					DRM_DEBUG("IH: HPD6\n");
6751 				}
6752 				break;
6753 			default:
6754 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6755 				break;
6756 			}
6757 			break;
6758 		case 124: /* UVD */
6759 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6760 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6761 			break;
6762 		case 146:
6763 		case 147:
6764 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6765 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6766 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
6767 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6768 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6769 				addr);
6770 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6771 				status);
6772 			cik_vm_decode_fault(rdev, status, addr, mc_client);
6773 			/* reset addr and status */
6774 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6775 			break;
6776 		case 176: /* GFX RB CP_INT */
6777 		case 177: /* GFX IB CP_INT */
6778 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6779 			break;
6780 		case 181: /* CP EOP event */
6781 			DRM_DEBUG("IH: CP EOP\n");
6782 			/* XXX check the bitfield order! */
6783 			me_id = (ring_id & 0x60) >> 5;
6784 			pipe_id = (ring_id & 0x18) >> 3;
6785 			queue_id = (ring_id & 0x7) >> 0;
6786 			switch (me_id) {
6787 			case 0:
6788 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6789 				break;
6790 			case 1:
6791 			case 2:
6792 				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
6793 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6794 				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
6795 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6796 				break;
6797 			}
6798 			break;
6799 		case 184: /* CP Privileged reg access */
6800 			DRM_ERROR("Illegal register access in command stream\n");
6801 			/* XXX check the bitfield order! */
6802 			me_id = (ring_id & 0x60) >> 5;
6803 			pipe_id = (ring_id & 0x18) >> 3;
6804 			queue_id = (ring_id & 0x7) >> 0;
6805 			switch (me_id) {
6806 			case 0:
6807 				/* This results in a full GPU reset, but all we need to do is soft
6808 				 * reset the CP for gfx
6809 				 */
6810 				queue_reset = true;
6811 				break;
6812 			case 1:
6813 				/* XXX compute */
6814 				queue_reset = true;
6815 				break;
6816 			case 2:
6817 				/* XXX compute */
6818 				queue_reset = true;
6819 				break;
6820 			}
6821 			break;
6822 		case 185: /* CP Privileged inst */
6823 			DRM_ERROR("Illegal instruction in command stream\n");
6824 			/* XXX check the bitfield order! */
6825 			me_id = (ring_id & 0x60) >> 5;
6826 			pipe_id = (ring_id & 0x18) >> 3;
6827 			queue_id = (ring_id & 0x7) >> 0;
6828 			switch (me_id) {
6829 			case 0:
6830 				/* This results in a full GPU reset, but all we need to do is soft
6831 				 * reset the CP for gfx
6832 				 */
6833 				queue_reset = true;
6834 				break;
6835 			case 1:
6836 				/* XXX compute */
6837 				queue_reset = true;
6838 				break;
6839 			case 2:
6840 				/* XXX compute */
6841 				queue_reset = true;
6842 				break;
6843 			}
6844 			break;
6845 		case 224: /* SDMA trap event */
6846 			/* XXX check the bitfield order! */
6847 			me_id = (ring_id & 0x3) >> 0;
6848 			queue_id = (ring_id & 0xc) >> 2;
6849 			DRM_DEBUG("IH: SDMA trap\n");
6850 			switch (me_id) {
6851 			case 0:
6852 				switch (queue_id) {
6853 				case 0:
6854 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6855 					break;
6856 				case 1:
6857 					/* XXX compute */
6858 					break;
6859 				case 2:
6860 					/* XXX compute */
6861 					break;
6862 				}
6863 				break;
6864 			case 1:
6865 				switch (queue_id) {
6866 				case 0:
6867 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6868 					break;
6869 				case 1:
6870 					/* XXX compute */
6871 					break;
6872 				case 2:
6873 					/* XXX compute */
6874 					break;
6875 				}
6876 				break;
6877 			}
6878 			break;
6879 		case 230: /* thermal low to high */
6880 			DRM_DEBUG("IH: thermal low to high\n");
6881 			rdev->pm.dpm.thermal.high_to_low = false;
6882 			queue_thermal = true;
6883 			break;
6884 		case 231: /* thermal high to low */
6885 			DRM_DEBUG("IH: thermal high to low\n");
6886 			rdev->pm.dpm.thermal.high_to_low = true;
6887 			queue_thermal = true;
6888 			break;
6889 		case 233: /* GUI IDLE */
6890 			DRM_DEBUG("IH: GUI idle\n");
6891 			break;
6892 		case 241: /* SDMA Privileged inst */
6893 		case 247: /* SDMA Privileged inst */
6894 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
6895 			/* XXX check the bitfield order! */
6896 			me_id = (ring_id & 0x3) >> 0;
6897 			queue_id = (ring_id & 0xc) >> 2;
6898 			switch (me_id) {
6899 			case 0:
6900 				switch (queue_id) {
6901 				case 0:
6902 					queue_reset = true;
6903 					break;
6904 				case 1:
6905 					/* XXX compute */
6906 					queue_reset = true;
6907 					break;
6908 				case 2:
6909 					/* XXX compute */
6910 					queue_reset = true;
6911 					break;
6912 				}
6913 				break;
6914 			case 1:
6915 				switch (queue_id) {
6916 				case 0:
6917 					queue_reset = true;
6918 					break;
6919 				case 1:
6920 					/* XXX compute */
6921 					queue_reset = true;
6922 					break;
6923 				case 2:
6924 					/* XXX compute */
6925 					queue_reset = true;
6926 					break;
6927 				}
6928 				break;
6929 			}
6930 			break;
6931 		default:
6932 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6933 			break;
6934 		}
6935 
6936 		/* wptr/rptr are in bytes! */
6937 		rptr += 16;
6938 		rptr &= rdev->ih.ptr_mask;
6939 	}
6940 	if (queue_hotplug)
6941 		schedule_work(&rdev->hotplug_work);
6942 	if (queue_reset)
6943 		schedule_work(&rdev->reset_work);
6944 	if (queue_thermal)
6945 		schedule_work(&rdev->pm.dpm.thermal.work);
6946 	rdev->ih.rptr = rptr;
6947 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
6948 	atomic_set(&rdev->ih.lock, 0);
6949 
6950 	/* make sure wptr hasn't changed while processing */
6951 	wptr = cik_get_ih_wptr(rdev);
6952 	if (wptr != rptr)
6953 		goto restart_ih;
6954 
6955 	return IRQ_HANDLED;
6956 }
6957 
6958 /*
6959  * startup/shutdown callbacks
6960  */
6961 /**
6962  * cik_startup - program the asic to a functional state
6963  *
6964  * @rdev: radeon_device pointer
6965  *
6966  * Programs the asic to a functional state (CIK).
6967  * Called by cik_init() and cik_resume().
6968  * Returns 0 for success, error for failure.
6969  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* load the microcode images if not already loaded; only dGPUs
	 * additionally require MC firmware (IGPs have no mc_fw)
	 */
	if (rdev->flags & RADEON_IS_IGP) {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	} else {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
		    !rdev->mc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}

		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* KV (spectre) and KB (kalindi) use different RLC
		 * save/restore register lists
		 */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start the fence driver on every ring that will be used */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD bring-up is best effort: on any failure just disable the
	 * UVD ring (ring_size = 0) rather than failing startup
	 */
	r = radeon_uvd_resume(rdev);
	if (!r) {
		r = uvd_v4_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
			     SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
			     SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* UVD ring only if the UVD resume above succeeded (ring_size != 0) */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
				     RADEON_CP_PACKET2);
		if (!r)
			r = uvd_v1_0_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
7195 
7196 /**
7197  * cik_resume - resume the asic to a functional state
7198  *
7199  * @rdev: radeon_device pointer
7200  *
7201  * Programs the asic to a functional state (CIK).
7202  * Called at resume.
7203  * Returns 0 for success, error for failure.
7204  */
7205 int cik_resume(struct radeon_device *rdev)
7206 {
7207 	int r;
7208 
7209 	/* post card */
7210 	atom_asic_init(rdev->mode_info.atom_context);
7211 
7212 	/* init golden registers */
7213 	cik_init_golden_registers(rdev);
7214 
7215 	rdev->accel_working = true;
7216 	r = cik_startup(rdev);
7217 	if (r) {
7218 		DRM_ERROR("cik startup failed on resume\n");
7219 		rdev->accel_working = false;
7220 		return r;
7221 	}
7222 
7223 	return r;
7224 
7225 }
7226 
7227 /**
7228  * cik_suspend - suspend the asic
7229  *
7230  * @rdev: radeon_device pointer
7231  *
7232  * Bring the chip into a state suitable for suspend (CIK).
7233  * Called at suspend.
7234  * Returns 0 for success.
7235  */
int cik_suspend(struct radeon_device *rdev)
{
	/* quiesce the hw in the reverse of the bring-up done by
	 * cik_startup(); driver-side allocations stay intact for resume
	 */
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	cik_cp_enable(rdev, false);	/* stop the CP rings */
	cik_sdma_enable(rdev, false);	/* stop the SDMA engines */
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;	/* suspend never fails */
}
7251 
7252 /* Plan is to move initialization in that function and use
7253  * helper function so that radeon_device_init pretty much
7254  * do nothing more than calling asic specific function. This
7255  * should also allow to remove a bunch of callback function
7256  * like vram_info.
7257  */
7258 /**
7259  * cik_init - asic specific driver and hw init
7260  *
7261  * @rdev: radeon_device pointer
7262  *
7263  * Setup asic specific driver variables and program the hw
7264  * to a functional state (CIK).
7265  * Called at driver startup.
7266  * Returns 0 for success, errors for failure.
7267  */
7268 int cik_init(struct radeon_device *rdev)
7269 {
7270 	struct radeon_ring *ring;
7271 	int r;
7272 
7273 	/* Read BIOS */
7274 	if (!radeon_get_bios(rdev)) {
7275 		if (ASIC_IS_AVIVO(rdev))
7276 			return -EINVAL;
7277 	}
7278 	/* Must be an ATOMBIOS */
7279 	if (!rdev->is_atom_bios) {
7280 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
7281 		return -EINVAL;
7282 	}
7283 	r = radeon_atombios_init(rdev);
7284 	if (r)
7285 		return r;
7286 
7287 	/* Post card if necessary */
7288 	if (!radeon_card_posted(rdev)) {
7289 		if (!rdev->bios) {
7290 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7291 			return -EINVAL;
7292 		}
7293 		DRM_INFO("GPU not posted. posting now...\n");
7294 		atom_asic_init(rdev->mode_info.atom_context);
7295 	}
7296 	/* init golden registers */
7297 	cik_init_golden_registers(rdev);
7298 	/* Initialize scratch registers */
7299 	cik_scratch_init(rdev);
7300 	/* Initialize surface registers */
7301 	radeon_surface_init(rdev);
7302 	/* Initialize clocks */
7303 	radeon_get_clock_info(rdev->ddev);
7304 
7305 	/* Fence driver */
7306 	r = radeon_fence_driver_init(rdev);
7307 	if (r)
7308 		return r;
7309 
7310 	/* initialize memory controller */
7311 	r = cik_mc_init(rdev);
7312 	if (r)
7313 		return r;
7314 	/* Memory manager */
7315 	r = radeon_bo_init(rdev);
7316 	if (r)
7317 		return r;
7318 
7319 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7320 	ring->ring_obj = NULL;
7321 	r600_ring_init(rdev, ring, 1024 * 1024);
7322 
7323 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7324 	ring->ring_obj = NULL;
7325 	r600_ring_init(rdev, ring, 1024 * 1024);
7326 	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7327 	if (r)
7328 		return r;
7329 
7330 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7331 	ring->ring_obj = NULL;
7332 	r600_ring_init(rdev, ring, 1024 * 1024);
7333 	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7334 	if (r)
7335 		return r;
7336 
7337 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7338 	ring->ring_obj = NULL;
7339 	r600_ring_init(rdev, ring, 256 * 1024);
7340 
7341 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7342 	ring->ring_obj = NULL;
7343 	r600_ring_init(rdev, ring, 256 * 1024);
7344 
7345 	r = radeon_uvd_init(rdev);
7346 	if (!r) {
7347 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7348 		ring->ring_obj = NULL;
7349 		r600_ring_init(rdev, ring, 4096);
7350 	}
7351 
7352 	rdev->ih.ring_obj = NULL;
7353 	r600_ih_ring_init(rdev, 64 * 1024);
7354 
7355 	r = r600_pcie_gart_init(rdev);
7356 	if (r)
7357 		return r;
7358 
7359 	rdev->accel_working = true;
7360 	r = cik_startup(rdev);
7361 	if (r) {
7362 		dev_err(rdev->dev, "disabling GPU acceleration\n");
7363 		cik_cp_fini(rdev);
7364 		cik_sdma_fini(rdev);
7365 		cik_irq_fini(rdev);
7366 		sumo_rlc_fini(rdev);
7367 		cik_mec_fini(rdev);
7368 		radeon_wb_fini(rdev);
7369 		radeon_ib_pool_fini(rdev);
7370 		radeon_vm_manager_fini(rdev);
7371 		radeon_irq_kms_fini(rdev);
7372 		cik_pcie_gart_fini(rdev);
7373 		rdev->accel_working = false;
7374 	}
7375 
7376 	/* Don't start up if the MC ucode is missing.
7377 	 * The default clocks and voltages before the MC ucode
7378 	 * is loaded are not suffient for advanced operations.
7379 	 */
7380 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7381 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
7382 		return -EINVAL;
7383 	}
7384 
7385 	return 0;
7386 }
7387 
7388 /**
7389  * cik_fini - asic specific driver and hw fini
7390  *
7391  * @rdev: radeon_device pointer
7392  *
7393  * Tear down the asic specific driver variables and program the hw
7394  * to an idle state (CIK).
7395  * Called at driver unload.
7396  */
void cik_fini(struct radeon_device *rdev)
{
	/* full driver teardown at unload: release everything that
	 * cik_init() and cik_startup() created
	 */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;	/* clear the stale pointer after freeing */
}
7421 
7422 /* display watermark setup */
7423 /**
7424  * dce8_line_buffer_adjust - Set up the line buffer
7425  *
7426  * @rdev: radeon_device pointer
7427  * @radeon_crtc: the selected display controller
7428  * @mode: the current display mode on the selected display
7429  * controller
7430  *
7431  * Setup up the line buffer allocation for
7432  * the selected display controller (CIK).
7433  * Returns the line buffer size in pixels.
7434  */
7435 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
7436 				   struct radeon_crtc *radeon_crtc,
7437 				   struct drm_display_mode *mode)
7438 {
7439 	u32 tmp, buffer_alloc, i;
7440 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
7441 	/*
7442 	 * Line Buffer Setup
7443 	 * There are 6 line buffers, one for each display controllers.
7444 	 * There are 3 partitions per LB. Select the number of partitions
7445 	 * to enable based on the display width.  For display widths larger
7446 	 * than 4096, you need use to use 2 display controllers and combine
7447 	 * them using the stereo blender.
7448 	 */
7449 	if (radeon_crtc->base.enabled && mode) {
7450 		if (mode->crtc_hdisplay < 1920) {
7451 			tmp = 1;
7452 			buffer_alloc = 2;
7453 		} else if (mode->crtc_hdisplay < 2560) {
7454 			tmp = 2;
7455 			buffer_alloc = 2;
7456 		} else if (mode->crtc_hdisplay < 4096) {
7457 			tmp = 0;
7458 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
7459 		} else {
7460 			DRM_DEBUG_KMS("Mode too big for LB!\n");
7461 			tmp = 0;
7462 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
7463 		}
7464 	} else {
7465 		tmp = 1;
7466 		buffer_alloc = 0;
7467 	}
7468 
7469 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
7470 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
7471 
7472 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
7473 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
7474 	for (i = 0; i < rdev->usec_timeout; i++) {
7475 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
7476 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
7477 			break;
7478 		udelay(1);
7479 	}
7480 
7481 	if (radeon_crtc->base.enabled && mode) {
7482 		switch (tmp) {
7483 		case 0:
7484 		default:
7485 			return 4096 * 2;
7486 		case 1:
7487 			return 1920 * 2;
7488 		case 2:
7489 			return 2560 * 2;
7490 		}
7491 	}
7492 
7493 	/* controller not enabled, so no lb used */
7494 	return 0;
7495 }
7496 
7497 /**
7498  * cik_get_number_of_dram_channels - get the number of dram channels
7499  *
7500  * @rdev: radeon_device pointer
7501  *
7502  * Look up the number of video ram channels (CIK).
7503  * Used for display watermark bandwidth calculations
7504  * Returns the number of dram channels
7505  */
7506 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
7507 {
7508 	u32 tmp = RREG32(MC_SHARED_CHMAP);
7509 
7510 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
7511 	case 0:
7512 	default:
7513 		return 1;
7514 	case 1:
7515 		return 2;
7516 	case 2:
7517 		return 4;
7518 	case 3:
7519 		return 8;
7520 	case 4:
7521 		return 3;
7522 	case 5:
7523 		return 6;
7524 	case 6:
7525 		return 10;
7526 	case 7:
7527 		return 12;
7528 	case 8:
7529 		return 16;
7530 	}
7531 }
7532 
/* input parameters for the DCE8 display watermark calculations below */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
7548 
7549 /**
7550  * dce8_dram_bandwidth - get the dram bandwidth
7551  *
7552  * @wm: watermark calculation data
7553  *
7554  * Calculate the raw dram bandwidth (CIK).
7555  * Used for display watermark bandwidth calculations
7556  * Returns the dram bandwidth in MBytes/s
7557  */
7558 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
7559 {
7560 	/* Calculate raw DRAM Bandwidth */
7561 	fixed20_12 dram_efficiency; /* 0.7 */
7562 	fixed20_12 yclk, dram_channels, bandwidth;
7563 	fixed20_12 a;
7564 
7565 	a.full = dfixed_const(1000);
7566 	yclk.full = dfixed_const(wm->yclk);
7567 	yclk.full = dfixed_div(yclk, a);
7568 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
7569 	a.full = dfixed_const(10);
7570 	dram_efficiency.full = dfixed_const(7);
7571 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
7572 	bandwidth.full = dfixed_mul(dram_channels, yclk);
7573 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
7574 
7575 	return dfixed_trunc(bandwidth);
7576 }
7577 
7578 /**
7579  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
7580  *
7581  * @wm: watermark calculation data
7582  *
7583  * Calculate the dram bandwidth used for display (CIK).
7584  * Used for display watermark bandwidth calculations
7585  * Returns the dram bandwidth for display in MBytes/s
7586  */
7587 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7588 {
7589 	/* Calculate DRAM Bandwidth and the part allocated to display. */
7590 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
7591 	fixed20_12 yclk, dram_channels, bandwidth;
7592 	fixed20_12 a;
7593 
7594 	a.full = dfixed_const(1000);
7595 	yclk.full = dfixed_const(wm->yclk);
7596 	yclk.full = dfixed_div(yclk, a);
7597 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
7598 	a.full = dfixed_const(10);
7599 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
7600 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
7601 	bandwidth.full = dfixed_mul(dram_channels, yclk);
7602 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
7603 
7604 	return dfixed_trunc(bandwidth);
7605 }
7606 
7607 /**
7608  * dce8_data_return_bandwidth - get the data return bandwidth
7609  *
7610  * @wm: watermark calculation data
7611  *
7612  * Calculate the data return bandwidth used for display (CIK).
7613  * Used for display watermark bandwidth calculations
7614  * Returns the data return bandwidth in MBytes/s
7615  */
7616 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
7617 {
7618 	/* Calculate the display Data return Bandwidth */
7619 	fixed20_12 return_efficiency; /* 0.8 */
7620 	fixed20_12 sclk, bandwidth;
7621 	fixed20_12 a;
7622 
7623 	a.full = dfixed_const(1000);
7624 	sclk.full = dfixed_const(wm->sclk);
7625 	sclk.full = dfixed_div(sclk, a);
7626 	a.full = dfixed_const(10);
7627 	return_efficiency.full = dfixed_const(8);
7628 	return_efficiency.full = dfixed_div(return_efficiency, a);
7629 	a.full = dfixed_const(32);
7630 	bandwidth.full = dfixed_mul(a, sclk);
7631 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
7632 
7633 	return dfixed_trunc(bandwidth);
7634 }
7635 
7636 /**
7637  * dce8_dmif_request_bandwidth - get the dmif bandwidth
7638  *
7639  * @wm: watermark calculation data
7640  *
7641  * Calculate the dmif bandwidth used for display (CIK).
7642  * Used for display watermark bandwidth calculations
7643  * Returns the dmif bandwidth in MBytes/s
7644  */
7645 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
7646 {
7647 	/* Calculate the DMIF Request Bandwidth */
7648 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
7649 	fixed20_12 disp_clk, bandwidth;
7650 	fixed20_12 a, b;
7651 
7652 	a.full = dfixed_const(1000);
7653 	disp_clk.full = dfixed_const(wm->disp_clk);
7654 	disp_clk.full = dfixed_div(disp_clk, a);
7655 	a.full = dfixed_const(32);
7656 	b.full = dfixed_mul(a, disp_clk);
7657 
7658 	a.full = dfixed_const(10);
7659 	disp_clk_request_efficiency.full = dfixed_const(8);
7660 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
7661 
7662 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
7663 
7664 	return dfixed_trunc(bandwidth);
7665 }
7666 
7667 /**
7668  * dce8_available_bandwidth - get the min available bandwidth
7669  *
7670  * @wm: watermark calculation data
7671  *
7672  * Calculate the min available bandwidth used for display (CIK).
7673  * Used for display watermark bandwidth calculations
7674  * Returns the min available bandwidth in MBytes/s
7675  */
7676 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
7677 {
7678 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
7679 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
7680 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
7681 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
7682 
7683 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
7684 }
7685 
7686 /**
7687  * dce8_average_bandwidth - get the average available bandwidth
7688  *
7689  * @wm: watermark calculation data
7690  *
7691  * Calculate the average available bandwidth used for display (CIK).
7692  * Used for display watermark bandwidth calculations
7693  * Returns the average available bandwidth in MBytes/s
7694  */
7695 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
7696 {
7697 	/* Calculate the display mode Average Bandwidth
7698 	 * DisplayMode should contain the source and destination dimensions,
7699 	 * timing, etc.
7700 	 */
7701 	fixed20_12 bpp;
7702 	fixed20_12 line_time;
7703 	fixed20_12 src_width;
7704 	fixed20_12 bandwidth;
7705 	fixed20_12 a;
7706 
7707 	a.full = dfixed_const(1000);
7708 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
7709 	line_time.full = dfixed_div(line_time, a);
7710 	bpp.full = dfixed_const(wm->bytes_per_pixel);
7711 	src_width.full = dfixed_const(wm->src_width);
7712 	bandwidth.full = dfixed_mul(src_width, bpp);
7713 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
7714 	bandwidth.full = dfixed_div(bandwidth, line_time);
7715 
7716 	return dfixed_trunc(bandwidth);
7717 }
7718 
7719 /**
7720  * dce8_latency_watermark - get the latency watermark
7721  *
7722  * @wm: watermark calculation data
7723  *
7724  * Calculate the latency watermark (CIK).
7725  * Used for display watermark bandwidth calculations
7726  * Returns the latency watermark in ns
7727  */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	/* no active heads -> no watermark needed */
	if (wm->num_heads == 0)
		return 0;

	/* heavy downscale, many vertical taps, or interlace at >= 2x
	 * scale require more source lines buffered per destination line
	 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / ((mc_latency + 512) / disp_clk) */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = disp_clk / 1000 * bytes_per_pixel */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* line buffer fill rate is limited by the smaller of the two */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to refill one line's worth of the line buffer */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* pad the latency when the line buffer cannot be refilled within
	 * the active display time
	 */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
7790 
7791 /**
7792  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
7793  * average and available dram bandwidth
7794  *
7795  * @wm: watermark calculation data
7796  *
7797  * Check if the display average bandwidth fits in the display
7798  * dram bandwidth (CIK).
7799  * Used for display watermark bandwidth calculations
7800  * Returns true if the display fits, false if not.
7801  */
7802 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7803 {
7804 	if (dce8_average_bandwidth(wm) <=
7805 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
7806 		return true;
7807 	else
7808 		return false;
7809 }
7810 
7811 /**
7812  * dce8_average_bandwidth_vs_available_bandwidth - check
7813  * average and available bandwidth
7814  *
7815  * @wm: watermark calculation data
7816  *
7817  * Check if the display average bandwidth fits in the display
7818  * available bandwidth (CIK).
7819  * Used for display watermark bandwidth calculations
7820  * Returns true if the display fits, false if not.
7821  */
7822 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
7823 {
7824 	if (dce8_average_bandwidth(wm) <=
7825 	    (dce8_available_bandwidth(wm) / wm->num_heads))
7826 		return true;
7827 	else
7828 		return false;
7829 }
7830 
7831 /**
7832  * dce8_check_latency_hiding - check latency hiding
7833  *
7834  * @wm: watermark calculation data
7835  *
7836  * Check latency hiding (CIK).
7837  * Used for display watermark bandwidth calculations
7838  * Returns true if the display fits, false if not.
7839  */
7840 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
7841 {
7842 	u32 lb_partitions = wm->lb_size / wm->src_width;
7843 	u32 line_time = wm->active_time + wm->blank_time;
7844 	u32 latency_tolerant_lines;
7845 	u32 latency_hiding;
7846 	fixed20_12 a;
7847 
7848 	a.full = dfixed_const(1);
7849 	if (wm->vsc.full > a.full)
7850 		latency_tolerant_lines = 1;
7851 	else {
7852 		if (lb_partitions <= (wm->vtaps + 1))
7853 			latency_tolerant_lines = 1;
7854 		else
7855 			latency_tolerant_lines = 2;
7856 	}
7857 
7858 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
7859 
7860 	if (dce8_latency_watermark(wm) <= latency_hiding)
7861 		return true;
7862 	else
7863 		return false;
7864 }
7865 
7866 /**
7867  * dce8_program_watermarks - program display watermarks
7868  *
7869  * @rdev: radeon_device pointer
7870  * @radeon_crtc: the selected display controller
7871  * @lb_size: line buffer size
7872  * @num_heads: number of display controllers in use
7873  *
7874  * Calculate and program the display watermarks for the
7875  * selected display controller (CIK).
7876  */
7877 static void dce8_program_watermarks(struct radeon_device *rdev,
7878 				    struct radeon_crtc *radeon_crtc,
7879 				    u32 lb_size, u32 num_heads)
7880 {
7881 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
7882 	struct dce8_wm_params wm_low, wm_high;
7883 	u32 pixel_period;
7884 	u32 line_time = 0;
7885 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
7886 	u32 tmp, wm_mask;
7887 
7888 	if (radeon_crtc->base.enabled && num_heads && mode) {
7889 		pixel_period = 1000000 / (u32)mode->clock;
7890 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
7891 
7892 		/* watermark for high clocks */
7893 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
7894 		    rdev->pm.dpm_enabled) {
7895 			wm_high.yclk =
7896 				radeon_dpm_get_mclk(rdev, false) * 10;
7897 			wm_high.sclk =
7898 				radeon_dpm_get_sclk(rdev, false) * 10;
7899 		} else {
7900 			wm_high.yclk = rdev->pm.current_mclk * 10;
7901 			wm_high.sclk = rdev->pm.current_sclk * 10;
7902 		}
7903 
7904 		wm_high.disp_clk = mode->clock;
7905 		wm_high.src_width = mode->crtc_hdisplay;
7906 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
7907 		wm_high.blank_time = line_time - wm_high.active_time;
7908 		wm_high.interlaced = false;
7909 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
7910 			wm_high.interlaced = true;
7911 		wm_high.vsc = radeon_crtc->vsc;
7912 		wm_high.vtaps = 1;
7913 		if (radeon_crtc->rmx_type != RMX_OFF)
7914 			wm_high.vtaps = 2;
7915 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
7916 		wm_high.lb_size = lb_size;
7917 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
7918 		wm_high.num_heads = num_heads;
7919 
7920 		/* set for high clocks */
7921 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
7922 
7923 		/* possibly force display priority to high */
7924 		/* should really do this at mode validation time... */
7925 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
7926 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
7927 		    !dce8_check_latency_hiding(&wm_high) ||
7928 		    (rdev->disp_priority == 2)) {
7929 			DRM_DEBUG_KMS("force priority to high\n");
7930 		}
7931 
7932 		/* watermark for low clocks */
7933 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
7934 		    rdev->pm.dpm_enabled) {
7935 			wm_low.yclk =
7936 				radeon_dpm_get_mclk(rdev, true) * 10;
7937 			wm_low.sclk =
7938 				radeon_dpm_get_sclk(rdev, true) * 10;
7939 		} else {
7940 			wm_low.yclk = rdev->pm.current_mclk * 10;
7941 			wm_low.sclk = rdev->pm.current_sclk * 10;
7942 		}
7943 
7944 		wm_low.disp_clk = mode->clock;
7945 		wm_low.src_width = mode->crtc_hdisplay;
7946 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
7947 		wm_low.blank_time = line_time - wm_low.active_time;
7948 		wm_low.interlaced = false;
7949 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
7950 			wm_low.interlaced = true;
7951 		wm_low.vsc = radeon_crtc->vsc;
7952 		wm_low.vtaps = 1;
7953 		if (radeon_crtc->rmx_type != RMX_OFF)
7954 			wm_low.vtaps = 2;
7955 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
7956 		wm_low.lb_size = lb_size;
7957 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
7958 		wm_low.num_heads = num_heads;
7959 
7960 		/* set for low clocks */
7961 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
7962 
7963 		/* possibly force display priority to high */
7964 		/* should really do this at mode validation time... */
7965 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
7966 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
7967 		    !dce8_check_latency_hiding(&wm_low) ||
7968 		    (rdev->disp_priority == 2)) {
7969 			DRM_DEBUG_KMS("force priority to high\n");
7970 		}
7971 	}
7972 
7973 	/* select wm A */
7974 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
7975 	tmp = wm_mask;
7976 	tmp &= ~LATENCY_WATERMARK_MASK(3);
7977 	tmp |= LATENCY_WATERMARK_MASK(1);
7978 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
7979 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
7980 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
7981 		LATENCY_HIGH_WATERMARK(line_time)));
7982 	/* select wm B */
7983 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
7984 	tmp &= ~LATENCY_WATERMARK_MASK(3);
7985 	tmp |= LATENCY_WATERMARK_MASK(2);
7986 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
7987 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
7988 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
7989 		LATENCY_HIGH_WATERMARK(line_time)));
7990 	/* restore original selection */
7991 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
7992 
7993 	/* save values for DPM */
7994 	radeon_crtc->line_time = line_time;
7995 	radeon_crtc->wm_high = latency_watermark_a;
7996 	radeon_crtc->wm_low = latency_watermark_b;
7997 }
7998 
7999 /**
8000  * dce8_bandwidth_update - program display watermarks
8001  *
8002  * @rdev: radeon_device pointer
8003  *
8004  * Calculate and program the display watermarks and line
8005  * buffer allocation (CIK).
8006  */
8007 void dce8_bandwidth_update(struct radeon_device *rdev)
8008 {
8009 	struct drm_display_mode *mode = NULL;
8010 	u32 num_heads = 0, lb_size;
8011 	int i;
8012 
8013 	radeon_update_display_priority(rdev);
8014 
8015 	for (i = 0; i < rdev->num_crtc; i++) {
8016 		if (rdev->mode_info.crtcs[i]->base.enabled)
8017 			num_heads++;
8018 	}
8019 	for (i = 0; i < rdev->num_crtc; i++) {
8020 		mode = &rdev->mode_info.crtcs[i]->base.mode;
8021 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8022 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8023 	}
8024 }
8025 
8026 /**
8027  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8028  *
8029  * @rdev: radeon_device pointer
8030  *
8031  * Fetches a GPU clock counter snapshot (SI).
8032  * Returns the 64 bit clock counter snapshot.
8033  */
8034 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8035 {
8036 	uint64_t clock;
8037 
8038 	mutex_lock(&rdev->gpu_clock_mutex);
8039 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8040 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8041 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8042 	mutex_unlock(&rdev->gpu_clock_mutex);
8043 	return clock;
8044 }
8045 
8046 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8047                               u32 cntl_reg, u32 status_reg)
8048 {
8049 	int r, i;
8050 	struct atom_clock_dividers dividers;
8051 	uint32_t tmp;
8052 
8053 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8054 					   clock, false, &dividers);
8055 	if (r)
8056 		return r;
8057 
8058 	tmp = RREG32_SMC(cntl_reg);
8059 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8060 	tmp |= dividers.post_divider;
8061 	WREG32_SMC(cntl_reg, tmp);
8062 
8063 	for (i = 0; i < 100; i++) {
8064 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
8065 			break;
8066 		mdelay(10);
8067 	}
8068 	if (i == 100)
8069 		return -ETIMEDOUT;
8070 
8071 	return 0;
8072 }
8073 
8074 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8075 {
8076 	int r = 0;
8077 
8078 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8079 	if (r)
8080 		return r;
8081 
8082 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8083 	return r;
8084 }
8085 
/**
 * cik_pcie_gen3_enable - enable higher PCIE link speeds
 *
 * @rdev: radeon_device pointer
 *
 * Attempt to bring the PCIE link up to gen2/gen3 speed depending on
 * what the platform supports (CIK).  Honors the radeon.pcie_gen2
 * module parameter; no-op on IGPs and non-PCIE parts.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	/* NOTE(review): root (bus->self) can be NULL for a device on a
	 * root bus — pci_pcie_cap(root) below would dereference it;
	 * confirm callers only run this on discrete GPUs behind a bridge */
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* user disabled higher link speeds */
	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing faster than gen1 supported */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	/* 0 = gen1, 1 = gen2, 2 = gen3 */
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the LNKCTL settings of both ends so the
			 * HAWD bits can be restored after retraining */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* widen the link back to the max detected width if
			 * it is currently running narrower */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* redo link equalization up to 10 times */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then request equalization redo */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				/* restore the saved HAWD bit on both ends */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				/* restore bit 4 and bits 9:11 of LNKCTL2 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				/* release the quiesce */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed field (low 4 bits of LNKCTL2) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hardware to clear the initiate bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
8242 
/**
 * cik_program_aspm - program PCIE ASPM (active state power management)
 *
 * @rdev: radeon_device pointer
 *
 * Configure L0s/L1 inactivity timers, PLL power-down in L1, dynamic
 * lane power states and CLKREQ-based clocking (CIK).  Honors the
 * radeon.aspm module parameter; no-op on IGPs and non-PCIE parts.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* local policy knobs; all features currently left enabled */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* user disabled ASPM */
	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the number of fast training sequences to transmit */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* set up the L0s/L1 inactivity timers; write is deferred until
	 * the L1 handling below decides the final value */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PHY PLLs to power down in the off and
			 * TXS2 states on both PIF blocks */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			if (!disable_clkreq) {
				/* NOTE(review): root (bus->self) may be NULL
				 * on a root bus; pcie_capability_read_dword
				 * would dereference it — confirm */
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				/* CLKREQ-based clock management only if the
				 * upstream port advertises clock PM */
				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* switch the thermal monitor and misc clocks
				 * away from the reference clock so it can gate */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: flush the pending LC_CNTL update from above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* drop the L0s inactivity timer again if the link is
		 * reversed in both directions */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
8390