xref: /openbmc/linux/drivers/gpu/drm/radeon/cik.c (revision e23feb16)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35 
36 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
44 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
45 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
46 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
47 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
48 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
49 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
50 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
51 MODULE_FIRMWARE("radeon/KABINI_me.bin");
52 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
53 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
54 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
55 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
56 
57 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
58 extern void r600_ih_ring_fini(struct radeon_device *rdev);
59 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
60 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
61 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
62 extern void sumo_rlc_fini(struct radeon_device *rdev);
63 extern int sumo_rlc_init(struct radeon_device *rdev);
64 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
65 extern void si_rlc_reset(struct radeon_device *rdev);
66 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
67 extern int cik_sdma_resume(struct radeon_device *rdev);
68 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
69 extern void cik_sdma_fini(struct radeon_device *rdev);
70 extern void cik_sdma_vm_set_page(struct radeon_device *rdev,
71 				 struct radeon_ib *ib,
72 				 uint64_t pe,
73 				 uint64_t addr, unsigned count,
74 				 uint32_t incr, uint32_t flags);
75 static void cik_rlc_stop(struct radeon_device *rdev);
76 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
77 static void cik_program_aspm(struct radeon_device *rdev);
78 static void cik_init_pg(struct radeon_device *rdev);
79 static void cik_init_cg(struct radeon_device *rdev);
80 static void cik_fini_pg(struct radeon_device *rdev);
81 static void cik_fini_cg(struct radeon_device *rdev);
82 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
83 					  bool enable);
84 
85 /* get temperature in millidegrees */
86 int ci_get_temp(struct radeon_device *rdev)
87 {
88 	u32 temp;
89 	int actual_temp = 0;
90 
91 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
92 		CTF_TEMP_SHIFT;
93 
94 	if (temp & 0x200)
95 		actual_temp = 255;
96 	else
97 		actual_temp = temp & 0x1ff;
98 
99 	actual_temp = actual_temp * 1000;
100 
101 	return actual_temp;
102 }
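
/*
 * Worked example (illustrative): a raw CTF_TEMP field of 0x31 (49) has
 * bit 9 clear, so actual_temp = 0x31 & 0x1ff = 49 degrees C, reported as
 * 49000 millidegrees.  Any reading with bit 9 set is clamped to the
 * 255 C maximum, i.e. 255000.
 */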
103 
104 /* get temperature in millidegrees */
105 int kv_get_temp(struct radeon_device *rdev)
106 {
107 	u32 temp;
108 	int actual_temp = 0;
109 
110 	temp = RREG32_SMC(0xC0300E0C);
111 
112 	if (temp)
113 		actual_temp = (temp / 8) - 49;
114 	else
115 		actual_temp = 0;
116 
117 	actual_temp = actual_temp * 1000;
118 
119 	return actual_temp;
120 }
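
/*
 * Worked example (illustrative): a raw SMC reading of 424 gives
 * (424 / 8) - 49 = 4 degrees C, i.e. 4000 millidegrees; a reading of 0
 * is treated as "no data" and reported as 0.
 */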
121 
122 /*
123  * Indirect register accessors
124  */
125 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
126 {
127 	unsigned long flags;
128 	u32 r;
129 
130 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
131 	WREG32(PCIE_INDEX, reg);
132 	(void)RREG32(PCIE_INDEX);
133 	r = RREG32(PCIE_DATA);
134 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
135 	return r;
136 }
137 
138 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
139 {
140 	unsigned long flags;
141 
142 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
143 	WREG32(PCIE_INDEX, reg);
144 	(void)RREG32(PCIE_INDEX);
145 	WREG32(PCIE_DATA, v);
146 	(void)RREG32(PCIE_DATA);
147 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
148 }
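
/*
 * Both accessors use the classic index/data pair pattern: write the target
 * offset to PCIE_INDEX, then access PCIE_DATA.  The (void)RREG32() read-backs
 * flush the posted writes so the index is latched before the data access.
 * Illustrative use (0x1234 is a made-up offset):
 *
 *	u32 val = cik_pciep_rreg(rdev, 0x1234);
 *	cik_pciep_wreg(rdev, 0x1234, val | 0x1);
 */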
149 
150 static const u32 spectre_rlc_save_restore_register_list[] =
151 {
152 	(0x0e00 << 16) | (0xc12c >> 2),
153 	0x00000000,
154 	(0x0e00 << 16) | (0xc140 >> 2),
155 	0x00000000,
156 	(0x0e00 << 16) | (0xc150 >> 2),
157 	0x00000000,
158 	(0x0e00 << 16) | (0xc15c >> 2),
159 	0x00000000,
160 	(0x0e00 << 16) | (0xc168 >> 2),
161 	0x00000000,
162 	(0x0e00 << 16) | (0xc170 >> 2),
163 	0x00000000,
164 	(0x0e00 << 16) | (0xc178 >> 2),
165 	0x00000000,
166 	(0x0e00 << 16) | (0xc204 >> 2),
167 	0x00000000,
168 	(0x0e00 << 16) | (0xc2b4 >> 2),
169 	0x00000000,
170 	(0x0e00 << 16) | (0xc2b8 >> 2),
171 	0x00000000,
172 	(0x0e00 << 16) | (0xc2bc >> 2),
173 	0x00000000,
174 	(0x0e00 << 16) | (0xc2c0 >> 2),
175 	0x00000000,
176 	(0x0e00 << 16) | (0x8228 >> 2),
177 	0x00000000,
178 	(0x0e00 << 16) | (0x829c >> 2),
179 	0x00000000,
180 	(0x0e00 << 16) | (0x869c >> 2),
181 	0x00000000,
182 	(0x0600 << 16) | (0x98f4 >> 2),
183 	0x00000000,
184 	(0x0e00 << 16) | (0x98f8 >> 2),
185 	0x00000000,
186 	(0x0e00 << 16) | (0x9900 >> 2),
187 	0x00000000,
188 	(0x0e00 << 16) | (0xc260 >> 2),
189 	0x00000000,
190 	(0x0e00 << 16) | (0x90e8 >> 2),
191 	0x00000000,
192 	(0x0e00 << 16) | (0x3c000 >> 2),
193 	0x00000000,
194 	(0x0e00 << 16) | (0x3c00c >> 2),
195 	0x00000000,
196 	(0x0e00 << 16) | (0x8c1c >> 2),
197 	0x00000000,
198 	(0x0e00 << 16) | (0x9700 >> 2),
199 	0x00000000,
200 	(0x0e00 << 16) | (0xcd20 >> 2),
201 	0x00000000,
202 	(0x4e00 << 16) | (0xcd20 >> 2),
203 	0x00000000,
204 	(0x5e00 << 16) | (0xcd20 >> 2),
205 	0x00000000,
206 	(0x6e00 << 16) | (0xcd20 >> 2),
207 	0x00000000,
208 	(0x7e00 << 16) | (0xcd20 >> 2),
209 	0x00000000,
210 	(0x8e00 << 16) | (0xcd20 >> 2),
211 	0x00000000,
212 	(0x9e00 << 16) | (0xcd20 >> 2),
213 	0x00000000,
214 	(0xae00 << 16) | (0xcd20 >> 2),
215 	0x00000000,
216 	(0xbe00 << 16) | (0xcd20 >> 2),
217 	0x00000000,
218 	(0x0e00 << 16) | (0x89bc >> 2),
219 	0x00000000,
220 	(0x0e00 << 16) | (0x8900 >> 2),
221 	0x00000000,
222 	0x3,
223 	(0x0e00 << 16) | (0xc130 >> 2),
224 	0x00000000,
225 	(0x0e00 << 16) | (0xc134 >> 2),
226 	0x00000000,
227 	(0x0e00 << 16) | (0xc1fc >> 2),
228 	0x00000000,
229 	(0x0e00 << 16) | (0xc208 >> 2),
230 	0x00000000,
231 	(0x0e00 << 16) | (0xc264 >> 2),
232 	0x00000000,
233 	(0x0e00 << 16) | (0xc268 >> 2),
234 	0x00000000,
235 	(0x0e00 << 16) | (0xc26c >> 2),
236 	0x00000000,
237 	(0x0e00 << 16) | (0xc270 >> 2),
238 	0x00000000,
239 	(0x0e00 << 16) | (0xc274 >> 2),
240 	0x00000000,
241 	(0x0e00 << 16) | (0xc278 >> 2),
242 	0x00000000,
243 	(0x0e00 << 16) | (0xc27c >> 2),
244 	0x00000000,
245 	(0x0e00 << 16) | (0xc280 >> 2),
246 	0x00000000,
247 	(0x0e00 << 16) | (0xc284 >> 2),
248 	0x00000000,
249 	(0x0e00 << 16) | (0xc288 >> 2),
250 	0x00000000,
251 	(0x0e00 << 16) | (0xc28c >> 2),
252 	0x00000000,
253 	(0x0e00 << 16) | (0xc290 >> 2),
254 	0x00000000,
255 	(0x0e00 << 16) | (0xc294 >> 2),
256 	0x00000000,
257 	(0x0e00 << 16) | (0xc298 >> 2),
258 	0x00000000,
259 	(0x0e00 << 16) | (0xc29c >> 2),
260 	0x00000000,
261 	(0x0e00 << 16) | (0xc2a0 >> 2),
262 	0x00000000,
263 	(0x0e00 << 16) | (0xc2a4 >> 2),
264 	0x00000000,
265 	(0x0e00 << 16) | (0xc2a8 >> 2),
266 	0x00000000,
267 	(0x0e00 << 16) | (0xc2ac >> 2),
268 	0x00000000,
269 	(0x0e00 << 16) | (0xc2b0 >> 2),
270 	0x00000000,
271 	(0x0e00 << 16) | (0x301d0 >> 2),
272 	0x00000000,
273 	(0x0e00 << 16) | (0x30238 >> 2),
274 	0x00000000,
275 	(0x0e00 << 16) | (0x30250 >> 2),
276 	0x00000000,
277 	(0x0e00 << 16) | (0x30254 >> 2),
278 	0x00000000,
279 	(0x0e00 << 16) | (0x30258 >> 2),
280 	0x00000000,
281 	(0x0e00 << 16) | (0x3025c >> 2),
282 	0x00000000,
283 	(0x4e00 << 16) | (0xc900 >> 2),
284 	0x00000000,
285 	(0x5e00 << 16) | (0xc900 >> 2),
286 	0x00000000,
287 	(0x6e00 << 16) | (0xc900 >> 2),
288 	0x00000000,
289 	(0x7e00 << 16) | (0xc900 >> 2),
290 	0x00000000,
291 	(0x8e00 << 16) | (0xc900 >> 2),
292 	0x00000000,
293 	(0x9e00 << 16) | (0xc900 >> 2),
294 	0x00000000,
295 	(0xae00 << 16) | (0xc900 >> 2),
296 	0x00000000,
297 	(0xbe00 << 16) | (0xc900 >> 2),
298 	0x00000000,
299 	(0x4e00 << 16) | (0xc904 >> 2),
300 	0x00000000,
301 	(0x5e00 << 16) | (0xc904 >> 2),
302 	0x00000000,
303 	(0x6e00 << 16) | (0xc904 >> 2),
304 	0x00000000,
305 	(0x7e00 << 16) | (0xc904 >> 2),
306 	0x00000000,
307 	(0x8e00 << 16) | (0xc904 >> 2),
308 	0x00000000,
309 	(0x9e00 << 16) | (0xc904 >> 2),
310 	0x00000000,
311 	(0xae00 << 16) | (0xc904 >> 2),
312 	0x00000000,
313 	(0xbe00 << 16) | (0xc904 >> 2),
314 	0x00000000,
315 	(0x4e00 << 16) | (0xc908 >> 2),
316 	0x00000000,
317 	(0x5e00 << 16) | (0xc908 >> 2),
318 	0x00000000,
319 	(0x6e00 << 16) | (0xc908 >> 2),
320 	0x00000000,
321 	(0x7e00 << 16) | (0xc908 >> 2),
322 	0x00000000,
323 	(0x8e00 << 16) | (0xc908 >> 2),
324 	0x00000000,
325 	(0x9e00 << 16) | (0xc908 >> 2),
326 	0x00000000,
327 	(0xae00 << 16) | (0xc908 >> 2),
328 	0x00000000,
329 	(0xbe00 << 16) | (0xc908 >> 2),
330 	0x00000000,
331 	(0x4e00 << 16) | (0xc90c >> 2),
332 	0x00000000,
333 	(0x5e00 << 16) | (0xc90c >> 2),
334 	0x00000000,
335 	(0x6e00 << 16) | (0xc90c >> 2),
336 	0x00000000,
337 	(0x7e00 << 16) | (0xc90c >> 2),
338 	0x00000000,
339 	(0x8e00 << 16) | (0xc90c >> 2),
340 	0x00000000,
341 	(0x9e00 << 16) | (0xc90c >> 2),
342 	0x00000000,
343 	(0xae00 << 16) | (0xc90c >> 2),
344 	0x00000000,
345 	(0xbe00 << 16) | (0xc90c >> 2),
346 	0x00000000,
347 	(0x4e00 << 16) | (0xc910 >> 2),
348 	0x00000000,
349 	(0x5e00 << 16) | (0xc910 >> 2),
350 	0x00000000,
351 	(0x6e00 << 16) | (0xc910 >> 2),
352 	0x00000000,
353 	(0x7e00 << 16) | (0xc910 >> 2),
354 	0x00000000,
355 	(0x8e00 << 16) | (0xc910 >> 2),
356 	0x00000000,
357 	(0x9e00 << 16) | (0xc910 >> 2),
358 	0x00000000,
359 	(0xae00 << 16) | (0xc910 >> 2),
360 	0x00000000,
361 	(0xbe00 << 16) | (0xc910 >> 2),
362 	0x00000000,
363 	(0x0e00 << 16) | (0xc99c >> 2),
364 	0x00000000,
365 	(0x0e00 << 16) | (0x9834 >> 2),
366 	0x00000000,
367 	(0x0000 << 16) | (0x30f00 >> 2),
368 	0x00000000,
369 	(0x0001 << 16) | (0x30f00 >> 2),
370 	0x00000000,
371 	(0x0000 << 16) | (0x30f04 >> 2),
372 	0x00000000,
373 	(0x0001 << 16) | (0x30f04 >> 2),
374 	0x00000000,
375 	(0x0000 << 16) | (0x30f08 >> 2),
376 	0x00000000,
377 	(0x0001 << 16) | (0x30f08 >> 2),
378 	0x00000000,
379 	(0x0000 << 16) | (0x30f0c >> 2),
380 	0x00000000,
381 	(0x0001 << 16) | (0x30f0c >> 2),
382 	0x00000000,
383 	(0x0600 << 16) | (0x9b7c >> 2),
384 	0x00000000,
385 	(0x0e00 << 16) | (0x8a14 >> 2),
386 	0x00000000,
387 	(0x0e00 << 16) | (0x8a18 >> 2),
388 	0x00000000,
389 	(0x0600 << 16) | (0x30a00 >> 2),
390 	0x00000000,
391 	(0x0e00 << 16) | (0x8bf0 >> 2),
392 	0x00000000,
393 	(0x0e00 << 16) | (0x8bcc >> 2),
394 	0x00000000,
395 	(0x0e00 << 16) | (0x8b24 >> 2),
396 	0x00000000,
397 	(0x0e00 << 16) | (0x30a04 >> 2),
398 	0x00000000,
399 	(0x0600 << 16) | (0x30a10 >> 2),
400 	0x00000000,
401 	(0x0600 << 16) | (0x30a14 >> 2),
402 	0x00000000,
403 	(0x0600 << 16) | (0x30a18 >> 2),
404 	0x00000000,
405 	(0x0600 << 16) | (0x30a2c >> 2),
406 	0x00000000,
407 	(0x0e00 << 16) | (0xc700 >> 2),
408 	0x00000000,
409 	(0x0e00 << 16) | (0xc704 >> 2),
410 	0x00000000,
411 	(0x0e00 << 16) | (0xc708 >> 2),
412 	0x00000000,
413 	(0x0e00 << 16) | (0xc768 >> 2),
414 	0x00000000,
415 	(0x0400 << 16) | (0xc770 >> 2),
416 	0x00000000,
417 	(0x0400 << 16) | (0xc774 >> 2),
418 	0x00000000,
419 	(0x0400 << 16) | (0xc778 >> 2),
420 	0x00000000,
421 	(0x0400 << 16) | (0xc77c >> 2),
422 	0x00000000,
423 	(0x0400 << 16) | (0xc780 >> 2),
424 	0x00000000,
425 	(0x0400 << 16) | (0xc784 >> 2),
426 	0x00000000,
427 	(0x0400 << 16) | (0xc788 >> 2),
428 	0x00000000,
429 	(0x0400 << 16) | (0xc78c >> 2),
430 	0x00000000,
431 	(0x0400 << 16) | (0xc798 >> 2),
432 	0x00000000,
433 	(0x0400 << 16) | (0xc79c >> 2),
434 	0x00000000,
435 	(0x0400 << 16) | (0xc7a0 >> 2),
436 	0x00000000,
437 	(0x0400 << 16) | (0xc7a4 >> 2),
438 	0x00000000,
439 	(0x0400 << 16) | (0xc7a8 >> 2),
440 	0x00000000,
441 	(0x0400 << 16) | (0xc7ac >> 2),
442 	0x00000000,
443 	(0x0400 << 16) | (0xc7b0 >> 2),
444 	0x00000000,
445 	(0x0400 << 16) | (0xc7b4 >> 2),
446 	0x00000000,
447 	(0x0e00 << 16) | (0x9100 >> 2),
448 	0x00000000,
449 	(0x0e00 << 16) | (0x3c010 >> 2),
450 	0x00000000,
451 	(0x0e00 << 16) | (0x92a8 >> 2),
452 	0x00000000,
453 	(0x0e00 << 16) | (0x92ac >> 2),
454 	0x00000000,
455 	(0x0e00 << 16) | (0x92b4 >> 2),
456 	0x00000000,
457 	(0x0e00 << 16) | (0x92b8 >> 2),
458 	0x00000000,
459 	(0x0e00 << 16) | (0x92bc >> 2),
460 	0x00000000,
461 	(0x0e00 << 16) | (0x92c0 >> 2),
462 	0x00000000,
463 	(0x0e00 << 16) | (0x92c4 >> 2),
464 	0x00000000,
465 	(0x0e00 << 16) | (0x92c8 >> 2),
466 	0x00000000,
467 	(0x0e00 << 16) | (0x92cc >> 2),
468 	0x00000000,
469 	(0x0e00 << 16) | (0x92d0 >> 2),
470 	0x00000000,
471 	(0x0e00 << 16) | (0x8c00 >> 2),
472 	0x00000000,
473 	(0x0e00 << 16) | (0x8c04 >> 2),
474 	0x00000000,
475 	(0x0e00 << 16) | (0x8c20 >> 2),
476 	0x00000000,
477 	(0x0e00 << 16) | (0x8c38 >> 2),
478 	0x00000000,
479 	(0x0e00 << 16) | (0x8c3c >> 2),
480 	0x00000000,
481 	(0x0e00 << 16) | (0xae00 >> 2),
482 	0x00000000,
483 	(0x0e00 << 16) | (0x9604 >> 2),
484 	0x00000000,
485 	(0x0e00 << 16) | (0xac08 >> 2),
486 	0x00000000,
487 	(0x0e00 << 16) | (0xac0c >> 2),
488 	0x00000000,
489 	(0x0e00 << 16) | (0xac10 >> 2),
490 	0x00000000,
491 	(0x0e00 << 16) | (0xac14 >> 2),
492 	0x00000000,
493 	(0x0e00 << 16) | (0xac58 >> 2),
494 	0x00000000,
495 	(0x0e00 << 16) | (0xac68 >> 2),
496 	0x00000000,
497 	(0x0e00 << 16) | (0xac6c >> 2),
498 	0x00000000,
499 	(0x0e00 << 16) | (0xac70 >> 2),
500 	0x00000000,
501 	(0x0e00 << 16) | (0xac74 >> 2),
502 	0x00000000,
503 	(0x0e00 << 16) | (0xac78 >> 2),
504 	0x00000000,
505 	(0x0e00 << 16) | (0xac7c >> 2),
506 	0x00000000,
507 	(0x0e00 << 16) | (0xac80 >> 2),
508 	0x00000000,
509 	(0x0e00 << 16) | (0xac84 >> 2),
510 	0x00000000,
511 	(0x0e00 << 16) | (0xac88 >> 2),
512 	0x00000000,
513 	(0x0e00 << 16) | (0xac8c >> 2),
514 	0x00000000,
515 	(0x0e00 << 16) | (0x970c >> 2),
516 	0x00000000,
517 	(0x0e00 << 16) | (0x9714 >> 2),
518 	0x00000000,
519 	(0x0e00 << 16) | (0x9718 >> 2),
520 	0x00000000,
521 	(0x0e00 << 16) | (0x971c >> 2),
522 	0x00000000,
523 	(0x0e00 << 16) | (0x31068 >> 2),
524 	0x00000000,
525 	(0x4e00 << 16) | (0x31068 >> 2),
526 	0x00000000,
527 	(0x5e00 << 16) | (0x31068 >> 2),
528 	0x00000000,
529 	(0x6e00 << 16) | (0x31068 >> 2),
530 	0x00000000,
531 	(0x7e00 << 16) | (0x31068 >> 2),
532 	0x00000000,
533 	(0x8e00 << 16) | (0x31068 >> 2),
534 	0x00000000,
535 	(0x9e00 << 16) | (0x31068 >> 2),
536 	0x00000000,
537 	(0xae00 << 16) | (0x31068 >> 2),
538 	0x00000000,
539 	(0xbe00 << 16) | (0x31068 >> 2),
540 	0x00000000,
541 	(0x0e00 << 16) | (0xcd10 >> 2),
542 	0x00000000,
543 	(0x0e00 << 16) | (0xcd14 >> 2),
544 	0x00000000,
545 	(0x0e00 << 16) | (0x88b0 >> 2),
546 	0x00000000,
547 	(0x0e00 << 16) | (0x88b4 >> 2),
548 	0x00000000,
549 	(0x0e00 << 16) | (0x88b8 >> 2),
550 	0x00000000,
551 	(0x0e00 << 16) | (0x88bc >> 2),
552 	0x00000000,
553 	(0x0400 << 16) | (0x89c0 >> 2),
554 	0x00000000,
555 	(0x0e00 << 16) | (0x88c4 >> 2),
556 	0x00000000,
557 	(0x0e00 << 16) | (0x88c8 >> 2),
558 	0x00000000,
559 	(0x0e00 << 16) | (0x88d0 >> 2),
560 	0x00000000,
561 	(0x0e00 << 16) | (0x88d4 >> 2),
562 	0x00000000,
563 	(0x0e00 << 16) | (0x88d8 >> 2),
564 	0x00000000,
565 	(0x0e00 << 16) | (0x8980 >> 2),
566 	0x00000000,
567 	(0x0e00 << 16) | (0x30938 >> 2),
568 	0x00000000,
569 	(0x0e00 << 16) | (0x3093c >> 2),
570 	0x00000000,
571 	(0x0e00 << 16) | (0x30940 >> 2),
572 	0x00000000,
573 	(0x0e00 << 16) | (0x89a0 >> 2),
574 	0x00000000,
575 	(0x0e00 << 16) | (0x30900 >> 2),
576 	0x00000000,
577 	(0x0e00 << 16) | (0x30904 >> 2),
578 	0x00000000,
579 	(0x0e00 << 16) | (0x89b4 >> 2),
580 	0x00000000,
581 	(0x0e00 << 16) | (0x3c210 >> 2),
582 	0x00000000,
583 	(0x0e00 << 16) | (0x3c214 >> 2),
584 	0x00000000,
585 	(0x0e00 << 16) | (0x3c218 >> 2),
586 	0x00000000,
587 	(0x0e00 << 16) | (0x8904 >> 2),
588 	0x00000000,
589 	0x5,
590 	(0x0e00 << 16) | (0x8c28 >> 2),
591 	(0x0e00 << 16) | (0x8c2c >> 2),
592 	(0x0e00 << 16) | (0x8c30 >> 2),
593 	(0x0e00 << 16) | (0x8c34 >> 2),
594 	(0x0e00 << 16) | (0x9600 >> 2),
595 };
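
/*
 * Each entry above packs a GRBM_GFX_INDEX selector into the upper 16 bits
 * and a dword register offset into the lower 16 bits, exactly as the
 * initializers are written.  Decoding one entry back (assumed layout,
 * mirroring the encoding above):
 *
 *	u32 packed = (0x0e00 << 16) | (0xc12c >> 2);
 *	u32 gfx_index   = packed >> 16;            (0x0e00: SE/SH select)
 *	u32 byte_offset = (packed & 0xffff) << 2;  (0xc12c: register offset)
 *
 * The bare 0x3 and 0x5 words are not register offsets; they are part of
 * the list format consumed by the RLC ucode.
 */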
596 
597 static const u32 kalindi_rlc_save_restore_register_list[] =
598 {
599 	(0x0e00 << 16) | (0xc12c >> 2),
600 	0x00000000,
601 	(0x0e00 << 16) | (0xc140 >> 2),
602 	0x00000000,
603 	(0x0e00 << 16) | (0xc150 >> 2),
604 	0x00000000,
605 	(0x0e00 << 16) | (0xc15c >> 2),
606 	0x00000000,
607 	(0x0e00 << 16) | (0xc168 >> 2),
608 	0x00000000,
609 	(0x0e00 << 16) | (0xc170 >> 2),
610 	0x00000000,
611 	(0x0e00 << 16) | (0xc204 >> 2),
612 	0x00000000,
613 	(0x0e00 << 16) | (0xc2b4 >> 2),
614 	0x00000000,
615 	(0x0e00 << 16) | (0xc2b8 >> 2),
616 	0x00000000,
617 	(0x0e00 << 16) | (0xc2bc >> 2),
618 	0x00000000,
619 	(0x0e00 << 16) | (0xc2c0 >> 2),
620 	0x00000000,
621 	(0x0e00 << 16) | (0x8228 >> 2),
622 	0x00000000,
623 	(0x0e00 << 16) | (0x829c >> 2),
624 	0x00000000,
625 	(0x0e00 << 16) | (0x869c >> 2),
626 	0x00000000,
627 	(0x0600 << 16) | (0x98f4 >> 2),
628 	0x00000000,
629 	(0x0e00 << 16) | (0x98f8 >> 2),
630 	0x00000000,
631 	(0x0e00 << 16) | (0x9900 >> 2),
632 	0x00000000,
633 	(0x0e00 << 16) | (0xc260 >> 2),
634 	0x00000000,
635 	(0x0e00 << 16) | (0x90e8 >> 2),
636 	0x00000000,
637 	(0x0e00 << 16) | (0x3c000 >> 2),
638 	0x00000000,
639 	(0x0e00 << 16) | (0x3c00c >> 2),
640 	0x00000000,
641 	(0x0e00 << 16) | (0x8c1c >> 2),
642 	0x00000000,
643 	(0x0e00 << 16) | (0x9700 >> 2),
644 	0x00000000,
645 	(0x0e00 << 16) | (0xcd20 >> 2),
646 	0x00000000,
647 	(0x4e00 << 16) | (0xcd20 >> 2),
648 	0x00000000,
649 	(0x5e00 << 16) | (0xcd20 >> 2),
650 	0x00000000,
651 	(0x6e00 << 16) | (0xcd20 >> 2),
652 	0x00000000,
653 	(0x7e00 << 16) | (0xcd20 >> 2),
654 	0x00000000,
655 	(0x0e00 << 16) | (0x89bc >> 2),
656 	0x00000000,
657 	(0x0e00 << 16) | (0x8900 >> 2),
658 	0x00000000,
659 	0x3,
660 	(0x0e00 << 16) | (0xc130 >> 2),
661 	0x00000000,
662 	(0x0e00 << 16) | (0xc134 >> 2),
663 	0x00000000,
664 	(0x0e00 << 16) | (0xc1fc >> 2),
665 	0x00000000,
666 	(0x0e00 << 16) | (0xc208 >> 2),
667 	0x00000000,
668 	(0x0e00 << 16) | (0xc264 >> 2),
669 	0x00000000,
670 	(0x0e00 << 16) | (0xc268 >> 2),
671 	0x00000000,
672 	(0x0e00 << 16) | (0xc26c >> 2),
673 	0x00000000,
674 	(0x0e00 << 16) | (0xc270 >> 2),
675 	0x00000000,
676 	(0x0e00 << 16) | (0xc274 >> 2),
677 	0x00000000,
678 	(0x0e00 << 16) | (0xc28c >> 2),
679 	0x00000000,
680 	(0x0e00 << 16) | (0xc290 >> 2),
681 	0x00000000,
682 	(0x0e00 << 16) | (0xc294 >> 2),
683 	0x00000000,
684 	(0x0e00 << 16) | (0xc298 >> 2),
685 	0x00000000,
686 	(0x0e00 << 16) | (0xc2a0 >> 2),
687 	0x00000000,
688 	(0x0e00 << 16) | (0xc2a4 >> 2),
689 	0x00000000,
690 	(0x0e00 << 16) | (0xc2a8 >> 2),
691 	0x00000000,
692 	(0x0e00 << 16) | (0xc2ac >> 2),
693 	0x00000000,
694 	(0x0e00 << 16) | (0x301d0 >> 2),
695 	0x00000000,
696 	(0x0e00 << 16) | (0x30238 >> 2),
697 	0x00000000,
698 	(0x0e00 << 16) | (0x30250 >> 2),
699 	0x00000000,
700 	(0x0e00 << 16) | (0x30254 >> 2),
701 	0x00000000,
702 	(0x0e00 << 16) | (0x30258 >> 2),
703 	0x00000000,
704 	(0x0e00 << 16) | (0x3025c >> 2),
705 	0x00000000,
706 	(0x4e00 << 16) | (0xc900 >> 2),
707 	0x00000000,
708 	(0x5e00 << 16) | (0xc900 >> 2),
709 	0x00000000,
710 	(0x6e00 << 16) | (0xc900 >> 2),
711 	0x00000000,
712 	(0x7e00 << 16) | (0xc900 >> 2),
713 	0x00000000,
714 	(0x4e00 << 16) | (0xc904 >> 2),
715 	0x00000000,
716 	(0x5e00 << 16) | (0xc904 >> 2),
717 	0x00000000,
718 	(0x6e00 << 16) | (0xc904 >> 2),
719 	0x00000000,
720 	(0x7e00 << 16) | (0xc904 >> 2),
721 	0x00000000,
722 	(0x4e00 << 16) | (0xc908 >> 2),
723 	0x00000000,
724 	(0x5e00 << 16) | (0xc908 >> 2),
725 	0x00000000,
726 	(0x6e00 << 16) | (0xc908 >> 2),
727 	0x00000000,
728 	(0x7e00 << 16) | (0xc908 >> 2),
729 	0x00000000,
730 	(0x4e00 << 16) | (0xc90c >> 2),
731 	0x00000000,
732 	(0x5e00 << 16) | (0xc90c >> 2),
733 	0x00000000,
734 	(0x6e00 << 16) | (0xc90c >> 2),
735 	0x00000000,
736 	(0x7e00 << 16) | (0xc90c >> 2),
737 	0x00000000,
738 	(0x4e00 << 16) | (0xc910 >> 2),
739 	0x00000000,
740 	(0x5e00 << 16) | (0xc910 >> 2),
741 	0x00000000,
742 	(0x6e00 << 16) | (0xc910 >> 2),
743 	0x00000000,
744 	(0x7e00 << 16) | (0xc910 >> 2),
745 	0x00000000,
746 	(0x0e00 << 16) | (0xc99c >> 2),
747 	0x00000000,
748 	(0x0e00 << 16) | (0x9834 >> 2),
749 	0x00000000,
750 	(0x0000 << 16) | (0x30f00 >> 2),
751 	0x00000000,
752 	(0x0000 << 16) | (0x30f04 >> 2),
753 	0x00000000,
754 	(0x0000 << 16) | (0x30f08 >> 2),
755 	0x00000000,
756 	(0x0000 << 16) | (0x30f0c >> 2),
757 	0x00000000,
758 	(0x0600 << 16) | (0x9b7c >> 2),
759 	0x00000000,
760 	(0x0e00 << 16) | (0x8a14 >> 2),
761 	0x00000000,
762 	(0x0e00 << 16) | (0x8a18 >> 2),
763 	0x00000000,
764 	(0x0600 << 16) | (0x30a00 >> 2),
765 	0x00000000,
766 	(0x0e00 << 16) | (0x8bf0 >> 2),
767 	0x00000000,
768 	(0x0e00 << 16) | (0x8bcc >> 2),
769 	0x00000000,
770 	(0x0e00 << 16) | (0x8b24 >> 2),
771 	0x00000000,
772 	(0x0e00 << 16) | (0x30a04 >> 2),
773 	0x00000000,
774 	(0x0600 << 16) | (0x30a10 >> 2),
775 	0x00000000,
776 	(0x0600 << 16) | (0x30a14 >> 2),
777 	0x00000000,
778 	(0x0600 << 16) | (0x30a18 >> 2),
779 	0x00000000,
780 	(0x0600 << 16) | (0x30a2c >> 2),
781 	0x00000000,
782 	(0x0e00 << 16) | (0xc700 >> 2),
783 	0x00000000,
784 	(0x0e00 << 16) | (0xc704 >> 2),
785 	0x00000000,
786 	(0x0e00 << 16) | (0xc708 >> 2),
787 	0x00000000,
788 	(0x0e00 << 16) | (0xc768 >> 2),
789 	0x00000000,
790 	(0x0400 << 16) | (0xc770 >> 2),
791 	0x00000000,
792 	(0x0400 << 16) | (0xc774 >> 2),
793 	0x00000000,
794 	(0x0400 << 16) | (0xc798 >> 2),
795 	0x00000000,
796 	(0x0400 << 16) | (0xc79c >> 2),
797 	0x00000000,
798 	(0x0e00 << 16) | (0x9100 >> 2),
799 	0x00000000,
800 	(0x0e00 << 16) | (0x3c010 >> 2),
801 	0x00000000,
802 	(0x0e00 << 16) | (0x8c00 >> 2),
803 	0x00000000,
804 	(0x0e00 << 16) | (0x8c04 >> 2),
805 	0x00000000,
806 	(0x0e00 << 16) | (0x8c20 >> 2),
807 	0x00000000,
808 	(0x0e00 << 16) | (0x8c38 >> 2),
809 	0x00000000,
810 	(0x0e00 << 16) | (0x8c3c >> 2),
811 	0x00000000,
812 	(0x0e00 << 16) | (0xae00 >> 2),
813 	0x00000000,
814 	(0x0e00 << 16) | (0x9604 >> 2),
815 	0x00000000,
816 	(0x0e00 << 16) | (0xac08 >> 2),
817 	0x00000000,
818 	(0x0e00 << 16) | (0xac0c >> 2),
819 	0x00000000,
820 	(0x0e00 << 16) | (0xac10 >> 2),
821 	0x00000000,
822 	(0x0e00 << 16) | (0xac14 >> 2),
823 	0x00000000,
824 	(0x0e00 << 16) | (0xac58 >> 2),
825 	0x00000000,
826 	(0x0e00 << 16) | (0xac68 >> 2),
827 	0x00000000,
828 	(0x0e00 << 16) | (0xac6c >> 2),
829 	0x00000000,
830 	(0x0e00 << 16) | (0xac70 >> 2),
831 	0x00000000,
832 	(0x0e00 << 16) | (0xac74 >> 2),
833 	0x00000000,
834 	(0x0e00 << 16) | (0xac78 >> 2),
835 	0x00000000,
836 	(0x0e00 << 16) | (0xac7c >> 2),
837 	0x00000000,
838 	(0x0e00 << 16) | (0xac80 >> 2),
839 	0x00000000,
840 	(0x0e00 << 16) | (0xac84 >> 2),
841 	0x00000000,
842 	(0x0e00 << 16) | (0xac88 >> 2),
843 	0x00000000,
844 	(0x0e00 << 16) | (0xac8c >> 2),
845 	0x00000000,
846 	(0x0e00 << 16) | (0x970c >> 2),
847 	0x00000000,
848 	(0x0e00 << 16) | (0x9714 >> 2),
849 	0x00000000,
850 	(0x0e00 << 16) | (0x9718 >> 2),
851 	0x00000000,
852 	(0x0e00 << 16) | (0x971c >> 2),
853 	0x00000000,
854 	(0x0e00 << 16) | (0x31068 >> 2),
855 	0x00000000,
856 	(0x4e00 << 16) | (0x31068 >> 2),
857 	0x00000000,
858 	(0x5e00 << 16) | (0x31068 >> 2),
859 	0x00000000,
860 	(0x6e00 << 16) | (0x31068 >> 2),
861 	0x00000000,
862 	(0x7e00 << 16) | (0x31068 >> 2),
863 	0x00000000,
864 	(0x0e00 << 16) | (0xcd10 >> 2),
865 	0x00000000,
866 	(0x0e00 << 16) | (0xcd14 >> 2),
867 	0x00000000,
868 	(0x0e00 << 16) | (0x88b0 >> 2),
869 	0x00000000,
870 	(0x0e00 << 16) | (0x88b4 >> 2),
871 	0x00000000,
872 	(0x0e00 << 16) | (0x88b8 >> 2),
873 	0x00000000,
874 	(0x0e00 << 16) | (0x88bc >> 2),
875 	0x00000000,
876 	(0x0400 << 16) | (0x89c0 >> 2),
877 	0x00000000,
878 	(0x0e00 << 16) | (0x88c4 >> 2),
879 	0x00000000,
880 	(0x0e00 << 16) | (0x88c8 >> 2),
881 	0x00000000,
882 	(0x0e00 << 16) | (0x88d0 >> 2),
883 	0x00000000,
884 	(0x0e00 << 16) | (0x88d4 >> 2),
885 	0x00000000,
886 	(0x0e00 << 16) | (0x88d8 >> 2),
887 	0x00000000,
888 	(0x0e00 << 16) | (0x8980 >> 2),
889 	0x00000000,
890 	(0x0e00 << 16) | (0x30938 >> 2),
891 	0x00000000,
892 	(0x0e00 << 16) | (0x3093c >> 2),
893 	0x00000000,
894 	(0x0e00 << 16) | (0x30940 >> 2),
895 	0x00000000,
896 	(0x0e00 << 16) | (0x89a0 >> 2),
897 	0x00000000,
898 	(0x0e00 << 16) | (0x30900 >> 2),
899 	0x00000000,
900 	(0x0e00 << 16) | (0x30904 >> 2),
901 	0x00000000,
902 	(0x0e00 << 16) | (0x89b4 >> 2),
903 	0x00000000,
904 	(0x0e00 << 16) | (0x3e1fc >> 2),
905 	0x00000000,
906 	(0x0e00 << 16) | (0x3c210 >> 2),
907 	0x00000000,
908 	(0x0e00 << 16) | (0x3c214 >> 2),
909 	0x00000000,
910 	(0x0e00 << 16) | (0x3c218 >> 2),
911 	0x00000000,
912 	(0x0e00 << 16) | (0x8904 >> 2),
913 	0x00000000,
914 	0x5,
915 	(0x0e00 << 16) | (0x8c28 >> 2),
916 	(0x0e00 << 16) | (0x8c2c >> 2),
917 	(0x0e00 << 16) | (0x8c30 >> 2),
918 	(0x0e00 << 16) | (0x8c34 >> 2),
919 	(0x0e00 << 16) | (0x9600 >> 2),
920 };
921 
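/*
 * The golden register tables below are {offset, and-mask, or-value}
 * triplets consumed by radeon_program_register_sequence().  Roughly
 * (sketch of the consumer; the real loop lives in radeon_device.c):
 *
 *	for (i = 0; i < array_size; i += 3) {
 *		reg = registers[i + 0];
 *		and_mask = registers[i + 1];
 *		or_mask = registers[i + 2];
 *		if (and_mask == 0xffffffff)
 *			tmp = or_mask;
 *		else
 *			tmp = (RREG32(reg) & ~and_mask) | or_mask;
 *		WREG32(reg, tmp);
 *	}
 */
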
922 static const u32 bonaire_golden_spm_registers[] =
923 {
924 	0x30800, 0xe0ffffff, 0xe0000000
925 };
926 
927 static const u32 bonaire_golden_common_registers[] =
928 {
929 	0xc770, 0xffffffff, 0x00000800,
930 	0xc774, 0xffffffff, 0x00000800,
931 	0xc798, 0xffffffff, 0x00007fbf,
932 	0xc79c, 0xffffffff, 0x00007faf
933 };
934 
935 static const u32 bonaire_golden_registers[] =
936 {
937 	0x3354, 0x00000333, 0x00000333,
938 	0x3350, 0x000c0fc0, 0x00040200,
939 	0x9a10, 0x00010000, 0x00058208,
940 	0x3c000, 0xffff1fff, 0x00140000,
941 	0x3c200, 0xfdfc0fff, 0x00000100,
942 	0x3c234, 0x40000000, 0x40000200,
943 	0x9830, 0xffffffff, 0x00000000,
944 	0x9834, 0xf00fffff, 0x00000400,
945 	0x9838, 0x0002021c, 0x00020200,
946 	0xc78, 0x00000080, 0x00000000,
947 	0x5bb0, 0x000000f0, 0x00000070,
948 	0x5bc0, 0xf0311fff, 0x80300000,
949 	0x98f8, 0x73773777, 0x12010001,
950 	0x350c, 0x00810000, 0x408af000,
951 	0x7030, 0x31000111, 0x00000011,
952 	0x2f48, 0x73773777, 0x12010001,
953 	0x220c, 0x00007fb6, 0x0021a1b1,
954 	0x2210, 0x00007fb6, 0x002021b1,
955 	0x2180, 0x00007fb6, 0x00002191,
956 	0x2218, 0x00007fb6, 0x002121b1,
957 	0x221c, 0x00007fb6, 0x002021b1,
958 	0x21dc, 0x00007fb6, 0x00002191,
959 	0x21e0, 0x00007fb6, 0x00002191,
960 	0x3628, 0x0000003f, 0x0000000a,
961 	0x362c, 0x0000003f, 0x0000000a,
962 	0x2ae4, 0x00073ffe, 0x000022a2,
963 	0x240c, 0x000007ff, 0x00000000,
964 	0x8a14, 0xf000003f, 0x00000007,
965 	0x8bf0, 0x00002001, 0x00000001,
966 	0x8b24, 0xffffffff, 0x00ffffff,
967 	0x30a04, 0x0000ff0f, 0x00000000,
968 	0x28a4c, 0x07ffffff, 0x06000000,
969 	0x4d8, 0x00000fff, 0x00000100,
970 	0x3e78, 0x00000001, 0x00000002,
971 	0x9100, 0x03000000, 0x0362c688,
972 	0x8c00, 0x000000ff, 0x00000001,
973 	0xe40, 0x00001fff, 0x00001fff,
974 	0x9060, 0x0000007f, 0x00000020,
975 	0x9508, 0x00010000, 0x00010000,
976 	0xac14, 0x000003ff, 0x000000f3,
977 	0xac0c, 0xffffffff, 0x00001032
978 };
979 
980 static const u32 bonaire_mgcg_cgcg_init[] =
981 {
982 	0xc420, 0xffffffff, 0xfffffffc,
983 	0x30800, 0xffffffff, 0xe0000000,
984 	0x3c2a0, 0xffffffff, 0x00000100,
985 	0x3c208, 0xffffffff, 0x00000100,
986 	0x3c2c0, 0xffffffff, 0xc0000100,
987 	0x3c2c8, 0xffffffff, 0xc0000100,
988 	0x3c2c4, 0xffffffff, 0xc0000100,
989 	0x55e4, 0xffffffff, 0x00600100,
990 	0x3c280, 0xffffffff, 0x00000100,
991 	0x3c214, 0xffffffff, 0x06000100,
992 	0x3c220, 0xffffffff, 0x00000100,
993 	0x3c218, 0xffffffff, 0x06000100,
994 	0x3c204, 0xffffffff, 0x00000100,
995 	0x3c2e0, 0xffffffff, 0x00000100,
996 	0x3c224, 0xffffffff, 0x00000100,
997 	0x3c200, 0xffffffff, 0x00000100,
998 	0x3c230, 0xffffffff, 0x00000100,
999 	0x3c234, 0xffffffff, 0x00000100,
1000 	0x3c250, 0xffffffff, 0x00000100,
1001 	0x3c254, 0xffffffff, 0x00000100,
1002 	0x3c258, 0xffffffff, 0x00000100,
1003 	0x3c25c, 0xffffffff, 0x00000100,
1004 	0x3c260, 0xffffffff, 0x00000100,
1005 	0x3c27c, 0xffffffff, 0x00000100,
1006 	0x3c278, 0xffffffff, 0x00000100,
1007 	0x3c210, 0xffffffff, 0x06000100,
1008 	0x3c290, 0xffffffff, 0x00000100,
1009 	0x3c274, 0xffffffff, 0x00000100,
1010 	0x3c2b4, 0xffffffff, 0x00000100,
1011 	0x3c2b0, 0xffffffff, 0x00000100,
1012 	0x3c270, 0xffffffff, 0x00000100,
1013 	0x30800, 0xffffffff, 0xe0000000,
1014 	0x3c020, 0xffffffff, 0x00010000,
1015 	0x3c024, 0xffffffff, 0x00030002,
1016 	0x3c028, 0xffffffff, 0x00040007,
1017 	0x3c02c, 0xffffffff, 0x00060005,
1018 	0x3c030, 0xffffffff, 0x00090008,
1019 	0x3c034, 0xffffffff, 0x00010000,
1020 	0x3c038, 0xffffffff, 0x00030002,
1021 	0x3c03c, 0xffffffff, 0x00040007,
1022 	0x3c040, 0xffffffff, 0x00060005,
1023 	0x3c044, 0xffffffff, 0x00090008,
1024 	0x3c048, 0xffffffff, 0x00010000,
1025 	0x3c04c, 0xffffffff, 0x00030002,
1026 	0x3c050, 0xffffffff, 0x00040007,
1027 	0x3c054, 0xffffffff, 0x00060005,
1028 	0x3c058, 0xffffffff, 0x00090008,
1029 	0x3c05c, 0xffffffff, 0x00010000,
1030 	0x3c060, 0xffffffff, 0x00030002,
1031 	0x3c064, 0xffffffff, 0x00040007,
1032 	0x3c068, 0xffffffff, 0x00060005,
1033 	0x3c06c, 0xffffffff, 0x00090008,
1034 	0x3c070, 0xffffffff, 0x00010000,
1035 	0x3c074, 0xffffffff, 0x00030002,
1036 	0x3c078, 0xffffffff, 0x00040007,
1037 	0x3c07c, 0xffffffff, 0x00060005,
1038 	0x3c080, 0xffffffff, 0x00090008,
1039 	0x3c084, 0xffffffff, 0x00010000,
1040 	0x3c088, 0xffffffff, 0x00030002,
1041 	0x3c08c, 0xffffffff, 0x00040007,
1042 	0x3c090, 0xffffffff, 0x00060005,
1043 	0x3c094, 0xffffffff, 0x00090008,
1044 	0x3c098, 0xffffffff, 0x00010000,
1045 	0x3c09c, 0xffffffff, 0x00030002,
1046 	0x3c0a0, 0xffffffff, 0x00040007,
1047 	0x3c0a4, 0xffffffff, 0x00060005,
1048 	0x3c0a8, 0xffffffff, 0x00090008,
1049 	0x3c000, 0xffffffff, 0x96e00200,
1050 	0x8708, 0xffffffff, 0x00900100,
1051 	0xc424, 0xffffffff, 0x0020003f,
1052 	0x38, 0xffffffff, 0x0140001c,
1053 	0x3c, 0x000f0000, 0x000f0000,
1054 	0x220, 0xffffffff, 0xc060000c,
1055 	0x224, 0xc0000fff, 0x00000100,
1056 	0xf90, 0xffffffff, 0x00000100,
1057 	0xf98, 0x00000101, 0x00000000,
1058 	0x20a8, 0xffffffff, 0x00000104,
1059 	0x55e4, 0xff000fff, 0x00000100,
1060 	0x30cc, 0xc0000fff, 0x00000104,
1061 	0xc1e4, 0x00000001, 0x00000001,
1062 	0xd00c, 0xff000ff0, 0x00000100,
1063 	0xd80c, 0xff000ff0, 0x00000100
1064 };
1065 
1066 static const u32 spectre_golden_spm_registers[] =
1067 {
1068 	0x30800, 0xe0ffffff, 0xe0000000
1069 };
1070 
1071 static const u32 spectre_golden_common_registers[] =
1072 {
1073 	0xc770, 0xffffffff, 0x00000800,
1074 	0xc774, 0xffffffff, 0x00000800,
1075 	0xc798, 0xffffffff, 0x00007fbf,
1076 	0xc79c, 0xffffffff, 0x00007faf
1077 };
1078 
1079 static const u32 spectre_golden_registers[] =
1080 {
1081 	0x3c000, 0xffff1fff, 0x96940200,
1082 	0x3c00c, 0xffff0001, 0xff000000,
1083 	0x3c200, 0xfffc0fff, 0x00000100,
1084 	0x6ed8, 0x00010101, 0x00010000,
1085 	0x9834, 0xf00fffff, 0x00000400,
1086 	0x9838, 0xfffffffc, 0x00020200,
1087 	0x5bb0, 0x000000f0, 0x00000070,
1088 	0x5bc0, 0xf0311fff, 0x80300000,
1089 	0x98f8, 0x73773777, 0x12010001,
1090 	0x9b7c, 0x00ff0000, 0x00fc0000,
1091 	0x2f48, 0x73773777, 0x12010001,
1092 	0x8a14, 0xf000003f, 0x00000007,
1093 	0x8b24, 0xffffffff, 0x00ffffff,
1094 	0x28350, 0x3f3f3fff, 0x00000082,
1095 	0x28355, 0x0000003f, 0x00000000,
1096 	0x3e78, 0x00000001, 0x00000002,
1097 	0x913c, 0xffff03df, 0x00000004,
1098 	0xc768, 0x00000008, 0x00000008,
1099 	0x8c00, 0x000008ff, 0x00000800,
1100 	0x9508, 0x00010000, 0x00010000,
1101 	0xac0c, 0xffffffff, 0x54763210,
1102 	0x214f8, 0x01ff01ff, 0x00000002,
1103 	0x21498, 0x007ff800, 0x00200000,
1104 	0x2015c, 0xffffffff, 0x00000f40,
1105 	0x30934, 0xffffffff, 0x00000001
1106 };
1107 
1108 static const u32 spectre_mgcg_cgcg_init[] =
1109 {
1110 	0xc420, 0xffffffff, 0xfffffffc,
1111 	0x30800, 0xffffffff, 0xe0000000,
1112 	0x3c2a0, 0xffffffff, 0x00000100,
1113 	0x3c208, 0xffffffff, 0x00000100,
1114 	0x3c2c0, 0xffffffff, 0x00000100,
1115 	0x3c2c8, 0xffffffff, 0x00000100,
1116 	0x3c2c4, 0xffffffff, 0x00000100,
1117 	0x55e4, 0xffffffff, 0x00600100,
1118 	0x3c280, 0xffffffff, 0x00000100,
1119 	0x3c214, 0xffffffff, 0x06000100,
1120 	0x3c220, 0xffffffff, 0x00000100,
1121 	0x3c218, 0xffffffff, 0x06000100,
1122 	0x3c204, 0xffffffff, 0x00000100,
1123 	0x3c2e0, 0xffffffff, 0x00000100,
1124 	0x3c224, 0xffffffff, 0x00000100,
1125 	0x3c200, 0xffffffff, 0x00000100,
1126 	0x3c230, 0xffffffff, 0x00000100,
1127 	0x3c234, 0xffffffff, 0x00000100,
1128 	0x3c250, 0xffffffff, 0x00000100,
1129 	0x3c254, 0xffffffff, 0x00000100,
1130 	0x3c258, 0xffffffff, 0x00000100,
1131 	0x3c25c, 0xffffffff, 0x00000100,
1132 	0x3c260, 0xffffffff, 0x00000100,
1133 	0x3c27c, 0xffffffff, 0x00000100,
1134 	0x3c278, 0xffffffff, 0x00000100,
1135 	0x3c210, 0xffffffff, 0x06000100,
1136 	0x3c290, 0xffffffff, 0x00000100,
1137 	0x3c274, 0xffffffff, 0x00000100,
1138 	0x3c2b4, 0xffffffff, 0x00000100,
1139 	0x3c2b0, 0xffffffff, 0x00000100,
1140 	0x3c270, 0xffffffff, 0x00000100,
1141 	0x30800, 0xffffffff, 0xe0000000,
1142 	0x3c020, 0xffffffff, 0x00010000,
1143 	0x3c024, 0xffffffff, 0x00030002,
1144 	0x3c028, 0xffffffff, 0x00040007,
1145 	0x3c02c, 0xffffffff, 0x00060005,
1146 	0x3c030, 0xffffffff, 0x00090008,
1147 	0x3c034, 0xffffffff, 0x00010000,
1148 	0x3c038, 0xffffffff, 0x00030002,
1149 	0x3c03c, 0xffffffff, 0x00040007,
1150 	0x3c040, 0xffffffff, 0x00060005,
1151 	0x3c044, 0xffffffff, 0x00090008,
1152 	0x3c048, 0xffffffff, 0x00010000,
1153 	0x3c04c, 0xffffffff, 0x00030002,
1154 	0x3c050, 0xffffffff, 0x00040007,
1155 	0x3c054, 0xffffffff, 0x00060005,
1156 	0x3c058, 0xffffffff, 0x00090008,
1157 	0x3c05c, 0xffffffff, 0x00010000,
1158 	0x3c060, 0xffffffff, 0x00030002,
1159 	0x3c064, 0xffffffff, 0x00040007,
1160 	0x3c068, 0xffffffff, 0x00060005,
1161 	0x3c06c, 0xffffffff, 0x00090008,
1162 	0x3c070, 0xffffffff, 0x00010000,
1163 	0x3c074, 0xffffffff, 0x00030002,
1164 	0x3c078, 0xffffffff, 0x00040007,
1165 	0x3c07c, 0xffffffff, 0x00060005,
1166 	0x3c080, 0xffffffff, 0x00090008,
1167 	0x3c084, 0xffffffff, 0x00010000,
1168 	0x3c088, 0xffffffff, 0x00030002,
1169 	0x3c08c, 0xffffffff, 0x00040007,
1170 	0x3c090, 0xffffffff, 0x00060005,
1171 	0x3c094, 0xffffffff, 0x00090008,
1172 	0x3c098, 0xffffffff, 0x00010000,
1173 	0x3c09c, 0xffffffff, 0x00030002,
1174 	0x3c0a0, 0xffffffff, 0x00040007,
1175 	0x3c0a4, 0xffffffff, 0x00060005,
1176 	0x3c0a8, 0xffffffff, 0x00090008,
1177 	0x3c0ac, 0xffffffff, 0x00010000,
1178 	0x3c0b0, 0xffffffff, 0x00030002,
1179 	0x3c0b4, 0xffffffff, 0x00040007,
1180 	0x3c0b8, 0xffffffff, 0x00060005,
1181 	0x3c0bc, 0xffffffff, 0x00090008,
1182 	0x3c000, 0xffffffff, 0x96e00200,
1183 	0x8708, 0xffffffff, 0x00900100,
1184 	0xc424, 0xffffffff, 0x0020003f,
1185 	0x38, 0xffffffff, 0x0140001c,
1186 	0x3c, 0x000f0000, 0x000f0000,
1187 	0x220, 0xffffffff, 0xc060000c,
1188 	0x224, 0xc0000fff, 0x00000100,
1189 	0xf90, 0xffffffff, 0x00000100,
1190 	0xf98, 0x00000101, 0x00000000,
1191 	0x20a8, 0xffffffff, 0x00000104,
1192 	0x55e4, 0xff000fff, 0x00000100,
1193 	0x30cc, 0xc0000fff, 0x00000104,
1194 	0xc1e4, 0x00000001, 0x00000001,
1195 	0xd00c, 0xff000ff0, 0x00000100,
1196 	0xd80c, 0xff000ff0, 0x00000100
1197 };
1198 
1199 static const u32 kalindi_golden_spm_registers[] =
1200 {
1201 	0x30800, 0xe0ffffff, 0xe0000000
1202 };
1203 
1204 static const u32 kalindi_golden_common_registers[] =
1205 {
1206 	0xc770, 0xffffffff, 0x00000800,
1207 	0xc774, 0xffffffff, 0x00000800,
1208 	0xc798, 0xffffffff, 0x00007fbf,
1209 	0xc79c, 0xffffffff, 0x00007faf
1210 };
1211 
1212 static const u32 kalindi_golden_registers[] =
1213 {
1214 	0x3c000, 0xffffdfff, 0x6e944040,
1215 	0x55e4, 0xff607fff, 0xfc000100,
1216 	0x3c220, 0xff000fff, 0x00000100,
1217 	0x3c224, 0xff000fff, 0x00000100,
1218 	0x3c200, 0xfffc0fff, 0x00000100,
1219 	0x6ed8, 0x00010101, 0x00010000,
1220 	0x9830, 0xffffffff, 0x00000000,
1221 	0x9834, 0xf00fffff, 0x00000400,
1222 	0x5bb0, 0x000000f0, 0x00000070,
1223 	0x5bc0, 0xf0311fff, 0x80300000,
1224 	0x98f8, 0x73773777, 0x12010001,
1225 	0x98fc, 0xffffffff, 0x00000010,
1226 	0x9b7c, 0x00ff0000, 0x00fc0000,
1227 	0x8030, 0x00001f0f, 0x0000100a,
1228 	0x2f48, 0x73773777, 0x12010001,
1229 	0x2408, 0x000fffff, 0x000c007f,
1230 	0x8a14, 0xf000003f, 0x00000007,
1231 	0x8b24, 0x3fff3fff, 0x00ffcfff,
1232 	0x30a04, 0x0000ff0f, 0x00000000,
1233 	0x28a4c, 0x07ffffff, 0x06000000,
1234 	0x4d8, 0x00000fff, 0x00000100,
1235 	0x3e78, 0x00000001, 0x00000002,
1236 	0xc768, 0x00000008, 0x00000008,
1237 	0x8c00, 0x000000ff, 0x00000003,
1238 	0x214f8, 0x01ff01ff, 0x00000002,
1239 	0x21498, 0x007ff800, 0x00200000,
1240 	0x2015c, 0xffffffff, 0x00000f40,
1241 	0x88c4, 0x001f3ae3, 0x00000082,
1242 	0x88d4, 0x0000001f, 0x00000010,
1243 	0x30934, 0xffffffff, 0x00000000
1244 };
1245 
1246 static const u32 kalindi_mgcg_cgcg_init[] =
1247 {
1248 	0xc420, 0xffffffff, 0xfffffffc,
1249 	0x30800, 0xffffffff, 0xe0000000,
1250 	0x3c2a0, 0xffffffff, 0x00000100,
1251 	0x3c208, 0xffffffff, 0x00000100,
1252 	0x3c2c0, 0xffffffff, 0x00000100,
1253 	0x3c2c8, 0xffffffff, 0x00000100,
1254 	0x3c2c4, 0xffffffff, 0x00000100,
1255 	0x55e4, 0xffffffff, 0x00600100,
1256 	0x3c280, 0xffffffff, 0x00000100,
1257 	0x3c214, 0xffffffff, 0x06000100,
1258 	0x3c220, 0xffffffff, 0x00000100,
1259 	0x3c218, 0xffffffff, 0x06000100,
1260 	0x3c204, 0xffffffff, 0x00000100,
1261 	0x3c2e0, 0xffffffff, 0x00000100,
1262 	0x3c224, 0xffffffff, 0x00000100,
1263 	0x3c200, 0xffffffff, 0x00000100,
1264 	0x3c230, 0xffffffff, 0x00000100,
1265 	0x3c234, 0xffffffff, 0x00000100,
1266 	0x3c250, 0xffffffff, 0x00000100,
1267 	0x3c254, 0xffffffff, 0x00000100,
1268 	0x3c258, 0xffffffff, 0x00000100,
1269 	0x3c25c, 0xffffffff, 0x00000100,
1270 	0x3c260, 0xffffffff, 0x00000100,
1271 	0x3c27c, 0xffffffff, 0x00000100,
1272 	0x3c278, 0xffffffff, 0x00000100,
1273 	0x3c210, 0xffffffff, 0x06000100,
1274 	0x3c290, 0xffffffff, 0x00000100,
1275 	0x3c274, 0xffffffff, 0x00000100,
1276 	0x3c2b4, 0xffffffff, 0x00000100,
1277 	0x3c2b0, 0xffffffff, 0x00000100,
1278 	0x3c270, 0xffffffff, 0x00000100,
1279 	0x30800, 0xffffffff, 0xe0000000,
1280 	0x3c020, 0xffffffff, 0x00010000,
1281 	0x3c024, 0xffffffff, 0x00030002,
1282 	0x3c028, 0xffffffff, 0x00040007,
1283 	0x3c02c, 0xffffffff, 0x00060005,
1284 	0x3c030, 0xffffffff, 0x00090008,
1285 	0x3c034, 0xffffffff, 0x00010000,
1286 	0x3c038, 0xffffffff, 0x00030002,
1287 	0x3c03c, 0xffffffff, 0x00040007,
1288 	0x3c040, 0xffffffff, 0x00060005,
1289 	0x3c044, 0xffffffff, 0x00090008,
1290 	0x3c000, 0xffffffff, 0x96e00200,
1291 	0x8708, 0xffffffff, 0x00900100,
1292 	0xc424, 0xffffffff, 0x0020003f,
1293 	0x38, 0xffffffff, 0x0140001c,
1294 	0x3c, 0x000f0000, 0x000f0000,
1295 	0x220, 0xffffffff, 0xc060000c,
1296 	0x224, 0xc0000fff, 0x00000100,
1297 	0x20a8, 0xffffffff, 0x00000104,
1298 	0x55e4, 0xff000fff, 0x00000100,
1299 	0x30cc, 0xc0000fff, 0x00000104,
1300 	0xc1e4, 0x00000001, 0x00000001,
1301 	0xd00c, 0xff000ff0, 0x00000100,
1302 	0xd80c, 0xff000ff0, 0x00000100
1303 };
1304 
1305 static void cik_init_golden_registers(struct radeon_device *rdev)
1306 {
1307 	switch (rdev->family) {
1308 	case CHIP_BONAIRE:
1309 		radeon_program_register_sequence(rdev,
1310 						 bonaire_mgcg_cgcg_init,
1311 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1312 		radeon_program_register_sequence(rdev,
1313 						 bonaire_golden_registers,
1314 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1315 		radeon_program_register_sequence(rdev,
1316 						 bonaire_golden_common_registers,
1317 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1318 		radeon_program_register_sequence(rdev,
1319 						 bonaire_golden_spm_registers,
1320 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1321 		break;
1322 	case CHIP_KABINI:
1323 		radeon_program_register_sequence(rdev,
1324 						 kalindi_mgcg_cgcg_init,
1325 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1326 		radeon_program_register_sequence(rdev,
1327 						 kalindi_golden_registers,
1328 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1329 		radeon_program_register_sequence(rdev,
1330 						 kalindi_golden_common_registers,
1331 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1332 		radeon_program_register_sequence(rdev,
1333 						 kalindi_golden_spm_registers,
1334 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1335 		break;
1336 	case CHIP_KAVERI:
1337 		radeon_program_register_sequence(rdev,
1338 						 spectre_mgcg_cgcg_init,
1339 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1340 		radeon_program_register_sequence(rdev,
1341 						 spectre_golden_registers,
1342 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1343 		radeon_program_register_sequence(rdev,
1344 						 spectre_golden_common_registers,
1345 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1346 		radeon_program_register_sequence(rdev,
1347 						 spectre_golden_spm_registers,
1348 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1349 		break;
1350 	default:
1351 		break;
1352 	}
1353 }
1354 
1355 /**
1356  * cik_get_xclk - get the xclk
1357  *
1358  * @rdev: radeon_device pointer
1359  *
1360  * Returns the reference clock used by the gfx engine
1361  * (CIK).
1362  */
1363 u32 cik_get_xclk(struct radeon_device *rdev)
1364 {
1365 	u32 reference_clock = rdev->clock.spll.reference_freq;
1366 
1367 	if (rdev->flags & RADEON_IS_IGP) {
1368 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1369 			return reference_clock / 2;
1370 	} else {
1371 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1372 			return reference_clock / 4;
1373 	}
1374 	return reference_clock;
1375 }
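
/*
 * Worked example (illustrative, assuming the usual 10 kHz units for
 * rdev->clock.spll.reference_freq): with reference_freq = 10000 (100 MHz)
 * and XTALIN_DIVIDE set on a dGPU, this returns 10000 / 4 = 2500.
 */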
1376 
1377 /**
1378  * cik_mm_rdoorbell - read a doorbell dword
1379  *
1380  * @rdev: radeon_device pointer
1381  * @offset: byte offset into the aperture
1382  *
1383  * Returns the value in the doorbell aperture at the
1384  * requested offset (CIK).
1385  */
1386 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
1387 {
1388 	if (offset < rdev->doorbell.size) {
1389 		return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
1390 	} else {
1391 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
1392 		return 0;
1393 	}
1394 }
1395 
1396 /**
1397  * cik_mm_wdoorbell - write a doorbell dword
1398  *
1399  * @rdev: radeon_device pointer
1400  * @offset: byte offset into the aperture
1401  * @v: value to write
1402  *
1403  * Writes @v to the doorbell aperture at the
1404  * requested offset (CIK).
1405  */
1406 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
1407 {
1408 	if (offset < rdev->doorbell.size) {
1409 		writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
1410 	} else {
1411 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
1412 	}
1413 }
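
/*
 * Illustrative use (the field names are an assumption for this sketch):
 * a compute ring that owns a doorbell kicks the hardware by writing its
 * write pointer through this helper:
 *
 *	cik_mm_wdoorbell(rdev, ring->doorbell_offset, ring->wptr);
 */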
1414 
1415 #define BONAIRE_IO_MC_REGS_SIZE 36
1416 
1417 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1418 {
1419 	{0x00000070, 0x04400000},
1420 	{0x00000071, 0x80c01803},
1421 	{0x00000072, 0x00004004},
1422 	{0x00000073, 0x00000100},
1423 	{0x00000074, 0x00ff0000},
1424 	{0x00000075, 0x34000000},
1425 	{0x00000076, 0x08000014},
1426 	{0x00000077, 0x00cc08ec},
1427 	{0x00000078, 0x00000400},
1428 	{0x00000079, 0x00000000},
1429 	{0x0000007a, 0x04090000},
1430 	{0x0000007c, 0x00000000},
1431 	{0x0000007e, 0x4408a8e8},
1432 	{0x0000007f, 0x00000304},
1433 	{0x00000080, 0x00000000},
1434 	{0x00000082, 0x00000001},
1435 	{0x00000083, 0x00000002},
1436 	{0x00000084, 0xf3e4f400},
1437 	{0x00000085, 0x052024e3},
1438 	{0x00000087, 0x00000000},
1439 	{0x00000088, 0x01000000},
1440 	{0x0000008a, 0x1c0a0000},
1441 	{0x0000008b, 0xff010000},
1442 	{0x0000008d, 0xffffefff},
1443 	{0x0000008e, 0xfff3efff},
1444 	{0x0000008f, 0xfff3efbf},
1445 	{0x00000092, 0xf7ffffff},
1446 	{0x00000093, 0xffffff7f},
1447 	{0x00000095, 0x00101101},
1448 	{0x00000096, 0x00000fff},
1449 	{0x00000097, 0x00116fff},
1450 	{0x00000098, 0x60010000},
1451 	{0x00000099, 0x10010000},
1452 	{0x0000009a, 0x00006000},
1453 	{0x0000009b, 0x00001000},
1454 	{0x0000009f, 0x00b48000}
1455 };
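
/*
 * Each pair above is an {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA}
 * tuple; ci_mc_load_microcode() below walks this table to program the MC
 * I/O registers before streaming in the MC ucode proper.
 */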
1456 
1457 /**
1458  * cik_srbm_select - select specific register instances
1459  *
1460  * @rdev: radeon_device pointer
1461  * @me: selected ME (micro engine)
1462  * @pipe: pipe
1463  * @queue: queue
1464  * @vmid: VMID
1465  *
1466  * Switches the currently active register instances.  Some
1467  * registers are instanced per VMID, others are instanced per
1468  * me/pipe/queue combination.
1469  */
1470 static void cik_srbm_select(struct radeon_device *rdev,
1471 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1472 {
1473 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1474 			     MEID(me & 0x3) |
1475 			     VMID(vmid & 0xf) |
1476 			     QUEUEID(queue & 0x7));
1477 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1478 }
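
/*
 * Illustrative use: select ME 1, pipe 0, queue 0 for VMID 0, program the
 * per-queue registers, then always restore the default instance.  Callers
 * typically serialize on rdev->srbm_mutex:
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, 1, 0, 0, 0);
 *	... program HQD registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */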
1479 
1480 /* ucode loading */
1481 /**
1482  * ci_mc_load_microcode - load MC ucode into the hw
1483  *
1484  * @rdev: radeon_device pointer
1485  *
1486  * Load the GDDR MC ucode into the hw (CIK).
1487  * Returns 0 on success, error on failure.
1488  */
1489 static int ci_mc_load_microcode(struct radeon_device *rdev)
1490 {
1491 	const __be32 *fw_data;
1492 	u32 running;
1493 	u32 *io_mc_regs;
1494 	int i, ucode_size, regs_size;
1495 
1496 	if (!rdev->mc_fw)
1497 		return -EINVAL;
1498 
1499 	switch (rdev->family) {
1500 	case CHIP_BONAIRE:
1501 	default:
1502 		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1503 		ucode_size = CIK_MC_UCODE_SIZE;
1504 		regs_size = BONAIRE_IO_MC_REGS_SIZE;
1505 		break;
1506 	}
1507 
1508 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1509 
1510 	if (running == 0) {
1516 		/* reset the engine and set to writable */
1517 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1518 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1519 
1520 		/* load mc io regs */
1521 		for (i = 0; i < regs_size; i++) {
1522 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1523 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1524 		}
1525 		/* load the MC ucode */
1526 		fw_data = (const __be32 *)rdev->mc_fw->data;
1527 		for (i = 0; i < ucode_size; i++)
1528 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1529 
1530 		/* put the engine back into the active state */
1531 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1532 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1533 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1534 
1535 		/* wait for training to complete */
1536 		for (i = 0; i < rdev->usec_timeout; i++) {
1537 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1538 				break;
1539 			udelay(1);
1540 		}
1541 		for (i = 0; i < rdev->usec_timeout; i++) {
1542 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1543 				break;
1544 			udelay(1);
1545 		}
1549 	}
1550 
1551 	return 0;
1552 }
1553 
1554 /**
1555  * cik_init_microcode - load ucode images from disk
1556  *
1557  * @rdev: radeon_device pointer
1558  *
1559  * Use the firmware interface to load the ucode images into
1560  * the driver (not loaded into hw).
1561  * Returns 0 on success, error on failure.
1562  */
1563 static int cik_init_microcode(struct radeon_device *rdev)
1564 {
1565 	const char *chip_name;
1566 	size_t pfp_req_size, me_req_size, ce_req_size,
1567 		mec_req_size, rlc_req_size, mc_req_size,
1568 		sdma_req_size, smc_req_size;
1569 	char fw_name[30];
1570 	int err;
1571 
1572 	DRM_DEBUG("\n");
1573 
1574 	switch (rdev->family) {
1575 	case CHIP_BONAIRE:
1576 		chip_name = "BONAIRE";
1577 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1578 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1579 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1580 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1581 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1582 		mc_req_size = CIK_MC_UCODE_SIZE * 4;
1583 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1584 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1585 		break;
1586 	case CHIP_KAVERI:
1587 		chip_name = "KAVERI";
1588 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1589 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1590 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1591 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1592 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1593 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1594 		break;
1595 	case CHIP_KABINI:
1596 		chip_name = "KABINI";
1597 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1598 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1599 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1600 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1601 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1602 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1603 		break;
1604 	default:
		BUG();
1605 	}
1606 
1607 	DRM_INFO("Loading %s Microcode\n", chip_name);
1608 
1609 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1610 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1611 	if (err)
1612 		goto out;
1613 	if (rdev->pfp_fw->size != pfp_req_size) {
1614 		printk(KERN_ERR
1615 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1616 		       rdev->pfp_fw->size, fw_name);
1617 		err = -EINVAL;
1618 		goto out;
1619 	}
1620 
1621 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1622 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1623 	if (err)
1624 		goto out;
1625 	if (rdev->me_fw->size != me_req_size) {
1626 		printk(KERN_ERR
1627 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1628 		       rdev->me_fw->size, fw_name);
1629 		err = -EINVAL;
		goto out;
1630 	}
1631 
1632 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1633 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1634 	if (err)
1635 		goto out;
1636 	if (rdev->ce_fw->size != ce_req_size) {
1637 		printk(KERN_ERR
1638 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1639 		       rdev->ce_fw->size, fw_name);
1640 		err = -EINVAL;
		goto out;
1641 	}
1642 
1643 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1644 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1645 	if (err)
1646 		goto out;
1647 	if (rdev->mec_fw->size != mec_req_size) {
1648 		printk(KERN_ERR
1649 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1650 		       rdev->mec_fw->size, fw_name);
1651 		err = -EINVAL;
		goto out;
1652 	}
1653 
1654 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1655 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1656 	if (err)
1657 		goto out;
1658 	if (rdev->rlc_fw->size != rlc_req_size) {
1659 		printk(KERN_ERR
1660 		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1661 		       rdev->rlc_fw->size, fw_name);
1662 		err = -EINVAL;
		goto out;
1663 	}
1664 
1665 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1666 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1667 	if (err)
1668 		goto out;
1669 	if (rdev->sdma_fw->size != sdma_req_size) {
1670 		printk(KERN_ERR
1671 		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1672 		       rdev->sdma_fw->size, fw_name);
1673 		err = -EINVAL;
		goto out;
1674 	}
1675 
1676 	/* No MC or SMC ucode on APUs */
1677 	if (!(rdev->flags & RADEON_IS_IGP)) {
1678 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1679 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1680 		if (err)
1681 			goto out;
1682 		if (rdev->mc_fw->size != mc_req_size) {
1683 			printk(KERN_ERR
1684 			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1685 			       rdev->mc_fw->size, fw_name);
1686 			err = -EINVAL;
			goto out;
1687 		}
1688 
1689 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1690 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1691 		if (err) {
1692 			printk(KERN_ERR
1693 			       "smc: error loading firmware \"%s\"\n",
1694 			       fw_name);
1695 			release_firmware(rdev->smc_fw);
1696 			rdev->smc_fw = NULL;
			err = 0; /* missing SMC fw is not fatal */
1697 		} else if (rdev->smc_fw->size != smc_req_size) {
1698 			printk(KERN_ERR
1699 			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1700 			       rdev->smc_fw->size, fw_name);
1701 			err = -EINVAL;
1702 		}
1703 	}
1704 
1705 out:
1706 	if (err) {
1707 		if (err != -EINVAL)
1708 			printk(KERN_ERR
1709 			       "cik_cp: Failed to load firmware \"%s\"\n",
1710 			       fw_name);
1711 		release_firmware(rdev->pfp_fw);
1712 		rdev->pfp_fw = NULL;
1713 		release_firmware(rdev->me_fw);
1714 		rdev->me_fw = NULL;
1715 		release_firmware(rdev->ce_fw);
1716 		rdev->ce_fw = NULL;
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
1717 		release_firmware(rdev->rlc_fw);
1718 		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
1719 		release_firmware(rdev->mc_fw);
1720 		rdev->mc_fw = NULL;
1721 		release_firmware(rdev->smc_fw);
1722 		rdev->smc_fw = NULL;
1723 	}
1724 	return err;
1725 }
1726 
1727 /*
1728  * Core functions
1729  */
1730 /**
1731  * cik_tiling_mode_table_init - init the hw tiling table
1732  *
1733  * @rdev: radeon_device pointer
1734  *
1735  * Starting with SI, the tiling setup is done globally in a
1736  * set of 32 tiling modes.  Rather than selecting each set of
1737  * parameters per surface as on older asics, we just select
1738  * which index in the tiling table we want to use, and the
1739  * surface uses those parameters (CIK).
1740  */
1741 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1742 {
1743 	const u32 num_tile_mode_states = 32;
1744 	const u32 num_secondary_tile_mode_states = 16;
1745 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1746 	u32 num_pipe_configs;
1747 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
1748 		rdev->config.cik.max_shader_engines;
1749 
1750 	switch (rdev->config.cik.mem_row_size_in_kb) {
1751 	case 1:
1752 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1753 		break;
1754 	case 2:
1755 	default:
1756 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1757 		break;
1758 	case 4:
1759 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1760 		break;
1761 	}
1762 
1763 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
1764 	if (num_pipe_configs > 8)
1765 		num_pipe_configs = 8; /* cap at 8; the tables below only cover up to 8 pipes */
1766 
1767 	if (num_pipe_configs == 8) {
1768 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1769 			switch (reg_offset) {
1770 			case 0:
1771 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1772 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1773 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1774 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1775 				break;
1776 			case 1:
1777 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1778 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1779 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1780 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1781 				break;
1782 			case 2:
1783 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1784 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1785 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1786 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1787 				break;
1788 			case 3:
1789 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1790 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1791 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1792 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1793 				break;
1794 			case 4:
1795 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1796 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1797 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1798 						 TILE_SPLIT(split_equal_to_row_size));
1799 				break;
1800 			case 5:
1801 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1802 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1803 				break;
1804 			case 6:
1805 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1806 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1807 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1808 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1809 				break;
1810 			case 7:
1811 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1812 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1813 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1814 						 TILE_SPLIT(split_equal_to_row_size));
1815 				break;
1816 			case 8:
1817 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1818 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
1819 				break;
1820 			case 9:
1821 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1822 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1823 				break;
1824 			case 10:
1825 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1826 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1827 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1828 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1829 				break;
1830 			case 11:
1831 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1832 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1833 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1834 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1835 				break;
1836 			case 12:
1837 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1838 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1839 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1840 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1841 				break;
1842 			case 13:
1843 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1844 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1845 				break;
1846 			case 14:
1847 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1848 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1849 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1850 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1851 				break;
1852 			case 16:
1853 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1854 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1855 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1856 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1857 				break;
1858 			case 17:
1859 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1860 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1861 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1862 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1863 				break;
1864 			case 27:
1865 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1866 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1867 				break;
1868 			case 28:
1869 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1870 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1871 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1872 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1873 				break;
1874 			case 29:
1875 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1876 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1877 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1878 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1879 				break;
1880 			case 30:
1881 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1882 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1883 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1884 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1885 				break;
1886 			default:
1887 				gb_tile_moden = 0;
1888 				break;
1889 			}
1890 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1891 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1892 		}
1893 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1894 			switch (reg_offset) {
1895 			case 0:
1896 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1897 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1898 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1899 						 NUM_BANKS(ADDR_SURF_16_BANK));
1900 				break;
1901 			case 1:
1902 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1903 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1904 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1905 						 NUM_BANKS(ADDR_SURF_16_BANK));
1906 				break;
1907 			case 2:
1908 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1909 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1910 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1911 						 NUM_BANKS(ADDR_SURF_16_BANK));
1912 				break;
1913 			case 3:
1914 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1915 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1916 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1917 						 NUM_BANKS(ADDR_SURF_16_BANK));
1918 				break;
1919 			case 4:
1920 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1921 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1922 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1923 						 NUM_BANKS(ADDR_SURF_8_BANK));
1924 				break;
1925 			case 5:
1926 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1927 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1928 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1929 						 NUM_BANKS(ADDR_SURF_4_BANK));
1930 				break;
1931 			case 6:
1932 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1933 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1934 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1935 						 NUM_BANKS(ADDR_SURF_2_BANK));
1936 				break;
1937 			case 8:
1938 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1939 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1940 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1941 						 NUM_BANKS(ADDR_SURF_16_BANK));
1942 				break;
1943 			case 9:
1944 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1945 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1946 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1947 						 NUM_BANKS(ADDR_SURF_16_BANK));
1948 				break;
1949 			case 10:
1950 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1951 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1952 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1953 						 NUM_BANKS(ADDR_SURF_16_BANK));
1954 				break;
1955 			case 11:
1956 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1957 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1958 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1959 						 NUM_BANKS(ADDR_SURF_16_BANK));
1960 				break;
1961 			case 12:
1962 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1963 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1964 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1965 						 NUM_BANKS(ADDR_SURF_8_BANK));
1966 				break;
1967 			case 13:
1968 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1969 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1970 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1971 						 NUM_BANKS(ADDR_SURF_4_BANK));
1972 				break;
1973 			case 14:
1974 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1975 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1976 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1977 						 NUM_BANKS(ADDR_SURF_2_BANK));
1978 				break;
1979 			default:
1980 				gb_tile_moden = 0;
1981 				break;
1982 			}
1983 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1984 		}
1985 	} else if (num_pipe_configs == 4) {
1986 		if (num_rbs == 4) {
1987 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1988 				switch (reg_offset) {
1989 				case 0:
1990 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1991 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1992 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1993 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1994 					break;
1995 				case 1:
1996 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1997 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1998 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1999 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2000 					break;
2001 				case 2:
2002 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2003 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2004 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2005 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2006 					break;
2007 				case 3:
2008 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2009 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2010 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2011 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2012 					break;
2013 				case 4:
2014 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2015 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2016 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2017 							 TILE_SPLIT(split_equal_to_row_size));
2018 					break;
2019 				case 5:
2020 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2021 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2022 					break;
2023 				case 6:
2024 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2025 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2026 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2027 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2028 					break;
2029 				case 7:
2030 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2031 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2032 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2033 							 TILE_SPLIT(split_equal_to_row_size));
2034 					break;
2035 				case 8:
2036 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2037 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2038 					break;
2039 				case 9:
2040 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2041 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2042 					break;
2043 				case 10:
2044 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2045 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2046 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2047 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2048 					break;
2049 				case 11:
2050 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2051 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2052 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2053 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2054 					break;
2055 				case 12:
2056 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2057 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2058 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2059 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2060 					break;
2061 				case 13:
2062 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2063 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2064 					break;
2065 				case 14:
2066 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2067 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2068 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2069 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2070 					break;
2071 				case 16:
2072 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2073 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2074 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2075 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2076 					break;
2077 				case 17:
2078 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2079 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2080 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2081 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2082 					break;
2083 				case 27:
2084 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2085 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2086 					break;
2087 				case 28:
2088 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2089 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2090 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2091 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2092 					break;
2093 				case 29:
2094 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2095 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2096 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2097 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2098 					break;
2099 				case 30:
2100 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2101 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2102 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2103 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2104 					break;
2105 				default:
2106 					gb_tile_moden = 0;
2107 					break;
2108 				}
2109 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2110 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2111 			}
2112 		} else if (num_rbs < 4) {
2113 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2114 				switch (reg_offset) {
2115 				case 0:
2116 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2117 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2118 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2119 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2120 					break;
2121 				case 1:
2122 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2123 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2124 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2125 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2126 					break;
2127 				case 2:
2128 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2129 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2130 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2131 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2132 					break;
2133 				case 3:
2134 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2135 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2136 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2137 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2138 					break;
2139 				case 4:
2140 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2141 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2142 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2143 							 TILE_SPLIT(split_equal_to_row_size));
2144 					break;
2145 				case 5:
2146 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2147 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2148 					break;
2149 				case 6:
2150 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2151 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2152 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2153 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2154 					break;
2155 				case 7:
2156 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2157 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2158 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2159 							 TILE_SPLIT(split_equal_to_row_size));
2160 					break;
2161 				case 8:
2162 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2163 							 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2164 					break;
2165 				case 9:
2166 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2167 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2168 					break;
2169 				case 10:
2170 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2171 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2172 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2173 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2174 					break;
2175 				case 11:
2176 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2177 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2178 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2179 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2180 					break;
2181 				case 12:
2182 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2183 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2184 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2185 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2186 					break;
2187 				case 13:
2188 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2189 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2190 					break;
2191 				case 14:
2192 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2193 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2194 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2195 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2196 					break;
2197 				case 16:
2198 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2199 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2200 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2201 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2202 					break;
2203 				case 17:
2204 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2205 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2206 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2207 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2208 					break;
2209 				case 27:
2210 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2211 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2212 					break;
2213 				case 28:
2214 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2215 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2216 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2217 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2218 					break;
2219 				case 29:
2220 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2221 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2222 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2223 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2224 					break;
2225 				case 30:
2226 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2227 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2228 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2229 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2230 					break;
2231 				default:
2232 					gb_tile_moden = 0;
2233 					break;
2234 				}
2235 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2236 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2237 			}
2238 		}
2239 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2240 			switch (reg_offset) {
2241 			case 0:
2242 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2243 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2244 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2245 						 NUM_BANKS(ADDR_SURF_16_BANK));
2246 				break;
2247 			case 1:
2248 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2249 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2250 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2251 						 NUM_BANKS(ADDR_SURF_16_BANK));
2252 				break;
2253 			case 2:
2254 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2255 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2256 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2257 						 NUM_BANKS(ADDR_SURF_16_BANK));
2258 				break;
2259 			case 3:
2260 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2261 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2262 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2263 						 NUM_BANKS(ADDR_SURF_16_BANK));
2264 				break;
2265 			case 4:
2266 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2267 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2268 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2269 						 NUM_BANKS(ADDR_SURF_16_BANK));
2270 				break;
2271 			case 5:
2272 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2273 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2274 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2275 						 NUM_BANKS(ADDR_SURF_8_BANK));
2276 				break;
2277 			case 6:
2278 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2279 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2280 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2281 						 NUM_BANKS(ADDR_SURF_4_BANK));
2282 				break;
2283 			case 8:
2284 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2285 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2286 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2287 						 NUM_BANKS(ADDR_SURF_16_BANK));
2288 				break;
2289 			case 9:
2290 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2291 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2292 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2293 						 NUM_BANKS(ADDR_SURF_16_BANK));
2294 				break;
2295 			case 10:
2296 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2297 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2298 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2299 						 NUM_BANKS(ADDR_SURF_16_BANK));
2300 				break;
2301 			case 11:
2302 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2303 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2304 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2305 						 NUM_BANKS(ADDR_SURF_16_BANK));
2306 				break;
2307 			case 12:
2308 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2309 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2310 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2311 						 NUM_BANKS(ADDR_SURF_16_BANK));
2312 				break;
2313 			case 13:
2314 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2315 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2316 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2317 						 NUM_BANKS(ADDR_SURF_8_BANK));
2318 				break;
2319 			case 14:
2320 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2321 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2322 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2323 						 NUM_BANKS(ADDR_SURF_4_BANK));
2324 				break;
2325 			default:
2326 				gb_tile_moden = 0;
2327 				break;
2328 			}
2329 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2330 		}
2331 	} else if (num_pipe_configs == 2) {
2332 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2333 			switch (reg_offset) {
2334 			case 0:
2335 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2336 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2337 						 PIPE_CONFIG(ADDR_SURF_P2) |
2338 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2339 				break;
2340 			case 1:
2341 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2342 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2343 						 PIPE_CONFIG(ADDR_SURF_P2) |
2344 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2345 				break;
2346 			case 2:
2347 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2348 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2349 						 PIPE_CONFIG(ADDR_SURF_P2) |
2350 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2351 				break;
2352 			case 3:
2353 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2354 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2355 						 PIPE_CONFIG(ADDR_SURF_P2) |
2356 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2357 				break;
2358 			case 4:
2359 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2360 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2361 						 PIPE_CONFIG(ADDR_SURF_P2) |
2362 						 TILE_SPLIT(split_equal_to_row_size));
2363 				break;
2364 			case 5:
2365 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2366 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2367 				break;
2368 			case 6:
2369 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2370 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2371 						 PIPE_CONFIG(ADDR_SURF_P2) |
2372 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2373 				break;
2374 			case 7:
2375 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2376 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2377 						 PIPE_CONFIG(ADDR_SURF_P2) |
2378 						 TILE_SPLIT(split_equal_to_row_size));
2379 				break;
2380 			case 8:
2381 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2382 				break;
2383 			case 9:
2384 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2385 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2386 				break;
2387 			case 10:
2388 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2389 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2390 						 PIPE_CONFIG(ADDR_SURF_P2) |
2391 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2392 				break;
2393 			case 11:
2394 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2395 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2396 						 PIPE_CONFIG(ADDR_SURF_P2) |
2397 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2398 				break;
2399 			case 12:
2400 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2401 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2402 						 PIPE_CONFIG(ADDR_SURF_P2) |
2403 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2404 				break;
2405 			case 13:
2406 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2407 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2408 				break;
2409 			case 14:
2410 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2411 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2412 						 PIPE_CONFIG(ADDR_SURF_P2) |
2413 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2414 				break;
2415 			case 16:
2416 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2417 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2418 						 PIPE_CONFIG(ADDR_SURF_P2) |
2419 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2420 				break;
2421 			case 17:
2422 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2423 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2424 						 PIPE_CONFIG(ADDR_SURF_P2) |
2425 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2426 				break;
2427 			case 27:
2428 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2429 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2430 				break;
2431 			case 28:
2432 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2433 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2434 						 PIPE_CONFIG(ADDR_SURF_P2) |
2435 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2436 				break;
2437 			case 29:
2438 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2439 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2440 						 PIPE_CONFIG(ADDR_SURF_P2) |
2441 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2442 				break;
2443 			case 30:
2444 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2445 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2446 						 PIPE_CONFIG(ADDR_SURF_P2) |
2447 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2448 				break;
2449 			default:
2450 				gb_tile_moden = 0;
2451 				break;
2452 			}
2453 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2454 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2455 		}
2456 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2457 			switch (reg_offset) {
2458 			case 0:
2459 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2460 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2461 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2462 						 NUM_BANKS(ADDR_SURF_16_BANK));
2463 				break;
2464 			case 1:
2465 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2466 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2467 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2468 						 NUM_BANKS(ADDR_SURF_16_BANK));
2469 				break;
2470 			case 2:
2471 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2472 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2473 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2474 						 NUM_BANKS(ADDR_SURF_16_BANK));
2475 				break;
2476 			case 3:
2477 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2479 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2480 						 NUM_BANKS(ADDR_SURF_16_BANK));
2481 				break;
2482 			case 4:
2483 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2484 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2485 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2486 						 NUM_BANKS(ADDR_SURF_16_BANK));
2487 				break;
2488 			case 5:
2489 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2492 						 NUM_BANKS(ADDR_SURF_16_BANK));
2493 				break;
2494 			case 6:
2495 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2496 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2497 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2498 						 NUM_BANKS(ADDR_SURF_8_BANK));
2499 				break;
2500 			case 8:
2501 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2502 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2503 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2504 						 NUM_BANKS(ADDR_SURF_16_BANK));
2505 				break;
2506 			case 9:
2507 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2508 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2509 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2510 						 NUM_BANKS(ADDR_SURF_16_BANK));
2511 				break;
2512 			case 10:
2513 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2514 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2515 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2516 						 NUM_BANKS(ADDR_SURF_16_BANK));
2517 				break;
2518 			case 11:
2519 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2520 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2521 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2522 						 NUM_BANKS(ADDR_SURF_16_BANK));
2523 				break;
2524 			case 12:
2525 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2526 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2527 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2528 						 NUM_BANKS(ADDR_SURF_16_BANK));
2529 				break;
2530 			case 13:
2531 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2532 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2533 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2534 						 NUM_BANKS(ADDR_SURF_16_BANK));
2535 				break;
2536 			case 14:
2537 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2538 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2539 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2540 						 NUM_BANKS(ADDR_SURF_8_BANK));
2541 				break;
2542 			default:
2543 				gb_tile_moden = 0;
2544 				break;
2545 			}
2546 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2547 		}
2548 	} else
2549 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2550 }
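
/*
 * Illustrative note (a sketch, not driver code): each of the 32 tile
 * mode entries is a consecutive dword register, so entry N lives at
 * GB_TILE_MODE0 + N * 4, and a CPU-side copy is kept for later
 * surface validation, e.g.:
 *
 *	u32 mode = rdev->config.cik.tile_mode_array[10];
 *
 * which matches RREG32(GB_TILE_MODE0 + 10 * 4) once init has run.
 */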
2551 
2552 /**
2553  * cik_select_se_sh - select which SE, SH to address
2554  *
2555  * @rdev: radeon_device pointer
2556  * @se_num: shader engine to address
2557  * @sh_num: sh block to address
2558  *
2559  * Select which SE, SH combinations to address. Certain
2560  * registers are instanced per SE or SH.  0xffffffff means
2561  * broadcast to all SEs or SHs (CIK).
2562  */
2563 static void cik_select_se_sh(struct radeon_device *rdev,
2564 			     u32 se_num, u32 sh_num)
2565 {
2566 	u32 data = INSTANCE_BROADCAST_WRITES;
2567 
2568 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2569 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2570 	else if (se_num == 0xffffffff)
2571 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2572 	else if (sh_num == 0xffffffff)
2573 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2574 	else
2575 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2576 	WREG32(GRBM_GFX_INDEX, data);
2577 }
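
/*
 * Usage sketch: pass 0xffffffff for either index to broadcast.  For
 * example, to address instanced registers on SE 1 across all of its
 * SH blocks and then restore broadcast mode (the pattern used by
 * cik_setup_rb() below):
 *
 *	cik_select_se_sh(rdev, 1, 0xffffffff);
 *	... program per-SE registers ...
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 */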
2578 
2579 /**
2580  * cik_create_bitmask - create a bitmask
2581  *
2582  * @bit_width: length of the mask
2583  *
2584  * Create a variable length bit mask (CIK).
2585  * Returns the bitmask.
2586  */
2587 static u32 cik_create_bitmask(u32 bit_width)
2588 {
2589 	u32 i, mask = 0;
2590 
2591 	for (i = 0; i < bit_width; i++) {
2592 		mask <<= 1;
2593 		mask |= 1;
2594 	}
2595 	return mask;
2596 }
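
/*
 * Note (sketch): the loop above is equivalent to ((1 << bit_width) - 1)
 * for bit_width < 32, e.g.:
 *
 *	cik_create_bitmask(2) == 0x3
 *	cik_create_bitmask(4) == 0xf
 */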
2597 
2598 /**
2599  * cik_get_rb_disabled - get the bitmask of disabled RBs
2600  *
2601  * @rdev: radeon_device pointer
2602  * @max_rb_num: max RBs (render backends) for the asic
2603  * @se_num: number of SEs (shader engines) for the asic
2604  * @sh_per_se: number of SH blocks per SE for the asic
2605  *
2606  * Calculates the bitmask of disabled RBs (CIK).
2607  * Returns the disabled RB bitmask.
2608  */
2609 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
2610 			      u32 max_rb_num, u32 se_num,
2611 			      u32 sh_per_se)
2612 {
2613 	u32 data, mask;
2614 
2615 	data = RREG32(CC_RB_BACKEND_DISABLE);
2616 	if (data & 1)
2617 		data &= BACKEND_DISABLE_MASK;
2618 	else
2619 		data = 0;
2620 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2621 
2622 	data >>= BACKEND_DISABLE_SHIFT;
2623 
2624 	mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
2625 
2626 	return data & mask;
2627 }
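
/*
 * Worked example (hypothetical numbers): with max_rb_num = 4,
 * se_num = 2 and sh_per_se = 1, the mask covers 4 / 2 / 1 = 2 RBs per
 * SE/SH pair, so a raw disable field of 0x6 is masked down to
 *
 *	0x6 & cik_create_bitmask(2) == 0x2
 *
 * i.e. one of the two RBs visible to the selected SE/SH is disabled.
 */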
2628 
2629 /**
2630  * cik_setup_rb - setup the RBs on the asic
2631  *
2632  * @rdev: radeon_device pointer
2633  * @se_num: number of SEs (shader engines) for the asic
2634  * @sh_per_se: number of SH blocks per SE for the asic
2635  * @max_rb_num: max RBs (render backends) for the asic
2636  *
2637  * Configures per-SE/SH RB registers (CIK).
2638  */
2639 static void cik_setup_rb(struct radeon_device *rdev,
2640 			 u32 se_num, u32 sh_per_se,
2641 			 u32 max_rb_num)
2642 {
2643 	int i, j;
2644 	u32 data, mask;
2645 	u32 disabled_rbs = 0;
2646 	u32 enabled_rbs = 0;
2647 
2648 	for (i = 0; i < se_num; i++) {
2649 		for (j = 0; j < sh_per_se; j++) {
2650 			cik_select_se_sh(rdev, i, j);
2651 			data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
2652 			disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
2653 		}
2654 	}
2655 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2656 
2657 	mask = 1;
2658 	for (i = 0; i < max_rb_num; i++) {
2659 		if (!(disabled_rbs & mask))
2660 			enabled_rbs |= mask;
2661 		mask <<= 1;
2662 	}
2663 
2664 	for (i = 0; i < se_num; i++) {
2665 		cik_select_se_sh(rdev, i, 0xffffffff);
2666 		data = 0;
2667 		for (j = 0; j < sh_per_se; j++) {
2668 			switch (enabled_rbs & 3) {
2669 			case 1:
2670 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2671 				break;
2672 			case 2:
2673 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2674 				break;
2675 			case 3:
2676 			default:
2677 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2678 				break;
2679 			}
2680 			enabled_rbs >>= 2;
2681 		}
2682 		WREG32(PA_SC_RASTER_CONFIG, data);
2683 	}
2684 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2685 }
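
/*
 * Packing sketch (hypothetical 2 SE x 1 SH config, assuming a 2-bit
 * RB bitmap width): each SE/SH pair contributes
 * CIK_RB_BITMAP_WIDTH_PER_SH bits to disabled_rbs, i.e.
 *
 *	disabled_rbs = (se1_sh0_bits << 2) | se0_sh0_bits;
 *
 * enabled_rbs is the complement over max_rb_num bits and is consumed
 * two bits at a time to pick a RASTER_CONFIG_RB_MAP_* value per SH.
 */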
2686 
2687 /**
2688  * cik_gpu_init - setup the 3D engine
2689  *
2690  * @rdev: radeon_device pointer
2691  *
2692  * Configures the 3D engine and tiling configuration
2693  * registers so that the 3D engine is usable.
2694  */
2695 static void cik_gpu_init(struct radeon_device *rdev)
2696 {
2697 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
2698 	u32 mc_shared_chmap, mc_arb_ramcfg;
2699 	u32 hdp_host_path_cntl;
2700 	u32 tmp;
2701 	int i, j;
2702 
2703 	switch (rdev->family) {
2704 	case CHIP_BONAIRE:
2705 		rdev->config.cik.max_shader_engines = 2;
2706 		rdev->config.cik.max_tile_pipes = 4;
2707 		rdev->config.cik.max_cu_per_sh = 7;
2708 		rdev->config.cik.max_sh_per_se = 1;
2709 		rdev->config.cik.max_backends_per_se = 2;
2710 		rdev->config.cik.max_texture_channel_caches = 4;
2711 		rdev->config.cik.max_gprs = 256;
2712 		rdev->config.cik.max_gs_threads = 32;
2713 		rdev->config.cik.max_hw_contexts = 8;
2714 
2715 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2716 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2717 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2718 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2719 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2720 		break;
2721 	case CHIP_KAVERI:
2722 		rdev->config.cik.max_shader_engines = 1;
2723 		rdev->config.cik.max_tile_pipes = 4;
2724 		if ((rdev->pdev->device == 0x1304) ||
2725 		    (rdev->pdev->device == 0x1305) ||
2726 		    (rdev->pdev->device == 0x130C) ||
2727 		    (rdev->pdev->device == 0x130F) ||
2728 		    (rdev->pdev->device == 0x1310) ||
2729 		    (rdev->pdev->device == 0x1311) ||
2730 		    (rdev->pdev->device == 0x131C)) {
2731 			rdev->config.cik.max_cu_per_sh = 8;
2732 			rdev->config.cik.max_backends_per_se = 2;
2733 		} else if ((rdev->pdev->device == 0x1309) ||
2734 			   (rdev->pdev->device == 0x130A) ||
2735 			   (rdev->pdev->device == 0x130D) ||
2736 			   (rdev->pdev->device == 0x1313) ||
2737 			   (rdev->pdev->device == 0x131D)) {
2738 			rdev->config.cik.max_cu_per_sh = 6;
2739 			rdev->config.cik.max_backends_per_se = 2;
2740 		} else if ((rdev->pdev->device == 0x1306) ||
2741 			   (rdev->pdev->device == 0x1307) ||
2742 			   (rdev->pdev->device == 0x130B) ||
2743 			   (rdev->pdev->device == 0x130E) ||
2744 			   (rdev->pdev->device == 0x1315) ||
2745 			   (rdev->pdev->device == 0x131B)) {
2746 			rdev->config.cik.max_cu_per_sh = 4;
2747 			rdev->config.cik.max_backends_per_se = 1;
2748 		} else {
2749 			rdev->config.cik.max_cu_per_sh = 3;
2750 			rdev->config.cik.max_backends_per_se = 1;
2751 		}
2752 		rdev->config.cik.max_sh_per_se = 1;
2753 		rdev->config.cik.max_texture_channel_caches = 4;
2754 		rdev->config.cik.max_gprs = 256;
2755 		rdev->config.cik.max_gs_threads = 16;
2756 		rdev->config.cik.max_hw_contexts = 8;
2757 
2758 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2759 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2760 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2761 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2762 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2763 		break;
2764 	case CHIP_KABINI:
2765 	default:
2766 		rdev->config.cik.max_shader_engines = 1;
2767 		rdev->config.cik.max_tile_pipes = 2;
2768 		rdev->config.cik.max_cu_per_sh = 2;
2769 		rdev->config.cik.max_sh_per_se = 1;
2770 		rdev->config.cik.max_backends_per_se = 1;
2771 		rdev->config.cik.max_texture_channel_caches = 2;
2772 		rdev->config.cik.max_gprs = 256;
2773 		rdev->config.cik.max_gs_threads = 16;
2774 		rdev->config.cik.max_hw_contexts = 8;
2775 
2776 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2777 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2778 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2779 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2780 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2781 		break;
2782 	}
2783 
2784 	/* Initialize HDP */
2785 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2786 		WREG32((0x2c14 + j), 0x00000000);
2787 		WREG32((0x2c18 + j), 0x00000000);
2788 		WREG32((0x2c1c + j), 0x00000000);
2789 		WREG32((0x2c20 + j), 0x00000000);
2790 		WREG32((0x2c24 + j), 0x00000000);
2791 	}
2792 
2793 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2794 
2795 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2796 
2797 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2798 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2799 
2800 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
2801 	rdev->config.cik.mem_max_burst_length_bytes = 256;
2802 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
2803 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2804 	if (rdev->config.cik.mem_row_size_in_kb > 4)
2805 		rdev->config.cik.mem_row_size_in_kb = 4;
2806 	/* XXX use MC settings? */
2807 	rdev->config.cik.shader_engine_tile_size = 32;
2808 	rdev->config.cik.num_gpus = 1;
2809 	rdev->config.cik.multi_gpu_tile_size = 64;
2810 
2811 	/* fix up row size */
2812 	gb_addr_config &= ~ROW_SIZE_MASK;
2813 	switch (rdev->config.cik.mem_row_size_in_kb) {
2814 	case 1:
2815 	default:
2816 		gb_addr_config |= ROW_SIZE(0);
2817 		break;
2818 	case 2:
2819 		gb_addr_config |= ROW_SIZE(1);
2820 		break;
2821 	case 4:
2822 		gb_addr_config |= ROW_SIZE(2);
2823 		break;
2824 	}
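
	/*
	 * The switch above amounts to ROW_SIZE(log2(mem_row_size_in_kb)):
	 * 1KB -> ROW_SIZE(0), 2KB -> ROW_SIZE(1), 4KB -> ROW_SIZE(2).
	 */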
2825 
2826 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
2827 	 * not have bank info, so create a custom tiling dword.
2828 	 * bits 3:0   num_pipes
2829 	 * bits 7:4   num_banks
2830 	 * bits 11:8  group_size
2831 	 * bits 15:12 row_size
2832 	 */
2833 	rdev->config.cik.tile_config = 0;
2834 	switch (rdev->config.cik.num_tile_pipes) {
2835 	case 1:
2836 		rdev->config.cik.tile_config |= (0 << 0);
2837 		break;
2838 	case 2:
2839 		rdev->config.cik.tile_config |= (1 << 0);
2840 		break;
2841 	case 4:
2842 		rdev->config.cik.tile_config |= (2 << 0);
2843 		break;
2844 	case 8:
2845 	default:
2846 		/* XXX what about 12? */
2847 		rdev->config.cik.tile_config |= (3 << 0);
2848 		break;
2849 	}
2850 	rdev->config.cik.tile_config |=
2851 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
2852 	rdev->config.cik.tile_config |=
2853 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
2854 	rdev->config.cik.tile_config |=
2855 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
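
	/*
	 * Decode sketch (hypothetical field values): with 4 pipes
	 * (bits 3:0 = 2), a bank field of 2, a group_size field of 0
	 * and a row_size field of 1, the custom dword assembles as
	 *
	 *	tile_config = (1 << 12) | (0 << 8) | (2 << 4) | 2;
	 *
	 * i.e. 0x1022.
	 */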
2856 
2857 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
2858 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2859 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
2860 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
2861 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
2862 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
2863 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2864 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2865 
2866 	cik_tiling_mode_table_init(rdev);
2867 
2868 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
2869 		     rdev->config.cik.max_sh_per_se,
2870 		     rdev->config.cik.max_backends_per_se);
2871 
2872 	/* set HW defaults for 3D engine */
2873 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2874 
2875 	WREG32(SX_DEBUG_1, 0x20);
2876 
2877 	WREG32(TA_CNTL_AUX, 0x00010000);
2878 
2879 	tmp = RREG32(SPI_CONFIG_CNTL);
2880 	tmp |= 0x03000000;
2881 	WREG32(SPI_CONFIG_CNTL, tmp);
2882 
2883 	WREG32(SQ_CONFIG, 1);
2884 
2885 	WREG32(DB_DEBUG, 0);
2886 
2887 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2888 	tmp |= 0x00000400;
2889 	WREG32(DB_DEBUG2, tmp);
2890 
2891 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2892 	tmp |= 0x00020200;
2893 	WREG32(DB_DEBUG3, tmp);
2894 
2895 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2896 	tmp |= 0x00018208;
2897 	WREG32(CB_HW_CONTROL, tmp);
2898 
2899 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2900 
2901 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2902 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2903 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2904 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2905 
2906 	WREG32(VGT_NUM_INSTANCES, 1);
2907 
2908 	WREG32(CP_PERFMON_CNTL, 0);
2909 
2910 	WREG32(SQ_CONFIG, 0);
2911 
2912 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2913 					  FORCE_EOV_MAX_REZ_CNT(255)));
2914 
2915 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2916 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
2917 
2918 	WREG32(VGT_GS_VERTEX_REUSE, 16);
2919 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2920 
2921 	tmp = RREG32(HDP_MISC_CNTL);
2922 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2923 	WREG32(HDP_MISC_CNTL, tmp);
2924 
2925 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2926 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2927 
2928 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2929 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2930 
2931 	udelay(50);
2932 }
2933 
2934 /*
2935  * GPU scratch register helper functions.
2936  */
2937 /**
2938  * cik_scratch_init - setup driver info for CP scratch regs
2939  *
2940  * @rdev: radeon_device pointer
2941  *
2942  * Set up the number and offset of the CP scratch registers.
2943  * NOTE: use of CP scratch registers is a legacy interface and
2944  * is not used by default on newer asics (r6xx+).  On newer asics,
2945  * memory buffers are used for fences rather than scratch regs.
2946  */
2947 static void cik_scratch_init(struct radeon_device *rdev)
2948 {
2949 	int i;
2950 
2951 	rdev->scratch.num_reg = 7;
2952 	rdev->scratch.reg_base = SCRATCH_REG0;
2953 	for (i = 0; i < rdev->scratch.num_reg; i++) {
2954 		rdev->scratch.free[i] = true;
2955 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2956 	}
2957 }
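
/*
 * Resulting layout sketch: seven scratch registers at consecutive
 * dword offsets, reg[i] = SCRATCH_REG0 + i * 4 for i in [0, 6], all
 * initially marked free.
 */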
2958 
2959 /**
2960  * cik_ring_test - basic gfx ring test
2961  *
2962  * @rdev: radeon_device pointer
2963  * @ring: radeon_ring structure holding ring information
2964  *
2965  * Allocate a scratch register and write to it using the gfx ring (CIK).
2966  * Provides a basic gfx ring test to verify that the ring is working.
2967  * Used by cik_cp_gfx_resume().
2968  * Returns 0 on success, error on failure.
2969  */
2970 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2971 {
2972 	uint32_t scratch;
2973 	uint32_t tmp = 0;
2974 	unsigned i;
2975 	int r;
2976 
2977 	r = radeon_scratch_get(rdev, &scratch);
2978 	if (r) {
2979 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2980 		return r;
2981 	}
2982 	WREG32(scratch, 0xCAFEDEAD);
2983 	r = radeon_ring_lock(rdev, ring, 3);
2984 	if (r) {
2985 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2986 		radeon_scratch_free(rdev, scratch);
2987 		return r;
2988 	}
2989 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2990 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2991 	radeon_ring_write(ring, 0xDEADBEEF);
2992 	radeon_ring_unlock_commit(rdev, ring);
2993 
2994 	for (i = 0; i < rdev->usec_timeout; i++) {
2995 		tmp = RREG32(scratch);
2996 		if (tmp == 0xDEADBEEF)
2997 			break;
2998 		DRM_UDELAY(1);
2999 	}
3000 	if (i < rdev->usec_timeout) {
3001 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3002 	} else {
3003 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3004 			  ring->idx, scratch, tmp);
3005 		r = -EINVAL;
3006 	}
3007 	radeon_scratch_free(rdev, scratch);
3008 	return r;
3009 }
3010 
3011 /**
3012  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3013  *
3014  * @rdev: radeon_device pointer
3015  * @fence: radeon fence object
3016  *
3017  * Emits a fence sequence number on the gfx ring and flushes
3018  * GPU caches.
3019  */
3020 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3021 			     struct radeon_fence *fence)
3022 {
3023 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3024 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3025 
3026 	/* EVENT_WRITE_EOP - flush caches, send int */
3027 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3028 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3029 				 EOP_TC_ACTION_EN |
3030 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3031 				 EVENT_INDEX(5)));
3032 	radeon_ring_write(ring, addr & 0xfffffffc);
3033 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3034 	radeon_ring_write(ring, fence->seq);
3035 	radeon_ring_write(ring, 0);
3036 	/* HDP flush */
3037 	/* We should be using the new WAIT_REG_MEM special op packet here
3038 	 * but it causes the CP to hang
3039 	 */
3040 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3041 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3042 				 WRITE_DATA_DST_SEL(0)));
3043 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3044 	radeon_ring_write(ring, 0);
3045 	radeon_ring_write(ring, 0);
3046 }
3047 
3048 /**
3049  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3050  *
3051  * @rdev: radeon_device pointer
3052  * @fence: radeon fence object
3053  *
3054  * Emits a fence sequence number on the compute ring and flushes
3055  * GPU caches.
3056  */
3057 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3058 				 struct radeon_fence *fence)
3059 {
3060 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3061 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3062 
3063 	/* RELEASE_MEM - flush caches, send int */
3064 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3065 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3066 				 EOP_TC_ACTION_EN |
3067 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3068 				 EVENT_INDEX(5)));
3069 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3070 	radeon_ring_write(ring, addr & 0xfffffffc);
3071 	radeon_ring_write(ring, upper_32_bits(addr));
3072 	radeon_ring_write(ring, fence->seq);
3073 	radeon_ring_write(ring, 0);
3074 	/* HDP flush */
3075 	/* We should be using the new WAIT_REG_MEM special op packet here
3076 	 * but it causes the CP to hang
3077 	 */
3078 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3079 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3080 				 WRITE_DATA_DST_SEL(0)));
3081 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3082 	radeon_ring_write(ring, 0);
3083 	radeon_ring_write(ring, 0);
3084 }
3085 
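/**
 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring buffer object
 * @semaphore: radeon semaphore object
 * @emit_wait: Is this a semaphore wait?
 *
 * Emits a semaphore packet on the CP ring (CIK): a signal when
 * emit_wait is false, a wait otherwise.
 */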
3086 void cik_semaphore_ring_emit(struct radeon_device *rdev,
3087 			     struct radeon_ring *ring,
3088 			     struct radeon_semaphore *semaphore,
3089 			     bool emit_wait)
3090 {
3091 	uint64_t addr = semaphore->gpu_addr;
3092 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3093 
3094 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3095 	radeon_ring_write(ring, addr & 0xffffffff);
3096 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3097 }
3098 
3099 /*
3100  * IB stuff
3101  */
3102 /**
3103  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3104  *
3105  * @rdev: radeon_device pointer
3106  * @ib: radeon indirect buffer object
3107  *
3108  * Emits a DE (drawing engine) or CE (constant engine) IB
3109  * on the gfx ring.  IBs are usually generated by userspace
3110  * acceleration drivers and submitted to the kernel for
3111  * scheduling on the ring.  This function schedules the IB
3112  * on the gfx ring for execution by the GPU.
3113  */
3114 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3115 {
3116 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3117 	u32 header, control = INDIRECT_BUFFER_VALID;
3118 
3119 	if (ib->is_const_ib) {
3120 		/* set switch buffer packet before const IB */
3121 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3122 		radeon_ring_write(ring, 0);
3123 
3124 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3125 	} else {
3126 		u32 next_rptr;
3127 		if (ring->rptr_save_reg) {
3128 			next_rptr = ring->wptr + 3 + 4;
3129 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3130 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3131 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3132 			radeon_ring_write(ring, next_rptr);
3133 		} else if (rdev->wb.enabled) {
3134 			next_rptr = ring->wptr + 5 + 4;
3135 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3136 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3137 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3138 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3139 			radeon_ring_write(ring, next_rptr);
3140 		}
3141 
3142 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3143 	}
3144 
3145 	control |= ib->length_dw |
3146 		(ib->vm ? (ib->vm->id << 24) : 0);
3147 
3148 	radeon_ring_write(ring, header);
3149 	radeon_ring_write(ring,
3150 #ifdef __BIG_ENDIAN
3151 			  (2 << 0) |
3152 #endif
3153 			  (ib->gpu_addr & 0xFFFFFFFC));
3154 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3155 	radeon_ring_write(ring, control);
3156 }
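
/*
 * Control dword sketch (hypothetical values): an IB of 16 dwords bound
 * to VM id 1 would yield
 *
 *	control = INDIRECT_BUFFER_VALID | 16 | (1 << 24);
 *
 * i.e. the IB length in the low bits and the VM id starting at bit 24.
 */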
3157 
3158 /**
3159  * cik_ib_test - basic gfx ring IB test
3160  *
3161  * @rdev: radeon_device pointer
3162  * @ring: radeon_ring structure holding ring information
3163  *
3164  * Allocate an IB and execute it on the gfx ring (CIK).
3165  * Provides a basic gfx ring test to verify that IBs are working.
3166  * Returns 0 on success, error on failure.
3167  */
3168 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3169 {
3170 	struct radeon_ib ib;
3171 	uint32_t scratch;
3172 	uint32_t tmp = 0;
3173 	unsigned i;
3174 	int r;
3175 
3176 	r = radeon_scratch_get(rdev, &scratch);
3177 	if (r) {
3178 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3179 		return r;
3180 	}
3181 	WREG32(scratch, 0xCAFEDEAD);
3182 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3183 	if (r) {
3184 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3185 		return r;
3186 	}
3187 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3188 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3189 	ib.ptr[2] = 0xDEADBEEF;
3190 	ib.length_dw = 3;
3191 	r = radeon_ib_schedule(rdev, &ib, NULL);
3192 	if (r) {
3193 		radeon_scratch_free(rdev, scratch);
3194 		radeon_ib_free(rdev, &ib);
3195 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3196 		return r;
3197 	}
3198 	r = radeon_fence_wait(ib.fence, false);
3199 	if (r) {
3200 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
3201 		return r;
3202 	}
3203 	for (i = 0; i < rdev->usec_timeout; i++) {
3204 		tmp = RREG32(scratch);
3205 		if (tmp == 0xDEADBEEF)
3206 			break;
3207 		DRM_UDELAY(1);
3208 	}
3209 	if (i < rdev->usec_timeout) {
3210 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3211 	} else {
3212 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3213 			  scratch, tmp);
3214 		r = -EINVAL;
3215 	}
3216 	radeon_scratch_free(rdev, scratch);
3217 	radeon_ib_free(rdev, &ib);
3218 	return r;
3219 }
3220 
3221 /*
3222  * CP.
3223  * On CIK, gfx and compute now have independent command processors.
3224  *
3225  * GFX
3226  * Gfx consists of a single ring and can process both gfx jobs and
3227  * compute jobs.  The gfx CP consists of three microengines (ME):
3228  * PFP - Pre-Fetch Parser
3229  * ME - Micro Engine
3230  * CE - Constant Engine
3231  * The PFP and ME make up what is considered the Drawing Engine (DE).
3232  * The CE is an asynchronous engine used for updating buffer descriptors
3233  * used by the DE so that they can be loaded into cache in parallel
3234  * while the DE is processing state update packets.
3235  *
3236  * Compute
3237  * The compute CP consists of two microengines (ME):
3238  * MEC1 - Compute MicroEngine 1
3239  * MEC2 - Compute MicroEngine 2
3240  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3241  * The queues are exposed to userspace and are programmed directly
3242  * by the compute runtime.
3243  */
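/*
 * For example, the compute queue count follows directly from the
 * figures above: 2 MECs * 4 pipes/MEC * 8 queues/pipe = 64 compute
 * queues in total.
 */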
3244 /**
3245  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3246  *
3247  * @rdev: radeon_device pointer
3248  * @enable: enable or disable the MEs
3249  *
3250  * Halts or unhalts the gfx MEs.
3251  */
3252 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3253 {
3254 	if (enable)
3255 		WREG32(CP_ME_CNTL, 0);
3256 	else {
3257 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3258 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3259 	}
3260 	udelay(50);
3261 }
3262 
3263 /**
3264  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3265  *
3266  * @rdev: radeon_device pointer
3267  *
3268  * Loads the gfx PFP, ME, and CE ucode.
3269  * Returns 0 for success, -EINVAL if the ucode is not available.
3270  */
3271 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3272 {
3273 	const __be32 *fw_data;
3274 	int i;
3275 
3276 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3277 		return -EINVAL;
3278 
3279 	cik_cp_gfx_enable(rdev, false);
3280 
3281 	/* PFP */
3282 	fw_data = (const __be32 *)rdev->pfp_fw->data;
3283 	WREG32(CP_PFP_UCODE_ADDR, 0);
3284 	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3285 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3286 	WREG32(CP_PFP_UCODE_ADDR, 0);
3287 
3288 	/* CE */
3289 	fw_data = (const __be32 *)rdev->ce_fw->data;
3290 	WREG32(CP_CE_UCODE_ADDR, 0);
3291 	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3292 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3293 	WREG32(CP_CE_UCODE_ADDR, 0);
3294 
3295 	/* ME */
3296 	fw_data = (const __be32 *)rdev->me_fw->data;
3297 	WREG32(CP_ME_RAM_WADDR, 0);
3298 	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3299 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3300 	WREG32(CP_ME_RAM_WADDR, 0);
3301 
3302 	WREG32(CP_PFP_UCODE_ADDR, 0);
3303 	WREG32(CP_CE_UCODE_ADDR, 0);
3304 	WREG32(CP_ME_RAM_WADDR, 0);
3305 	WREG32(CP_ME_RAM_RADDR, 0);
3306 	return 0;
3307 }
3308 
3309 /**
3310  * cik_cp_gfx_start - start the gfx ring
3311  *
3312  * @rdev: radeon_device pointer
3313  *
3314  * Enables the ring and loads the clear state context and other
3315  * packets required to init the ring.
3316  * Returns 0 for success, error for failure.
3317  */
3318 static int cik_cp_gfx_start(struct radeon_device *rdev)
3319 {
3320 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3321 	int r, i;
3322 
3323 	/* init the CP */
3324 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3325 	WREG32(CP_ENDIAN_SWAP, 0);
3326 	WREG32(CP_DEVICE_ID, 1);
3327 
3328 	cik_cp_gfx_enable(rdev, true);
3329 
3330 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3331 	if (r) {
3332 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3333 		return r;
3334 	}
3335 
3336 	/* init the CE partitions.  CE only used for gfx on CIK */
3337 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3338 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3339 	radeon_ring_write(ring, 0xc000);
3340 	radeon_ring_write(ring, 0xc000);
3341 
3342 	/* setup clear context state */
3343 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3344 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3345 
3346 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3347 	radeon_ring_write(ring, 0x80000000);
3348 	radeon_ring_write(ring, 0x80000000);
3349 
3350 	for (i = 0; i < cik_default_size; i++)
3351 		radeon_ring_write(ring, cik_default_state[i]);
3352 
3353 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3354 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3355 
3356 	/* set clear context state */
3357 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3358 	radeon_ring_write(ring, 0);
3359 
3360 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3361 	radeon_ring_write(ring, 0x00000316);
3362 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3363 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3364 
3365 	radeon_ring_unlock_commit(rdev, ring);
3366 
3367 	return 0;
3368 }
3369 
3370 /**
3371  * cik_cp_gfx_fini - stop the gfx ring
3372  *
3373  * @rdev: radeon_device pointer
3374  *
3375  * Stop the gfx ring and tear down the driver ring
3376  * info.
3377  */
3378 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3379 {
3380 	cik_cp_gfx_enable(rdev, false);
3381 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3382 }
3383 
3384 /**
3385  * cik_cp_gfx_resume - setup the gfx ring buffer registers
3386  *
3387  * @rdev: radeon_device pointer
3388  *
3389  * Program the location and size of the gfx ring buffer
3390  * and test it to make sure it's working.
3391  * Returns 0 for success, error for failure.
3392  */
3393 static int cik_cp_gfx_resume(struct radeon_device *rdev)
3394 {
3395 	struct radeon_ring *ring;
3396 	u32 tmp;
3397 	u32 rb_bufsz;
3398 	u64 rb_addr;
3399 	int r;
3400 
3401 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3402 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3403 
3404 	/* Set the write pointer delay */
3405 	WREG32(CP_RB_WPTR_DELAY, 0);
3406 
3407 	/* set the RB to use vmid 0 */
3408 	WREG32(CP_RB_VMID, 0);
3409 
3410 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3411 
3412 	/* ring 0 - compute and gfx */
3413 	/* Set ring buffer size */
3414 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3415 	rb_bufsz = order_base_2(ring->ring_size / 8);
3416 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
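	/* arithmetic example: a 1 MiB ring (ring_size is in bytes) gives
	 * rb_bufsz = order_base_2(1048576 / 8) = 17; with a 4 KiB
	 * RADEON_GPU_PAGE_SIZE the bits 15:8 field (presumably the rptr
	 * writeback block size) is order_base_2(4096 / 8) = 9 */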
3417 #ifdef __BIG_ENDIAN
3418 	tmp |= BUF_SWAP_32BIT;
3419 #endif
3420 	WREG32(CP_RB0_CNTL, tmp);
3421 
3422 	/* Initialize the ring buffer's read and write pointers */
3423 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3424 	ring->wptr = 0;
3425 	WREG32(CP_RB0_WPTR, ring->wptr);
3426 
3427 	/* set the wb address whether it's enabled or not */
3428 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3429 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3430 
3431 	/* scratch register shadowing is no longer supported */
3432 	WREG32(SCRATCH_UMSK, 0);
3433 
3434 	if (!rdev->wb.enabled)
3435 		tmp |= RB_NO_UPDATE;
3436 
3437 	mdelay(1);
3438 	WREG32(CP_RB0_CNTL, tmp);
3439 
3440 	rb_addr = ring->gpu_addr >> 8;
3441 	WREG32(CP_RB0_BASE, rb_addr);
3442 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
3443 
3444 	ring->rptr = RREG32(CP_RB0_RPTR);
3445 
3446 	/* start the ring */
3447 	cik_cp_gfx_start(rdev);
3448 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3449 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3450 	if (r) {
3451 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3452 		return r;
3453 	}
3454 	return 0;
3455 }
3456 
3457 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3458 			      struct radeon_ring *ring)
3459 {
3460 	u32 rptr;
3461 
3462 
3464 	if (rdev->wb.enabled) {
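		/* wb.wb[] is a u32 array while rptr_offs is a byte offset
		 * into the writeback buffer, hence the /4 */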
3465 		rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3466 	} else {
3467 		mutex_lock(&rdev->srbm_mutex);
3468 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3469 		rptr = RREG32(CP_HQD_PQ_RPTR);
3470 		cik_srbm_select(rdev, 0, 0, 0, 0);
3471 		mutex_unlock(&rdev->srbm_mutex);
3472 	}
3473 
3474 	return rptr;
3475 }
3476 
3477 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3478 			      struct radeon_ring *ring)
3479 {
3480 	u32 wptr;
3481 
3482 	if (rdev->wb.enabled) {
3483 		wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3484 	} else {
3485 		mutex_lock(&rdev->srbm_mutex);
3486 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3487 		wptr = RREG32(CP_HQD_PQ_WPTR);
3488 		cik_srbm_select(rdev, 0, 0, 0, 0);
3489 		mutex_unlock(&rdev->srbm_mutex);
3490 	}
3491 
3492 	return wptr;
3493 }
3494 
3495 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
3496 			       struct radeon_ring *ring)
3497 {
3498 	rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
3499 	WDOORBELL32(ring->doorbell_offset, ring->wptr);
3500 }
3501 
3502 /**
3503  * cik_cp_compute_enable - enable/disable the compute CP MEs
3504  *
3505  * @rdev: radeon_device pointer
3506  * @enable: enable or disable the MEs
3507  *
3508  * Halts or unhalts the compute MEs.
3509  */
3510 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
3511 {
3512 	if (enable)
3513 		WREG32(CP_MEC_CNTL, 0);
3514 	else
3515 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3516 	udelay(50);
3517 }
3518 
3519 /**
3520  * cik_cp_compute_load_microcode - load the compute CP ME ucode
3521  *
3522  * @rdev: radeon_device pointer
3523  *
3524  * Loads the compute MEC1 ucode (and the MEC2 ucode on KAVERI).
3525  * Returns 0 for success, -EINVAL if the ucode is not available.
3526  */
3527 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
3528 {
3529 	const __be32 *fw_data;
3530 	int i;
3531 
3532 	if (!rdev->mec_fw)
3533 		return -EINVAL;
3534 
3535 	cik_cp_compute_enable(rdev, false);
3536 
3537 	/* MEC1 */
3538 	fw_data = (const __be32 *)rdev->mec_fw->data;
3539 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3540 	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3541 		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
3542 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3543 
3544 	if (rdev->family == CHIP_KAVERI) {
3545 		/* MEC2 */
3546 		fw_data = (const __be32 *)rdev->mec_fw->data;
3547 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3548 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3549 			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
3550 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3551 	}
3552 
3553 	return 0;
3554 }
3555 
3556 /**
3557  * cik_cp_compute_start - start the compute queues
3558  *
3559  * @rdev: radeon_device pointer
3560  *
3561  * Enable the compute queues.
3562  * Returns 0 for success, error for failure.
3563  */
3564 static int cik_cp_compute_start(struct radeon_device *rdev)
3565 {
3566 	cik_cp_compute_enable(rdev, true);
3567 
3568 	return 0;
3569 }
3570 
3571 /**
3572  * cik_cp_compute_fini - stop the compute queues
3573  *
3574  * @rdev: radeon_device pointer
3575  *
3576  * Stop the compute queues and tear down the driver queue
3577  * info.
3578  */
3579 static void cik_cp_compute_fini(struct radeon_device *rdev)
3580 {
3581 	int i, idx, r;
3582 
3583 	cik_cp_compute_enable(rdev, false);
3584 
3585 	for (i = 0; i < 2; i++) {
3586 		if (i == 0)
3587 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
3588 		else
3589 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
3590 
3591 		if (rdev->ring[idx].mqd_obj) {
3592 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3593 			if (unlikely(r != 0))
3594 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
3595 
3596 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3597 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3598 
3599 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3600 			rdev->ring[idx].mqd_obj = NULL;
3601 		}
3602 	}
3603 }
3604 
3605 static void cik_mec_fini(struct radeon_device *rdev)
3606 {
3607 	int r;
3608 
3609 	if (rdev->mec.hpd_eop_obj) {
3610 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3611 		if (unlikely(r != 0))
3612 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
3613 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
3614 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3615 
3616 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
3617 		rdev->mec.hpd_eop_obj = NULL;
3618 	}
3619 }
3620 
3621 #define MEC_HPD_SIZE 2048
3622 
3623 static int cik_mec_init(struct radeon_device *rdev)
3624 {
3625 	int r;
3626 	u32 *hpd;
3627 
3628 	/*
3629 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
3630 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
3631 	 */
3632 	if (rdev->family == CHIP_KAVERI)
3633 		rdev->mec.num_mec = 2;
3634 	else
3635 		rdev->mec.num_mec = 1;
3636 	rdev->mec.num_pipe = 4;
3637 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
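	/* sizing example for the EOP buffer allocated below: on KV this
	 * is 2 MECs * 4 pipes * MEC_HPD_SIZE * 2 = 32 KiB, i.e. one
	 * MEC_HPD_SIZE * 2 slot per pipe */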
3638 
3639 	if (rdev->mec.hpd_eop_obj == NULL) {
3640 		r = radeon_bo_create(rdev,
3641 				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
3642 				     PAGE_SIZE, true,
3643 				     RADEON_GEM_DOMAIN_GTT, NULL,
3644 				     &rdev->mec.hpd_eop_obj);
3645 		if (r) {
3646 			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
3647 			return r;
3648 		}
3649 	}
3650 
3651 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3652 	if (unlikely(r != 0)) {
3653 		cik_mec_fini(rdev);
3654 		return r;
3655 	}
3656 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
3657 			  &rdev->mec.hpd_eop_gpu_addr);
3658 	if (r) {
3659 		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
3660 		cik_mec_fini(rdev);
3661 		return r;
3662 	}
3663 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
3664 	if (r) {
3665 		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
3666 		cik_mec_fini(rdev);
3667 		return r;
3668 	}
3669 
3670 	/* clear memory.  Not sure if this is required or not */
3671 	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
3672 
3673 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
3674 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3675 
3676 	return 0;
3677 }
3678 
3679 struct hqd_registers {
3681 	u32 cp_mqd_base_addr;
3682 	u32 cp_mqd_base_addr_hi;
3683 	u32 cp_hqd_active;
3684 	u32 cp_hqd_vmid;
3685 	u32 cp_hqd_persistent_state;
3686 	u32 cp_hqd_pipe_priority;
3687 	u32 cp_hqd_queue_priority;
3688 	u32 cp_hqd_quantum;
3689 	u32 cp_hqd_pq_base;
3690 	u32 cp_hqd_pq_base_hi;
3691 	u32 cp_hqd_pq_rptr;
3692 	u32 cp_hqd_pq_rptr_report_addr;
3693 	u32 cp_hqd_pq_rptr_report_addr_hi;
3694 	u32 cp_hqd_pq_wptr_poll_addr;
3695 	u32 cp_hqd_pq_wptr_poll_addr_hi;
3696 	u32 cp_hqd_pq_doorbell_control;
3697 	u32 cp_hqd_pq_wptr;
3698 	u32 cp_hqd_pq_control;
3699 	u32 cp_hqd_ib_base_addr;
3700 	u32 cp_hqd_ib_base_addr_hi;
3701 	u32 cp_hqd_ib_rptr;
3702 	u32 cp_hqd_ib_control;
3703 	u32 cp_hqd_iq_timer;
3704 	u32 cp_hqd_iq_rptr;
3705 	u32 cp_hqd_dequeue_request;
3706 	u32 cp_hqd_dma_offload;
3707 	u32 cp_hqd_sema_cmd;
3708 	u32 cp_hqd_msg_type;
3709 	u32 cp_hqd_atomic0_preop_lo;
3710 	u32 cp_hqd_atomic0_preop_hi;
3711 	u32 cp_hqd_atomic1_preop_lo;
3712 	u32 cp_hqd_atomic1_preop_hi;
3713 	u32 cp_hqd_hq_scheduler0;
3714 	u32 cp_hqd_hq_scheduler1;
3715 	u32 cp_mqd_control;
3716 };
3717 
3718 struct bonaire_mqd {
3720 	u32 header;
3721 	u32 dispatch_initiator;
3722 	u32 dimensions[3];
3723 	u32 start_idx[3];
3724 	u32 num_threads[3];
3725 	u32 pipeline_stat_enable;
3726 	u32 perf_counter_enable;
3727 	u32 pgm[2];
3728 	u32 tba[2];
3729 	u32 tma[2];
3730 	u32 pgm_rsrc[2];
3731 	u32 vmid;
3732 	u32 resource_limits;
3733 	u32 static_thread_mgmt01[2];
3734 	u32 tmp_ring_size;
3735 	u32 static_thread_mgmt23[2];
3736 	u32 restart[3];
3737 	u32 thread_trace_enable;
3738 	u32 reserved1;
3739 	u32 user_data[16];
3740 	u32 vgtcs_invoke_count[2];
3741 	struct hqd_registers queue_state;
3742 	u32 dequeue_cntr;
3743 	u32 interrupt_queue[64];
3744 };
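
/* The MQD (memory queue descriptor) is the in-memory image of a
 * compute queue's state; cik_cp_compute_resume() below fills one out
 * and mirrors its queue_state fields into the corresponding CP_HQD_*
 * (hardware queue descriptor) registers for the selected queue.
 */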
3745 
3746 /**
3747  * cik_cp_compute_resume - setup the compute queue registers
3748  *
3749  * @rdev: radeon_device pointer
3750  *
3751  * Program the compute queues and test them to make sure they
3752  * are working.
3753  * Returns 0 for success, error for failure.
3754  */
3755 static int cik_cp_compute_resume(struct radeon_device *rdev)
3756 {
3757 	int r, i, j, idx;
3758 	u32 tmp;
3759 	bool use_doorbell = true;
3760 	u64 hqd_gpu_addr;
3761 	u64 mqd_gpu_addr;
3762 	u64 eop_gpu_addr;
3763 	u64 wb_gpu_addr;
3764 	u32 *buf;
3765 	struct bonaire_mqd *mqd;
3766 
3767 	r = cik_cp_compute_start(rdev);
3768 	if (r)
3769 		return r;
3770 
3771 	/* fix up chicken bits */
3772 	tmp = RREG32(CP_CPF_DEBUG);
3773 	tmp |= (1 << 23);
3774 	WREG32(CP_CPF_DEBUG, tmp);
3775 
3776 	/* init the pipes */
3777 	mutex_lock(&rdev->srbm_mutex);
3778 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
3779 		int me = (i < 4) ? 1 : 2;
3780 		int pipe = (i < 4) ? i : (i - 4);
3781 
3782 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
3783 
3784 		cik_srbm_select(rdev, me, pipe, 0, 0);
3785 
3786 		/* write the EOP addr */
3787 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
3788 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
3789 
3790 		/* set the VMID assigned */
3791 		WREG32(CP_HPD_EOP_VMID, 0);
3792 
3793 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3794 		tmp = RREG32(CP_HPD_EOP_CONTROL);
3795 		tmp &= ~EOP_SIZE_MASK;
3796 		tmp |= order_base_2(MEC_HPD_SIZE / 8);
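		/* e.g. MEC_HPD_SIZE = 2048 bytes = 256 qwords, so
		 * EOP_SIZE = order_base_2(256) = 8 and the EOP ring is
		 * 2^(8+1) = 512 dwords = 2048 bytes = MEC_HPD_SIZE */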
3797 		WREG32(CP_HPD_EOP_CONTROL, tmp);
3798 	}
3799 	cik_srbm_select(rdev, 0, 0, 0, 0);
3800 	mutex_unlock(&rdev->srbm_mutex);
3801 
3802 	/* init the queues.  Just two for now. */
3803 	for (i = 0; i < 2; i++) {
3804 		if (i == 0)
3805 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
3806 		else
3807 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
3808 
3809 		if (rdev->ring[idx].mqd_obj == NULL) {
3810 			r = radeon_bo_create(rdev,
3811 					     sizeof(struct bonaire_mqd),
3812 					     PAGE_SIZE, true,
3813 					     RADEON_GEM_DOMAIN_GTT, NULL,
3814 					     &rdev->ring[idx].mqd_obj);
3815 			if (r) {
3816 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
3817 				return r;
3818 			}
3819 		}
3820 
3821 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3822 		if (unlikely(r != 0)) {
3823 			cik_cp_compute_fini(rdev);
3824 			return r;
3825 		}
3826 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
3827 				  &mqd_gpu_addr);
3828 		if (r) {
3829 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
3830 			cik_cp_compute_fini(rdev);
3831 			return r;
3832 		}
3833 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
3834 		if (r) {
3835 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
3836 			cik_cp_compute_fini(rdev);
3837 			return r;
3838 		}
3839 
3840 		/* doorbell offset */
3841 		rdev->ring[idx].doorbell_offset =
3842 			(rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
3843 
3844 		/* init the mqd struct */
3845 		memset(buf, 0, sizeof(struct bonaire_mqd));
3846 
3847 		mqd = (struct bonaire_mqd *)buf;
3848 		mqd->header = 0xC0310800;
3849 		mqd->static_thread_mgmt01[0] = 0xffffffff;
3850 		mqd->static_thread_mgmt01[1] = 0xffffffff;
3851 		mqd->static_thread_mgmt23[0] = 0xffffffff;
3852 		mqd->static_thread_mgmt23[1] = 0xffffffff;
3853 
3854 		mutex_lock(&rdev->srbm_mutex);
3855 		cik_srbm_select(rdev, rdev->ring[idx].me,
3856 				rdev->ring[idx].pipe,
3857 				rdev->ring[idx].queue, 0);
3858 
3859 		/* disable wptr polling */
3860 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
3861 		tmp &= ~WPTR_POLL_EN;
3862 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
3863 
3864 		/* enable doorbell? */
3865 		mqd->queue_state.cp_hqd_pq_doorbell_control =
3866 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3867 		if (use_doorbell)
3868 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3869 		else
3870 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3871 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3872 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
3873 
3874 		/* disable the queue if it's active */
3875 		mqd->queue_state.cp_hqd_dequeue_request = 0;
3876 		mqd->queue_state.cp_hqd_pq_rptr = 0;
3877 		mqd->queue_state.cp_hqd_pq_wptr = 0;
3878 		if (RREG32(CP_HQD_ACTIVE) & 1) {
3879 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3880 			for (j = 0; j < rdev->usec_timeout; j++) {
3881 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
3882 					break;
3883 				udelay(1);
3884 			}
3885 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3886 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3887 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3888 		}
3889 
3890 		/* set the pointer to the MQD */
3891 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3892 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3893 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3894 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3895 		/* set MQD vmid to 0 */
3896 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3897 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3898 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3899 
3900 		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3901 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3902 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3903 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3904 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3905 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3906 
3907 		/* set up the HQD, this is similar to CP_RB0_CNTL */
3908 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3909 		mqd->queue_state.cp_hqd_pq_control &=
3910 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3911 
3912 		mqd->queue_state.cp_hqd_pq_control |=
3913 			order_base_2(rdev->ring[idx].ring_size / 8);
3914 		mqd->queue_state.cp_hqd_pq_control |=
3915 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
3916 #ifdef __BIG_ENDIAN
3917 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3918 #endif
3919 		mqd->queue_state.cp_hqd_pq_control &=
3920 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3921 		mqd->queue_state.cp_hqd_pq_control |=
3922 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3923 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3924 
3925 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3926 		if (i == 0)
3927 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3928 		else
3929 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3930 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3931 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3932 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3933 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3934 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3935 
3936 		/* set the wb address whether it's enabled or not */
3937 		if (i == 0)
3938 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3939 		else
3940 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3941 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3942 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3943 			upper_32_bits(wb_gpu_addr) & 0xffff;
3944 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3945 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3946 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3947 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3948 
3949 		/* enable the doorbell if requested */
3950 		if (use_doorbell) {
3951 			mqd->queue_state.cp_hqd_pq_doorbell_control =
3952 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3953 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3954 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
3955 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3956 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3957 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
3958 				~(DOORBELL_SOURCE | DOORBELL_HIT);
3959 
3960 		} else {
3961 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3962 		}
3963 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3964 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
3965 
3966 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3967 		rdev->ring[idx].wptr = 0;
3968 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3969 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3970 		rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3971 		mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3972 
3973 		/* set the vmid for the queue */
3974 		mqd->queue_state.cp_hqd_vmid = 0;
3975 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3976 
3977 		/* activate the queue */
3978 		mqd->queue_state.cp_hqd_active = 1;
3979 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3980 
3981 		cik_srbm_select(rdev, 0, 0, 0, 0);
3982 		mutex_unlock(&rdev->srbm_mutex);
3983 
3984 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3985 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3986 
3987 		rdev->ring[idx].ready = true;
3988 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3989 		if (r)
3990 			rdev->ring[idx].ready = false;
3991 	}
3992 
3993 	return 0;
3994 }
3995 
3996 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3997 {
3998 	cik_cp_gfx_enable(rdev, enable);
3999 	cik_cp_compute_enable(rdev, enable);
4000 }
4001 
4002 static int cik_cp_load_microcode(struct radeon_device *rdev)
4003 {
4004 	int r;
4005 
4006 	r = cik_cp_gfx_load_microcode(rdev);
4007 	if (r)
4008 		return r;
4009 	r = cik_cp_compute_load_microcode(rdev);
4010 	if (r)
4011 		return r;
4012 
4013 	return 0;
4014 }
4015 
4016 static void cik_cp_fini(struct radeon_device *rdev)
4017 {
4018 	cik_cp_gfx_fini(rdev);
4019 	cik_cp_compute_fini(rdev);
4020 }
4021 
4022 static int cik_cp_resume(struct radeon_device *rdev)
4023 {
4024 	int r;
4025 
4026 	cik_enable_gui_idle_interrupt(rdev, false);
4027 
4028 	r = cik_cp_load_microcode(rdev);
4029 	if (r)
4030 		return r;
4031 
4032 	r = cik_cp_gfx_resume(rdev);
4033 	if (r)
4034 		return r;
4035 	r = cik_cp_compute_resume(rdev);
4036 	if (r)
4037 		return r;
4038 
4039 	cik_enable_gui_idle_interrupt(rdev, true);
4040 
4041 	return 0;
4042 }
4043 
4044 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4045 {
4046 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4047 		RREG32(GRBM_STATUS));
4048 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4049 		RREG32(GRBM_STATUS2));
4050 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4051 		RREG32(GRBM_STATUS_SE0));
4052 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4053 		RREG32(GRBM_STATUS_SE1));
4054 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4055 		RREG32(GRBM_STATUS_SE2));
4056 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4057 		RREG32(GRBM_STATUS_SE3));
4058 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4059 		RREG32(SRBM_STATUS));
4060 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4061 		RREG32(SRBM_STATUS2));
4062 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4063 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4064 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4065 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4066 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4067 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4068 		 RREG32(CP_STALLED_STAT1));
4069 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4070 		 RREG32(CP_STALLED_STAT2));
4071 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4072 		 RREG32(CP_STALLED_STAT3));
4073 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4074 		 RREG32(CP_CPF_BUSY_STAT));
4075 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4076 		 RREG32(CP_CPF_STALLED_STAT1));
4077 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4078 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4079 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4080 		 RREG32(CP_CPC_STALLED_STAT1));
4081 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4082 }
4083 
4084 /**
4085  * cik_gpu_check_soft_reset - check which blocks are busy
4086  *
4087  * @rdev: radeon_device pointer
4088  *
4089  * Check which blocks are busy and return the relevant reset
4090  * mask to be used by cik_gpu_soft_reset().
4091  * Returns a mask of the blocks to be reset.
4092  */
4093 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4094 {
4095 	u32 reset_mask = 0;
4096 	u32 tmp;
4097 
4098 	/* GRBM_STATUS */
4099 	tmp = RREG32(GRBM_STATUS);
4100 	if (tmp & (PA_BUSY | SC_BUSY |
4101 		   BCI_BUSY | SX_BUSY |
4102 		   TA_BUSY | VGT_BUSY |
4103 		   DB_BUSY | CB_BUSY |
4104 		   GDS_BUSY | SPI_BUSY |
4105 		   IA_BUSY | IA_BUSY_NO_DMA))
4106 		reset_mask |= RADEON_RESET_GFX;
4107 
4108 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4109 		reset_mask |= RADEON_RESET_CP;
4110 
4111 	/* GRBM_STATUS2 */
4112 	tmp = RREG32(GRBM_STATUS2);
4113 	if (tmp & RLC_BUSY)
4114 		reset_mask |= RADEON_RESET_RLC;
4115 
4116 	/* SDMA0_STATUS_REG */
4117 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4118 	if (!(tmp & SDMA_IDLE))
4119 		reset_mask |= RADEON_RESET_DMA;
4120 
4121 	/* SDMA1_STATUS_REG */
4122 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4123 	if (!(tmp & SDMA_IDLE))
4124 		reset_mask |= RADEON_RESET_DMA1;
4125 
4126 	/* SRBM_STATUS2 */
4127 	tmp = RREG32(SRBM_STATUS2);
4128 	if (tmp & SDMA_BUSY)
4129 		reset_mask |= RADEON_RESET_DMA;
4130 
4131 	if (tmp & SDMA1_BUSY)
4132 		reset_mask |= RADEON_RESET_DMA1;
4133 
4134 	/* SRBM_STATUS */
4135 	tmp = RREG32(SRBM_STATUS);
4136 
4137 	if (tmp & IH_BUSY)
4138 		reset_mask |= RADEON_RESET_IH;
4139 
4140 	if (tmp & SEM_BUSY)
4141 		reset_mask |= RADEON_RESET_SEM;
4142 
4143 	if (tmp & GRBM_RQ_PENDING)
4144 		reset_mask |= RADEON_RESET_GRBM;
4145 
4146 	if (tmp & VMC_BUSY)
4147 		reset_mask |= RADEON_RESET_VMC;
4148 
4149 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4150 		   MCC_BUSY | MCD_BUSY))
4151 		reset_mask |= RADEON_RESET_MC;
4152 
4153 	if (evergreen_is_display_hung(rdev))
4154 		reset_mask |= RADEON_RESET_DISPLAY;
4155 
4156 	/* Skip MC reset as it's most likely not hung, just busy */
4157 	if (reset_mask & RADEON_RESET_MC) {
4158 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4159 		reset_mask &= ~RADEON_RESET_MC;
4160 	}
4161 
4162 	return reset_mask;
4163 }
4164 
4165 /**
4166  * cik_gpu_soft_reset - soft reset GPU
4167  *
4168  * @rdev: radeon_device pointer
4169  * @reset_mask: mask of which blocks to reset
4170  *
4171  * Soft reset the blocks specified in @reset_mask.
4172  */
4173 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4174 {
4175 	struct evergreen_mc_save save;
4176 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4177 	u32 tmp;
4178 
4179 	if (reset_mask == 0)
4180 		return;
4181 
4182 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4183 
4184 	cik_print_gpu_status_regs(rdev);
4185 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4186 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4187 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4188 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4189 
4190 	/* disable CG/PG */
4191 	cik_fini_pg(rdev);
4192 	cik_fini_cg(rdev);
4193 
4194 	/* stop the rlc */
4195 	cik_rlc_stop(rdev);
4196 
4197 	/* Disable GFX parsing/prefetching */
4198 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4199 
4200 	/* Disable MEC parsing/prefetching */
4201 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4202 
4203 	if (reset_mask & RADEON_RESET_DMA) {
4204 		/* sdma0 */
4205 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4206 		tmp |= SDMA_HALT;
4207 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4208 	}
4209 	if (reset_mask & RADEON_RESET_DMA1) {
4210 		/* sdma1 */
4211 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4212 		tmp |= SDMA_HALT;
4213 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4214 	}
4215 
4216 	evergreen_mc_stop(rdev, &save);
4217 	if (evergreen_mc_wait_for_idle(rdev)) {
4218 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4219 	}
4220 
4221 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4222 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4223 
4224 	if (reset_mask & RADEON_RESET_CP) {
4225 		grbm_soft_reset |= SOFT_RESET_CP;
4226 
4227 		srbm_soft_reset |= SOFT_RESET_GRBM;
4228 	}
4229 
4230 	if (reset_mask & RADEON_RESET_DMA)
4231 		srbm_soft_reset |= SOFT_RESET_SDMA;
4232 
4233 	if (reset_mask & RADEON_RESET_DMA1)
4234 		srbm_soft_reset |= SOFT_RESET_SDMA1;
4235 
4236 	if (reset_mask & RADEON_RESET_DISPLAY)
4237 		srbm_soft_reset |= SOFT_RESET_DC;
4238 
4239 	if (reset_mask & RADEON_RESET_RLC)
4240 		grbm_soft_reset |= SOFT_RESET_RLC;
4241 
4242 	if (reset_mask & RADEON_RESET_SEM)
4243 		srbm_soft_reset |= SOFT_RESET_SEM;
4244 
4245 	if (reset_mask & RADEON_RESET_IH)
4246 		srbm_soft_reset |= SOFT_RESET_IH;
4247 
4248 	if (reset_mask & RADEON_RESET_GRBM)
4249 		srbm_soft_reset |= SOFT_RESET_GRBM;
4250 
4251 	if (reset_mask & RADEON_RESET_VMC)
4252 		srbm_soft_reset |= SOFT_RESET_VMC;
4253 
4254 	if (!(rdev->flags & RADEON_IS_IGP)) {
4255 		if (reset_mask & RADEON_RESET_MC)
4256 			srbm_soft_reset |= SOFT_RESET_MC;
4257 	}
4258 
4259 	if (grbm_soft_reset) {
4260 		tmp = RREG32(GRBM_SOFT_RESET);
4261 		tmp |= grbm_soft_reset;
4262 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4263 		WREG32(GRBM_SOFT_RESET, tmp);
4264 		tmp = RREG32(GRBM_SOFT_RESET);
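		/* the read-back presumably posts the write before the
		 * delay below */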
4265 
4266 		udelay(50);
4267 
4268 		tmp &= ~grbm_soft_reset;
4269 		WREG32(GRBM_SOFT_RESET, tmp);
4270 		tmp = RREG32(GRBM_SOFT_RESET);
4271 	}
4272 
4273 	if (srbm_soft_reset) {
4274 		tmp = RREG32(SRBM_SOFT_RESET);
4275 		tmp |= srbm_soft_reset;
4276 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4277 		WREG32(SRBM_SOFT_RESET, tmp);
4278 		tmp = RREG32(SRBM_SOFT_RESET);
4279 
4280 		udelay(50);
4281 
4282 		tmp &= ~srbm_soft_reset;
4283 		WREG32(SRBM_SOFT_RESET, tmp);
4284 		tmp = RREG32(SRBM_SOFT_RESET);
4285 	}
4286 
4287 	/* Wait a little for things to settle down */
4288 	udelay(50);
4289 
4290 	evergreen_mc_resume(rdev, &save);
4291 	udelay(50);
4292 
4293 	cik_print_gpu_status_regs(rdev);
4294 }
4295 
4296 /**
4297  * cik_asic_reset - soft reset GPU
4298  *
4299  * @rdev: radeon_device pointer
4300  *
4301  * Look up which blocks are hung and attempt
4302  * to reset them.
4303  * Returns 0 for success.
4304  */
4305 int cik_asic_reset(struct radeon_device *rdev)
4306 {
4307 	u32 reset_mask;
4308 
4309 	reset_mask = cik_gpu_check_soft_reset(rdev);
4310 
4311 	if (reset_mask)
4312 		r600_set_bios_scratch_engine_hung(rdev, true);
4313 
4314 	cik_gpu_soft_reset(rdev, reset_mask);
4315 
4316 	reset_mask = cik_gpu_check_soft_reset(rdev);
4317 
4318 	if (!reset_mask)
4319 		r600_set_bios_scratch_engine_hung(rdev, false);
4320 
4321 	return 0;
4322 }
4323 
4324 /**
4325  * cik_gfx_is_lockup - check if the 3D engine is locked up
4326  *
4327  * @rdev: radeon_device pointer
4328  * @ring: radeon_ring structure holding ring information
4329  *
4330  * Check if the 3D engine is locked up (CIK).
4331  * Returns true if the engine is locked, false if not.
4332  */
4333 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4334 {
4335 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4336 
4337 	if (!(reset_mask & (RADEON_RESET_GFX |
4338 			    RADEON_RESET_COMPUTE |
4339 			    RADEON_RESET_CP))) {
4340 		radeon_ring_lockup_update(ring);
4341 		return false;
4342 	}
4343 	/* force CP activities */
4344 	radeon_ring_force_activity(rdev, ring);
4345 	return radeon_ring_test_lockup(rdev, ring);
4346 }
4347 
4348 /* MC */
4349 /**
4350  * cik_mc_program - program the GPU memory controller
4351  *
4352  * @rdev: radeon_device pointer
4353  *
4354  * Set the location of vram, gart, and AGP in the GPU's
4355  * physical address space (CIK).
4356  */
4357 static void cik_mc_program(struct radeon_device *rdev)
4358 {
4359 	struct evergreen_mc_save save;
4360 	u32 tmp;
4361 	int i, j;
4362 
4363 	/* Initialize HDP */
4364 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4365 		WREG32((0x2c14 + j), 0x00000000);
4366 		WREG32((0x2c18 + j), 0x00000000);
4367 		WREG32((0x2c1c + j), 0x00000000);
4368 		WREG32((0x2c20 + j), 0x00000000);
4369 		WREG32((0x2c24 + j), 0x00000000);
4370 	}
4371 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4372 
4373 	evergreen_mc_stop(rdev, &save);
4374 	if (radeon_mc_wait_for_idle(rdev)) {
4375 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4376 	}
4377 	/* Lockout access through VGA aperture*/
4378 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4379 	/* Update configuration */
4380 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4381 	       rdev->mc.vram_start >> 12);
4382 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4383 	       rdev->mc.vram_end >> 12);
4384 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4385 	       rdev->vram_scratch.gpu_addr >> 12);
4386 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4387 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
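	/* FB_LOCATION packs bits 39:24 of the VRAM end (upper half) and
	 * start (lower half), i.e. 16 MiB granularity */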
4388 	WREG32(MC_VM_FB_LOCATION, tmp);
4389 	/* XXX double check these! */
4390 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4391 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4392 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4393 	WREG32(MC_VM_AGP_BASE, 0);
4394 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4395 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4396 	if (radeon_mc_wait_for_idle(rdev)) {
4397 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4398 	}
4399 	evergreen_mc_resume(rdev, &save);
4400 	/* we need to own VRAM, so turn off the VGA renderer here
4401 	 * to stop it overwriting our objects */
4402 	rv515_vga_render_disable(rdev);
4403 }
4404 
4405 /**
4406  * cik_mc_init - initialize the memory controller driver params
4407  *
4408  * @rdev: radeon_device pointer
4409  *
4410  * Look up the amount of vram, vram width, and decide how to place
4411  * vram and gart within the GPU's physical address space (CIK).
4412  * Returns 0 for success.
4413  */
4414 static int cik_mc_init(struct radeon_device *rdev)
4415 {
4416 	u32 tmp;
4417 	int chansize, numchan;
4418 
4419 	/* Get VRAM information */
4420 	rdev->mc.vram_is_ddr = true;
4421 	tmp = RREG32(MC_ARB_RAMCFG);
4422 	if (tmp & CHANSIZE_MASK) {
4423 		chansize = 64;
4424 	} else {
4425 		chansize = 32;
4426 	}
4427 	tmp = RREG32(MC_SHARED_CHMAP);
4428 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4429 	case 0:
4430 	default:
4431 		numchan = 1;
4432 		break;
4433 	case 1:
4434 		numchan = 2;
4435 		break;
4436 	case 2:
4437 		numchan = 4;
4438 		break;
4439 	case 3:
4440 		numchan = 8;
4441 		break;
4442 	case 4:
4443 		numchan = 3;
4444 		break;
4445 	case 5:
4446 		numchan = 6;
4447 		break;
4448 	case 6:
4449 		numchan = 10;
4450 		break;
4451 	case 7:
4452 		numchan = 12;
4453 		break;
4454 	case 8:
4455 		numchan = 16;
4456 		break;
4457 	}
4458 	rdev->mc.vram_width = numchan * chansize;
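	/* e.g. NOOFCHAN = 2 decodes to 4 channels above; with 64-bit
	 * channels that is a 256-bit effective VRAM interface */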
4459 	/* Could the aper size report 0? */
4460 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4461 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4462 	/* size in MB on CIK */
4463 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4464 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4465 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4466 	si_vram_gtt_location(rdev, &rdev->mc);
4467 	radeon_update_bandwidth_info(rdev);
4468 
4469 	return 0;
4470 }
4471 
4472 /*
4473  * GART
4474  * VMID 0 is the physical GPU addresses as used by the kernel.
4475  * VMIDs 1-15 are used for userspace clients and are handled
4476  * by the radeon vm/hsa code.
4477  */
4478 /**
4479  * cik_pcie_gart_tlb_flush - gart tlb flush callback
4480  *
4481  * @rdev: radeon_device pointer
4482  *
4483  * Flush the TLB for the VMID 0 page table (CIK).
4484  */
4485 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4486 {
4487 	/* flush hdp cache */
4488 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4489 
4490 	/* bits 0-15 are the VM contexts0-15 */
4491 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
4492 }
4493 
4494 /**
4495  * cik_pcie_gart_enable - gart enable
4496  *
4497  * @rdev: radeon_device pointer
4498  *
4499  * This sets up the TLBs, programs the page tables for VMID0,
4500  * sets up the hw for VMIDs 1-15 which are allocated on
4501  * demand, and sets up the global locations for the LDS, GDS,
4502  * and GPUVM for FSA64 clients (CIK).
4503  * Returns 0 for success, errors for failure.
4504  */
4505 static int cik_pcie_gart_enable(struct radeon_device *rdev)
4506 {
4507 	int r, i;
4508 
4509 	if (rdev->gart.robj == NULL) {
4510 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4511 		return -EINVAL;
4512 	}
4513 	r = radeon_gart_table_vram_pin(rdev);
4514 	if (r)
4515 		return r;
4516 	radeon_gart_restore(rdev);
4517 	/* Setup TLB control */
4518 	WREG32(MC_VM_MX_L1_TLB_CNTL,
4519 	       (0xA << 7) |
4520 	       ENABLE_L1_TLB |
4521 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4522 	       ENABLE_ADVANCED_DRIVER_MODEL |
4523 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4524 	/* Setup L2 cache */
4525 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4526 	       ENABLE_L2_FRAGMENT_PROCESSING |
4527 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4528 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4529 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4530 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4531 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4532 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4533 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4534 	/* setup context0 */
4535 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4536 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4537 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4538 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4539 			(u32)(rdev->dummy_page.addr >> 12));
4540 	WREG32(VM_CONTEXT0_CNTL2, 0);
4541 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4542 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4543 
4544 	WREG32(0x15D4, 0);
4545 	WREG32(0x15D8, 0);
4546 	WREG32(0x15DC, 0);
4547 
4548 	/* empty context1-15 */
4549 	/* FIXME: start with 4GB; once we use 2-level page tables,
4550 	 * switch to the full VM address space
4551 	 */
4552 	/* set vm size, must be a multiple of 4 */
4553 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4554 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4555 	for (i = 1; i < 16; i++) {
4556 		if (i < 8)
4557 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4558 			       rdev->gart.table_addr >> 12);
4559 		else
4560 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4561 			       rdev->gart.table_addr >> 12);
4562 	}
4563 
4564 	/* enable context1-15 */
4565 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4566 	       (u32)(rdev->dummy_page.addr >> 12));
4567 	WREG32(VM_CONTEXT1_CNTL2, 4);
4568 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4569 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4570 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4571 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4572 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4573 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4574 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4575 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4576 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4577 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4578 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4579 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4580 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4581 
4582 	/* TC cache setup ??? */
4583 	WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4584 	WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4585 	WREG32(TC_CFG_L1_STORE_POLICY, 0);
4586 
4587 	WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4588 	WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4589 	WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4590 	WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4591 	WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4592 
4593 	WREG32(TC_CFG_L1_VOLATILE, 0);
4594 	WREG32(TC_CFG_L2_VOLATILE, 0);
4595 
4596 	if (rdev->family == CHIP_KAVERI) {
4597 		u32 tmp = RREG32(CHUB_CONTROL);
4598 		tmp &= ~BYPASS_VM;
4599 		WREG32(CHUB_CONTROL, tmp);
4600 	}
4601 
4602 	/* XXX SH_MEM regs */
4603 	/* where to put LDS, scratch, GPUVM in FSA64 space */
4604 	mutex_lock(&rdev->srbm_mutex);
4605 	for (i = 0; i < 16; i++) {
4606 		cik_srbm_select(rdev, 0, 0, 0, i);
4607 		/* CP and shaders */
4608 		WREG32(SH_MEM_CONFIG, 0);
4609 		WREG32(SH_MEM_APE1_BASE, 1);
4610 		WREG32(SH_MEM_APE1_LIMIT, 0);
4611 		WREG32(SH_MEM_BASES, 0);
4612 		/* SDMA GFX */
4613 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4614 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4615 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4616 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4617 		/* XXX SDMA RLC - todo */
4618 	}
4619 	cik_srbm_select(rdev, 0, 0, 0, 0);
4620 	mutex_unlock(&rdev->srbm_mutex);
4621 
4622 	cik_pcie_gart_tlb_flush(rdev);
4623 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4624 		 (unsigned)(rdev->mc.gtt_size >> 20),
4625 		 (unsigned long long)rdev->gart.table_addr);
4626 	rdev->gart.ready = true;
4627 	return 0;
4628 }
4629 
4630 /**
4631  * cik_pcie_gart_disable - gart disable
4632  *
4633  * @rdev: radeon_device pointer
4634  *
4635  * This disables all VM page tables (CIK).
4636  */
4637 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4638 {
4639 	/* Disable all tables */
4640 	WREG32(VM_CONTEXT0_CNTL, 0);
4641 	WREG32(VM_CONTEXT1_CNTL, 0);
4642 	/* Setup TLB control */
4643 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4644 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4645 	/* Setup L2 cache */
4646 	WREG32(VM_L2_CNTL,
4647 	       ENABLE_L2_FRAGMENT_PROCESSING |
4648 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4649 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4650 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4651 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4652 	WREG32(VM_L2_CNTL2, 0);
4653 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4654 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4655 	radeon_gart_table_vram_unpin(rdev);
4656 }
4657 
4658 /**
4659  * cik_pcie_gart_fini - vm fini callback
4660  *
4661  * @rdev: radeon_device pointer
4662  *
4663  * Tears down the driver GART/VM setup (CIK).
4664  */
4665 static void cik_pcie_gart_fini(struct radeon_device *rdev)
4666 {
4667 	cik_pcie_gart_disable(rdev);
4668 	radeon_gart_table_vram_free(rdev);
4669 	radeon_gart_fini(rdev);
4670 }
4671 
4672 /* vm parser */
4673 /**
4674  * cik_ib_parse - vm ib_parse callback
4675  *
4676  * @rdev: radeon_device pointer
4677  * @ib: indirect buffer pointer
4678  *
4679  * CIK uses hw IB checking so this is a nop (CIK).
4680  */
4681 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4682 {
4683 	return 0;
4684 }
4685 
4686 /*
4687  * vm
4688  * VMID 0 is the physical GPU addresses as used by the kernel.
4689  * VMIDs 1-15 are used for userspace clients and are handled
4690  * by the radeon vm/hsa code.
4691  */
4692 /**
4693  * cik_vm_init - cik vm init callback
4694  *
4695  * @rdev: radeon_device pointer
4696  *
4697  * Inits cik specific vm parameters (number of VMs, base of vram for
4698  * VMIDs 1-15) (CIK).
4699  * Returns 0 for success.
4700  */
4701 int cik_vm_init(struct radeon_device *rdev)
4702 {
4703 	/* number of VMs */
4704 	rdev->vm_manager.nvm = 16;
4705 	/* base offset of vram pages */
4706 	if (rdev->flags & RADEON_IS_IGP) {
4707 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
4708 		tmp <<= 22;
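		/* MC_VM_FB_OFFSET is evidently in 4 MiB (1 << 22) units */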
4709 		rdev->vm_manager.vram_base_offset = tmp;
4710 	} else {
4711 		rdev->vm_manager.vram_base_offset = 0;
	}
4712 
4713 	return 0;
4714 }
4715 
4716 /**
4717  * cik_vm_fini - cik vm fini callback
4718  *
4719  * @rdev: radeon_device pointer
4720  *
4721  * Tear down any asic specific VM setup (CIK).
4722  */
4723 void cik_vm_fini(struct radeon_device *rdev)
4724 {
4725 }
4726 
4727 /**
4728  * cik_vm_decode_fault - print human readable fault info
4729  *
4730  * @rdev: radeon_device pointer
4731  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4732  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
4733  *
4734  * Print human readable fault information (CIK).
4735  */
4736 static void cik_vm_decode_fault(struct radeon_device *rdev,
4737 				u32 status, u32 addr, u32 mc_client)
4738 {
4739 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4740 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4741 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4742 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
4743 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
4744 
4745 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
4746 	       protections, vmid, addr,
4747 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4748 	       block, mc_client, mc_id);
4749 }
4750 
4751 /**
4752  * cik_vm_flush - cik vm flush using the CP
4753  *
4754  * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 * @vm: radeon_vm pointer
4755  *
4756  * Update the page table base and flush the VM TLB
4757  * using the CP (CIK).
4758  */
4759 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4760 {
4761 	struct radeon_ring *ring = &rdev->ring[ridx];
4762 
4763 	if (vm == NULL)
4764 		return;
4765 
4766 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4767 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4768 				 WRITE_DATA_DST_SEL(0)));
4769 	if (vm->id < 8) {
4770 		radeon_ring_write(ring,
4771 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4772 	} else {
4773 		radeon_ring_write(ring,
4774 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4775 	}
4776 	radeon_ring_write(ring, 0);
4777 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4778 
4779 	/* update SH_MEM_* regs */
4780 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4781 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4782 				 WRITE_DATA_DST_SEL(0)));
4783 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4784 	radeon_ring_write(ring, 0);
4785 	radeon_ring_write(ring, VMID(vm->id));
4786 
4787 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4788 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4789 				 WRITE_DATA_DST_SEL(0)));
4790 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
4791 	radeon_ring_write(ring, 0);
4792 
4793 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4794 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4795 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4796 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4797 
4798 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4799 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4800 				 WRITE_DATA_DST_SEL(0)));
4801 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4802 	radeon_ring_write(ring, 0);
4803 	radeon_ring_write(ring, VMID(0));
4804 
4805 	/* HDP flush */
4806 	/* We should be using the WAIT_REG_MEM packet here like in
4807 	 * cik_fence_ring_emit(), but it causes the CP to hang in this
4808 	 * context...
4809 	 */
4810 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4811 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4812 				 WRITE_DATA_DST_SEL(0)));
4813 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4814 	radeon_ring_write(ring, 0);
4815 	radeon_ring_write(ring, 0);
4816 
4817 	/* bits 0-15 are the VM contexts0-15 */
4818 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4819 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4820 				 WRITE_DATA_DST_SEL(0)));
4821 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4822 	radeon_ring_write(ring, 0);
4823 	radeon_ring_write(ring, 1 << vm->id);
4824 
4825 	/* compute doesn't have PFP */
4826 	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4827 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
4828 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4829 		radeon_ring_write(ring, 0x0);
4830 	}
4831 }
4832 
4833 /**
4834  * cik_vm_set_page - update the page tables using sDMA
4835  *
4836  * @rdev: radeon_device pointer
4837  * @ib: indirect buffer to fill with commands
4838  * @pe: addr of the page entry
4839  * @addr: dst addr to write into pe
4840  * @count: number of page entries to update
4841  * @incr: increase next addr by incr bytes
4842  * @flags: access flags
4843  *
4844  * Update the page tables using CP or sDMA (CIK).
4845  */
4846 void cik_vm_set_page(struct radeon_device *rdev,
4847 		     struct radeon_ib *ib,
4848 		     uint64_t pe,
4849 		     uint64_t addr, unsigned count,
4850 		     uint32_t incr, uint32_t flags)
4851 {
4852 	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4853 	uint64_t value;
4854 	unsigned ndw;
4855 
4856 	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4857 		/* CP */
4858 		while (count) {
4859 			ndw = 2 + count * 2;
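			/* WRITE_DATA body: one control dword, two address
			 * dwords, then two dwords per PTE; PACKET3 counts
			 * the body length minus one, and the 0x3FFE cap
			 * below keeps ndw within the header's 14-bit
			 * count field */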
4860 			if (ndw > 0x3FFE)
4861 				ndw = 0x3FFE;
4862 
4863 			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4864 			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4865 						    WRITE_DATA_DST_SEL(1));
4866 			ib->ptr[ib->length_dw++] = pe;
4867 			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4868 			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4869 				if (flags & RADEON_VM_PAGE_SYSTEM) {
4870 					value = radeon_vm_map_gart(rdev, addr);
4871 					value &= 0xFFFFFFFFFFFFF000ULL;
4872 				} else if (flags & RADEON_VM_PAGE_VALID) {
4873 					value = addr;
4874 				} else {
4875 					value = 0;
4876 				}
4877 				addr += incr;
4878 				value |= r600_flags;
4879 				ib->ptr[ib->length_dw++] = value;
4880 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
4881 			}
4882 		}
4883 	} else {
4884 		/* DMA */
4885 		cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
4886 	}
4887 }
4888 
4889 /*
4890  * RLC
4891  * The RLC is a multi-purpose microengine that handles a
4892  * variety of functions, the most important of which is
4893  * the interrupt controller.
4894  */
4895 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
4896 					  bool enable)
4897 {
4898 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
4899 
4900 	if (enable)
4901 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4902 	else
4903 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4904 	WREG32(CP_INT_CNTL_RING0, tmp);
4905 }
4906 
4907 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
4908 {
4909 	u32 tmp;
4910 
4911 	tmp = RREG32(RLC_LB_CNTL);
4912 	if (enable)
4913 		tmp |= LOAD_BALANCE_ENABLE;
4914 	else
4915 		tmp &= ~LOAD_BALANCE_ENABLE;
4916 	WREG32(RLC_LB_CNTL, tmp);
4917 }
4918 
4919 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
4920 {
4921 	u32 i, j, k;
4922 	u32 mask;
4923 
4924 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4925 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4926 			cik_select_se_sh(rdev, i, j);
4927 			for (k = 0; k < rdev->usec_timeout; k++) {
4928 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
4929 					break;
4930 				udelay(1);
4931 			}
4932 		}
4933 	}
4934 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4935 
4936 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4937 	for (k = 0; k < rdev->usec_timeout; k++) {
4938 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4939 			break;
4940 		udelay(1);
4941 	}
4942 }
4943 
4944 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
4945 {
4946 	u32 tmp;
4947 
4948 	tmp = RREG32(RLC_CNTL);
4949 	if (tmp != rlc)
4950 		WREG32(RLC_CNTL, rlc);
4951 }
4952 
4953 static u32 cik_halt_rlc(struct radeon_device *rdev)
4954 {
4955 	u32 data, orig;
4956 
4957 	orig = data = RREG32(RLC_CNTL);
4958 
4959 	if (data & RLC_ENABLE) {
4960 		u32 i;
4961 
4962 		data &= ~RLC_ENABLE;
4963 		WREG32(RLC_CNTL, data);
4964 
4965 		for (i = 0; i < rdev->usec_timeout; i++) {
4966 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
4967 				break;
4968 			udelay(1);
4969 		}
4970 
4971 		cik_wait_for_rlc_serdes(rdev);
4972 	}
4973 
4974 	return orig;
4975 }
4976 
4977 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
4978 {
4979 	u32 tmp, i, mask;
4980 
4981 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
4982 	WREG32(RLC_GPR_REG2, tmp);
4983 
4984 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
4985 	for (i = 0; i < rdev->usec_timeout; i++) {
4986 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
4987 			break;
4988 		udelay(1);
4989 	}
4990 
4991 	for (i = 0; i < rdev->usec_timeout; i++) {
4992 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
4993 			break;
4994 		udelay(1);
4995 	}
4996 }
4997 
4998 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
4999 {
5000 	u32 tmp;
5001 
5002 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5003 	WREG32(RLC_GPR_REG2, tmp);
5004 }
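
/* Illustrative usage sketch; this helper is hypothetical and not part
 * of the driver.  The enter/exit pair brackets updates to registers
 * the RLC cares about so it holds off while they change.
 */
static inline void cik_example_rlc_safe_wreg(struct radeon_device *rdev,
					     u32 reg, u32 val)
{
	cik_enter_rlc_safe_mode(rdev);
	WREG32(reg, val);	/* protected register update */
	cik_exit_rlc_safe_mode(rdev);
}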
5005 
5006 /**
5007  * cik_rlc_stop - stop the RLC ME
5008  *
5009  * @rdev: radeon_device pointer
5010  *
5011  * Halt the RLC ME (MicroEngine) (CIK).
5012  */
5013 static void cik_rlc_stop(struct radeon_device *rdev)
5014 {
5015 	WREG32(RLC_CNTL, 0);
5016 
5017 	cik_enable_gui_idle_interrupt(rdev, false);
5018 
5019 	cik_wait_for_rlc_serdes(rdev);
5020 }
5021 
5022 /**
5023  * cik_rlc_start - start the RLC ME
5024  *
5025  * @rdev: radeon_device pointer
5026  *
5027  * Unhalt the RLC ME (MicroEngine) (CIK).
5028  */
5029 static void cik_rlc_start(struct radeon_device *rdev)
5030 {
5031 	WREG32(RLC_CNTL, RLC_ENABLE);
5032 
5033 	cik_enable_gui_idle_interrupt(rdev, true);
5034 
5035 	udelay(50);
5036 }
5037 
5038 /**
5039  * cik_rlc_resume - setup the RLC hw
5040  *
5041  * @rdev: radeon_device pointer
5042  *
5043  * Initialize the RLC registers, load the ucode,
5044  * and start the RLC (CIK).
5045  * Returns 0 for success, -EINVAL if the ucode is not available.
5046  */
5047 static int cik_rlc_resume(struct radeon_device *rdev)
5048 {
5049 	u32 i, size, tmp;
5050 	const __be32 *fw_data;
5051 
5052 	if (!rdev->rlc_fw)
5053 		return -EINVAL;
5054 
5055 	switch (rdev->family) {
5056 	case CHIP_BONAIRE:
5057 	default:
5058 		size = BONAIRE_RLC_UCODE_SIZE;
5059 		break;
5060 	case CHIP_KAVERI:
5061 		size = KV_RLC_UCODE_SIZE;
5062 		break;
5063 	case CHIP_KABINI:
5064 		size = KB_RLC_UCODE_SIZE;
5065 		break;
5066 	}
5067 
5068 	cik_rlc_stop(rdev);
5069 
5070 	/* disable CG */
5071 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5072 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5073 
5074 	si_rlc_reset(rdev);
5075 
5076 	cik_init_pg(rdev);
5077 
5078 	cik_init_cg(rdev);
5079 
5080 	WREG32(RLC_LB_CNTR_INIT, 0);
5081 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5082 
5083 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5084 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5085 	WREG32(RLC_LB_PARAMS, 0x00600408);
5086 	WREG32(RLC_LB_CNTL, 0x80000004);
5087 
5088 	WREG32(RLC_MC_CNTL, 0);
5089 	WREG32(RLC_UCODE_CNTL, 0);
5090 
5091 	fw_data = (const __be32 *)rdev->rlc_fw->data;
5092 	WREG32(RLC_GPM_UCODE_ADDR, 0);
5093 	for (i = 0; i < size; i++)
5094 		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5095 	WREG32(RLC_GPM_UCODE_ADDR, 0);
5096 
5097 	/* XXX - find out what chips support lbpw */
5098 	cik_enable_lbpw(rdev, false);
5099 
5100 	if (rdev->family == CHIP_BONAIRE)
5101 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
5102 
5103 	cik_rlc_start(rdev);
5104 
5105 	return 0;
5106 }
5107 
5108 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5109 {
5110 	u32 data, orig, tmp, tmp2;
5111 
5112 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5113 
5114 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5115 		cik_enable_gui_idle_interrupt(rdev, true);
5116 
5117 		tmp = cik_halt_rlc(rdev);
5118 
5119 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5120 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5121 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5122 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5123 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
5124 
5125 		cik_update_rlc(rdev, tmp);
5126 
5127 		data |= CGCG_EN | CGLS_EN;
5128 	} else {
5129 		cik_enable_gui_idle_interrupt(rdev, false);
5130 
5131 		RREG32(CB_CGTT_SCLK_CTRL);
5132 		RREG32(CB_CGTT_SCLK_CTRL);
5133 		RREG32(CB_CGTT_SCLK_CTRL);
5134 		RREG32(CB_CGTT_SCLK_CTRL);
5135 
5136 		data &= ~(CGCG_EN | CGLS_EN);
5137 	}
5138 
5139 	if (orig != data)
5140 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5141 
5142 }
5143 
5144 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5145 {
5146 	u32 data, orig, tmp = 0;
5147 
5148 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5149 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
5150 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5151 				orig = data = RREG32(CP_MEM_SLP_CNTL);
5152 				data |= CP_MEM_LS_EN;
5153 				if (orig != data)
5154 					WREG32(CP_MEM_SLP_CNTL, data);
5155 			}
5156 		}
5157 
5158 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5159 		data &= 0xfffffffd;
5160 		if (orig != data)
5161 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5162 
5163 		tmp = cik_halt_rlc(rdev);
5164 
5165 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5166 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5167 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5168 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5169 		WREG32(RLC_SERDES_WR_CTRL, data);
5170 
5171 		cik_update_rlc(rdev, tmp);
5172 
5173 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
5174 			orig = data = RREG32(CGTS_SM_CTRL_REG);
5175 			data &= ~SM_MODE_MASK;
5176 			data |= SM_MODE(0x2);
5177 			data |= SM_MODE_ENABLE;
5178 			data &= ~CGTS_OVERRIDE;
5179 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
5180 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
5181 				data &= ~CGTS_LS_OVERRIDE;
5182 			data &= ~ON_MONITOR_ADD_MASK;
5183 			data |= ON_MONITOR_ADD_EN;
5184 			data |= ON_MONITOR_ADD(0x96);
5185 			if (orig != data)
5186 				WREG32(CGTS_SM_CTRL_REG, data);
5187 		}
5188 	} else {
5189 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5190 		data |= 0x00000002;
5191 		if (orig != data)
5192 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5193 
5194 		data = RREG32(RLC_MEM_SLP_CNTL);
5195 		if (data & RLC_MEM_LS_EN) {
5196 			data &= ~RLC_MEM_LS_EN;
5197 			WREG32(RLC_MEM_SLP_CNTL, data);
5198 		}
5199 
5200 		data = RREG32(CP_MEM_SLP_CNTL);
5201 		if (data & CP_MEM_LS_EN) {
5202 			data &= ~CP_MEM_LS_EN;
5203 			WREG32(CP_MEM_SLP_CNTL, data);
5204 		}
5205 
5206 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5207 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
5208 		if (orig != data)
5209 			WREG32(CGTS_SM_CTRL_REG, data);
5210 
5211 		tmp = cik_halt_rlc(rdev);
5212 
5213 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5214 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5215 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5216 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
5217 		WREG32(RLC_SERDES_WR_CTRL, data);
5218 
5219 		cik_update_rlc(rdev, tmp);
5220 	}
5221 }
5222 
5223 static const u32 mc_cg_registers[] =
5224 {
5225 	MC_HUB_MISC_HUB_CG,
5226 	MC_HUB_MISC_SIP_CG,
5227 	MC_HUB_MISC_VM_CG,
5228 	MC_XPB_CLK_GAT,
5229 	ATC_MISC_CG,
5230 	MC_CITF_MISC_WR_CG,
5231 	MC_CITF_MISC_RD_CG,
5232 	MC_CITF_MISC_VM_CG,
5233 	VM_L2_CG,
5234 };
5235 
5236 static void cik_enable_mc_ls(struct radeon_device *rdev,
5237 			     bool enable)
5238 {
5239 	int i;
5240 	u32 orig, data;
5241 
5242 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5243 		orig = data = RREG32(mc_cg_registers[i]);
5244 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5245 			data |= MC_LS_ENABLE;
5246 		else
5247 			data &= ~MC_LS_ENABLE;
5248 		if (data != orig)
5249 			WREG32(mc_cg_registers[i], data);
5250 	}
5251 }
5252 
5253 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5254 			       bool enable)
5255 {
5256 	int i;
5257 	u32 orig, data;
5258 
5259 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5260 		orig = data = RREG32(mc_cg_registers[i]);
5261 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5262 			data |= MC_CG_ENABLE;
5263 		else
5264 			data &= ~MC_CG_ENABLE;
5265 		if (data != orig)
5266 			WREG32(mc_cg_registers[i], data);
5267 	}
5268 }
5269 
5270 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5271 				 bool enable)
5272 {
5273 	u32 orig, data;
5274 
5275 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5276 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5277 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5278 	} else {
5279 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5280 		data |= 0xff000000;
5281 		if (data != orig)
5282 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5283 
5284 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5285 		data |= 0xff000000;
5286 		if (data != orig)
5287 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5288 	}
5289 }
5290 
5291 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5292 				 bool enable)
5293 {
5294 	u32 orig, data;
5295 
5296 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
5297 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5298 		data |= 0x100;
5299 		if (orig != data)
5300 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5301 
5302 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5303 		data |= 0x100;
5304 		if (orig != data)
5305 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5306 	} else {
5307 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5308 		data &= ~0x100;
5309 		if (orig != data)
5310 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5311 
5312 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5313 		data &= ~0x100;
5314 		if (orig != data)
5315 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5316 	}
5317 }
5318 
5319 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
5320 				bool enable)
5321 {
5322 	u32 orig, data;
5323 
5324 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5325 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5326 		data = 0xfff;
5327 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5328 
5329 		orig = data = RREG32(UVD_CGC_CTRL);
5330 		data |= DCM;
5331 		if (orig != data)
5332 			WREG32(UVD_CGC_CTRL, data);
5333 	} else {
5334 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5335 		data &= ~0xfff;
5336 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5337 
5338 		orig = data = RREG32(UVD_CGC_CTRL);
5339 		data &= ~DCM;
5340 		if (orig != data)
5341 			WREG32(UVD_CGC_CTRL, data);
5342 	}
5343 }
5344 
5345 static void cik_enable_bif_mgls(struct radeon_device *rdev,
5346 			       bool enable)
5347 {
5348 	u32 orig, data;
5349 
5350 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
5351 
5352 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5353 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5354 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5355 	else
5356 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5357 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5358 
5359 	if (orig != data)
5360 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
5361 }
5362 
5363 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5364 				bool enable)
5365 {
5366 	u32 orig, data;
5367 
5368 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5369 
5370 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5371 		data &= ~CLOCK_GATING_DIS;
5372 	else
5373 		data |= CLOCK_GATING_DIS;
5374 
5375 	if (orig != data)
5376 		WREG32(HDP_HOST_PATH_CNTL, data);
5377 }
5378 
5379 static void cik_enable_hdp_ls(struct radeon_device *rdev,
5380 			      bool enable)
5381 {
5382 	u32 orig, data;
5383 
5384 	orig = data = RREG32(HDP_MEM_POWER_LS);
5385 
5386 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5387 		data |= HDP_LS_ENABLE;
5388 	else
5389 		data &= ~HDP_LS_ENABLE;
5390 
5391 	if (orig != data)
5392 		WREG32(HDP_MEM_POWER_LS, data);
5393 }
5394 
5395 void cik_update_cg(struct radeon_device *rdev,
5396 		   u32 block, bool enable)
5397 {
5398 
5399 	if (block & RADEON_CG_BLOCK_GFX) {
5400 		cik_enable_gui_idle_interrupt(rdev, false);
5401 		/* order matters! */
5402 		if (enable) {
5403 			cik_enable_mgcg(rdev, true);
5404 			cik_enable_cgcg(rdev, true);
5405 		} else {
5406 			cik_enable_cgcg(rdev, false);
5407 			cik_enable_mgcg(rdev, false);
5408 		}
5409 		cik_enable_gui_idle_interrupt(rdev, true);
5410 	}
5411 
5412 	if (block & RADEON_CG_BLOCK_MC) {
5413 		if (!(rdev->flags & RADEON_IS_IGP)) {
5414 			cik_enable_mc_mgcg(rdev, enable);
5415 			cik_enable_mc_ls(rdev, enable);
5416 		}
5417 	}
5418 
5419 	if (block & RADEON_CG_BLOCK_SDMA) {
5420 		cik_enable_sdma_mgcg(rdev, enable);
5421 		cik_enable_sdma_mgls(rdev, enable);
5422 	}
5423 
5424 	if (block & RADEON_CG_BLOCK_BIF) {
5425 		cik_enable_bif_mgls(rdev, enable);
5426 	}
5427 
5428 	if (block & RADEON_CG_BLOCK_UVD) {
5429 		if (rdev->has_uvd)
5430 			cik_enable_uvd_mgcg(rdev, enable);
5431 	}
5432 
5433 	if (block & RADEON_CG_BLOCK_HDP) {
5434 		cik_enable_hdp_mgcg(rdev, enable);
5435 		cik_enable_hdp_ls(rdev, enable);
5436 	}
5437 }
5438 
5439 static void cik_init_cg(struct radeon_device *rdev)
5440 {
5441 
5442 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
5443 
5444 	if (rdev->has_uvd)
5445 		si_init_uvd_internal_cg(rdev);
5446 
5447 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5448 			     RADEON_CG_BLOCK_SDMA |
5449 			     RADEON_CG_BLOCK_BIF |
5450 			     RADEON_CG_BLOCK_UVD |
5451 			     RADEON_CG_BLOCK_HDP), true);
5452 }
5453 
5454 static void cik_fini_cg(struct radeon_device *rdev)
5455 {
5456 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5457 			     RADEON_CG_BLOCK_SDMA |
5458 			     RADEON_CG_BLOCK_BIF |
5459 			     RADEON_CG_BLOCK_UVD |
5460 			     RADEON_CG_BLOCK_HDP), false);
5461 
5462 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
5463 }
5464 
5465 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
5466 					  bool enable)
5467 {
5468 	u32 data, orig;
5469 
5470 	orig = data = RREG32(RLC_PG_CNTL);
5471 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5472 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5473 	else
5474 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5475 	if (orig != data)
5476 		WREG32(RLC_PG_CNTL, data);
5477 }
5478 
5479 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
5480 					  bool enable)
5481 {
5482 	u32 data, orig;
5483 
5484 	orig = data = RREG32(RLC_PG_CNTL);
5485 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5486 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5487 	else
5488 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5489 	if (orig != data)
5490 		WREG32(RLC_PG_CNTL, data);
5491 }
5492 
5493 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
5494 {
5495 	u32 data, orig;
5496 
5497 	orig = data = RREG32(RLC_PG_CNTL);
5498 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
5499 		data &= ~DISABLE_CP_PG;
5500 	else
5501 		data |= DISABLE_CP_PG;
5502 	if (orig != data)
5503 		WREG32(RLC_PG_CNTL, data);
5504 }
5505 
5506 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
5507 {
5508 	u32 data, orig;
5509 
5510 	orig = data = RREG32(RLC_PG_CNTL);
5511 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
5512 		data &= ~DISABLE_GDS_PG;
5513 	else
5514 		data |= DISABLE_GDS_PG;
5515 	if (orig != data)
5516 		WREG32(RLC_PG_CNTL, data);
5517 }
5518 
5519 #define CP_ME_TABLE_SIZE    96
5520 #define CP_ME_TABLE_OFFSET  2048
5521 #define CP_MEC_TABLE_OFFSET 4096
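/* The size/offsets above are in 32-bit dwords within each ucode image
 * (fw_data below is indexed as a __be32 array).
 */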
5522 
5523 void cik_init_cp_pg_table(struct radeon_device *rdev)
5524 {
5525 	const __be32 *fw_data;
5526 	volatile u32 *dst_ptr;
5527 	int me, i, max_me = 4;
5528 	u32 bo_offset = 0;
5529 	u32 table_offset;
5530 
5531 	if (rdev->family == CHIP_KAVERI)
5532 		max_me = 5;
5533 
5534 	if (rdev->rlc.cp_table_ptr == NULL)
5535 		return;
5536 
5537 	/* write the cp table buffer */
5538 	dst_ptr = rdev->rlc.cp_table_ptr;
5539 	for (me = 0; me < max_me; me++) {
5540 		if (me == 0) {
5541 			fw_data = (const __be32 *)rdev->ce_fw->data;
5542 			table_offset = CP_ME_TABLE_OFFSET;
5543 		} else if (me == 1) {
5544 			fw_data = (const __be32 *)rdev->pfp_fw->data;
5545 			table_offset = CP_ME_TABLE_OFFSET;
5546 		} else if (me == 2) {
5547 			fw_data = (const __be32 *)rdev->me_fw->data;
5548 			table_offset = CP_ME_TABLE_OFFSET;
5549 		} else {
5550 			fw_data = (const __be32 *)rdev->mec_fw->data;
5551 			table_offset = CP_MEC_TABLE_OFFSET;
5552 		}
5553 
5554 		for (i = 0; i < CP_ME_TABLE_SIZE; i++) {
5555 			dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
5556 		}
5557 		bo_offset += CP_ME_TABLE_SIZE;
5558 	}
5559 }
5560 
5561 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
5562 				bool enable)
5563 {
5564 	u32 data, orig;
5565 
5566 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5567 		orig = data = RREG32(RLC_PG_CNTL);
5568 		data |= GFX_PG_ENABLE;
5569 		if (orig != data)
5570 			WREG32(RLC_PG_CNTL, data);
5571 
5572 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
5573 		data |= AUTO_PG_EN;
5574 		if (orig != data)
5575 			WREG32(RLC_AUTO_PG_CTRL, data);
5576 	} else {
5577 		orig = data = RREG32(RLC_PG_CNTL);
5578 		data &= ~GFX_PG_ENABLE;
5579 		if (orig != data)
5580 			WREG32(RLC_PG_CNTL, data);
5581 
5582 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
5583 		data &= ~AUTO_PG_EN;
5584 		if (orig != data)
5585 			WREG32(RLC_AUTO_PG_CTRL, data);
5586 
5587 		data = RREG32(DB_RENDER_CONTROL);
5588 	}
5589 }
5590 
5591 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5592 {
5593 	u32 mask = 0, tmp, tmp1;
5594 	int i;
5595 
5596 	cik_select_se_sh(rdev, se, sh);
5597 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5598 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5599 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5600 
5601 	tmp &= 0xffff0000;
5602 
5603 	tmp |= tmp1;
5604 	tmp >>= 16;
5605 
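	/* build a mask of max_cu_per_sh ones, i.e. (1 << n) - 1 */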
5606 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
5607 		mask <<= 1;
5608 		mask |= 1;
5609 	}
5610 
5611 	return (~tmp) & mask;
5612 }
5613 
5614 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
5615 {
5616 	u32 i, j, k, active_cu_number = 0;
5617 	u32 mask, counter, cu_bitmap;
5618 	u32 tmp = 0;
5619 
5620 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5621 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5622 			mask = 1;
5623 			cu_bitmap = 0;
5624 			counter = 0;
5625 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
5626 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
5627 					if (counter < 2)
5628 						cu_bitmap |= mask;
5629 					counter++;
5630 				}
5631 				mask <<= 1;
5632 			}
5633 
5634 			active_cu_number += counter;
5635 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5636 		}
5637 	}
5638 
5639 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5640 
5641 	tmp = RREG32(RLC_MAX_PG_CU);
5642 	tmp &= ~MAX_PU_CU_MASK;
5643 	tmp |= MAX_PU_CU(active_cu_number);
5644 	WREG32(RLC_MAX_PG_CU, tmp);
5645 }
5646 
5647 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
5648 				       bool enable)
5649 {
5650 	u32 data, orig;
5651 
5652 	orig = data = RREG32(RLC_PG_CNTL);
5653 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
5654 		data |= STATIC_PER_CU_PG_ENABLE;
5655 	else
5656 		data &= ~STATIC_PER_CU_PG_ENABLE;
5657 	if (orig != data)
5658 		WREG32(RLC_PG_CNTL, data);
5659 }
5660 
5661 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
5662 					bool enable)
5663 {
5664 	u32 data, orig;
5665 
5666 	orig = data = RREG32(RLC_PG_CNTL);
5667 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
5668 		data |= DYN_PER_CU_PG_ENABLE;
5669 	else
5670 		data &= ~DYN_PER_CU_PG_ENABLE;
5671 	if (orig != data)
5672 		WREG32(RLC_PG_CNTL, data);
5673 }
5674 
5675 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
5676 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
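/* Both values above are dword offsets into the RLC GPM scratch space,
 * programmed through the RLC_GPM_SCRATCH_ADDR/DATA pair below.
 */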
5677 
5678 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
5679 {
5680 	u32 data, orig;
5681 	u32 i;
5682 
5683 	if (rdev->rlc.cs_data) {
5684 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
5685 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
5686 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
5687 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
5688 	} else {
5689 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
5690 		for (i = 0; i < 3; i++)
5691 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
5692 	}
5693 	if (rdev->rlc.reg_list) {
5694 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
5695 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
5696 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
5697 	}
5698 
5699 	orig = data = RREG32(RLC_PG_CNTL);
5700 	data |= GFX_PG_SRC;
5701 	if (orig != data)
5702 		WREG32(RLC_PG_CNTL, data);
5703 
5704 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5705 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
5706 
5707 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
5708 	data &= ~IDLE_POLL_COUNT_MASK;
5709 	data |= IDLE_POLL_COUNT(0x60);
5710 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
5711 
5712 	data = 0x10101010;
5713 	WREG32(RLC_PG_DELAY, data);
5714 
5715 	data = RREG32(RLC_PG_DELAY_2);
5716 	data &= ~0xff;
5717 	data |= 0x3;
5718 	WREG32(RLC_PG_DELAY_2, data);
5719 
5720 	data = RREG32(RLC_AUTO_PG_CTRL);
5721 	data &= ~GRBM_REG_SGIT_MASK;
5722 	data |= GRBM_REG_SGIT(0x700);
5723 	WREG32(RLC_AUTO_PG_CTRL, data);
5724 
5725 }
5726 
5727 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
5728 {
5729 	cik_enable_gfx_cgpg(rdev, enable);
5730 	cik_enable_gfx_static_mgpg(rdev, enable);
5731 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
5732 }
5733 
5734 u32 cik_get_csb_size(struct radeon_device *rdev)
5735 {
5736 	u32 count = 0;
5737 	const struct cs_section_def *sect = NULL;
5738 	const struct cs_extent_def *ext = NULL;
5739 
5740 	if (rdev->rlc.cs_data == NULL)
5741 		return 0;
5742 
5743 	/* begin clear state */
5744 	count += 2;
5745 	/* context control state */
5746 	count += 3;
5747 
5748 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5749 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5750 			if (sect->id == SECT_CONTEXT)
5751 				count += 2 + ext->reg_count;
5752 			else
5753 				return 0;
5754 		}
5755 	}
5756 	/* pa_sc_raster_config/pa_sc_raster_config1 */
5757 	count += 4;
5758 	/* end clear state */
5759 	count += 2;
5760 	/* clear state */
5761 	count += 2;
5762 
5763 	return count;
5764 }
5765 
5766 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5767 {
5768 	u32 count = 0, i;
5769 	const struct cs_section_def *sect = NULL;
5770 	const struct cs_extent_def *ext = NULL;
5771 
5772 	if (rdev->rlc.cs_data == NULL)
5773 		return;
5774 	if (buffer == NULL)
5775 		return;
5776 
5777 	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
5778 	buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;
5779 
5780 	buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
5781 	buffer[count++] = 0x80000000;
5782 	buffer[count++] = 0x80000000;
5783 
5784 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5785 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5786 			if (sect->id == SECT_CONTEXT) {
5787 				buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
5788 				buffer[count++] = ext->reg_index - 0xa000;
5789 				for (i = 0; i < ext->reg_count; i++)
5790 					buffer[count++] = ext->extent[i];
5791 			} else {
5792 				return;
5793 			}
5794 		}
5795 	}
5796 
5797 	buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
5798 	buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
5799 	switch (rdev->family) {
5800 	case CHIP_BONAIRE:
5801 		buffer[count++] = 0x16000012;
5802 		buffer[count++] = 0x00000000;
5803 		break;
5804 	case CHIP_KAVERI:
5805 		buffer[count++] = 0x00000000; /* XXX */
5806 		buffer[count++] = 0x00000000;
5807 		break;
5808 	case CHIP_KABINI:
5809 		buffer[count++] = 0x00000000; /* XXX */
5810 		buffer[count++] = 0x00000000;
5811 		break;
5812 	default:
5813 		buffer[count++] = 0x00000000;
5814 		buffer[count++] = 0x00000000;
5815 		break;
5816 	}
5817 
5818 	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
5819 	buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;
5820 
5821 	buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
5822 	buffer[count++] = 0;
5823 }
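
/*
 * A minimal usage sketch (illustrative only, not part of the driver):
 * the two helpers above pair up -- size the clear-state buffer in dwords
 * first, then fill it.  "example_build_csb" is a hypothetical name.
 */
static int example_build_csb(struct radeon_device *rdev,
			     volatile u32 *dst, u32 dst_dws)
{
	u32 dws = cik_get_csb_size(rdev);

	if (dws == 0 || dws > dst_dws)
		return -EINVAL; /* no cs data or destination too small */
	cik_get_csb_buffer(rdev, dst);
	return 0;
}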
5824 
5825 static void cik_init_pg(struct radeon_device *rdev)
5826 {
5827 	if (rdev->pg_flags) {
5828 		cik_enable_sck_slowdown_on_pu(rdev, true);
5829 		cik_enable_sck_slowdown_on_pd(rdev, true);
5830 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5831 			cik_init_gfx_cgpg(rdev);
5832 			cik_enable_cp_pg(rdev, true);
5833 			cik_enable_gds_pg(rdev, true);
5834 		}
5835 		cik_init_ao_cu_mask(rdev);
5836 		cik_update_gfx_pg(rdev, true);
5837 	}
5838 }
5839 
5840 static void cik_fini_pg(struct radeon_device *rdev)
5841 {
5842 	if (rdev->pg_flags) {
5843 		cik_update_gfx_pg(rdev, false);
5844 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5845 			cik_enable_cp_pg(rdev, false);
5846 			cik_enable_gds_pg(rdev, false);
5847 		}
5848 	}
5849 }
5850 
5851 /*
5852  * Interrupts
5853  * Starting with r6xx, interrupts are handled via a ring buffer.
5854  * Ring buffers are areas of GPU accessible memory that the GPU
5855  * writes interrupt vectors into and the host reads vectors out of.
5856  * There is a rptr (read pointer) that determines where the
5857  * host is currently reading, and a wptr (write pointer)
5858  * which determines where the GPU has written.  When the
5859  * pointers are equal, the ring is idle.  When the GPU
5860  * writes vectors to the ring buffer, it increments the
5861  * wptr.  When there is an interrupt, the host then starts
5862  * fetching commands and processing them until the pointers are
5863  * equal again at which point it updates the rptr.
5864  */
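
/*
 * A minimal consumer sketch of the scheme above (illustrative only;
 * cik_irq_process() below is the real implementation).  It drains
 * 16-byte vectors until rptr catches up with wptr, then publishes the
 * new rptr.  "example_drain_ih" is a hypothetical name.
 */
static void example_drain_ih(struct radeon_device *rdev, u32 wptr)
{
	u32 rptr = rdev->ih.rptr;

	while (rptr != wptr) {
		/* decode the vector at rdev->ih.ring[rptr / 4] here */
		rptr = (rptr + 16) & rdev->ih.ptr_mask;
	}
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rptr); /* tell the GPU how far we have read */
}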
5865 
5866 /**
5867  * cik_enable_interrupts - Enable the interrupt ring buffer
5868  *
5869  * @rdev: radeon_device pointer
5870  *
5871  * Enable the interrupt ring buffer (CIK).
5872  */
5873 static void cik_enable_interrupts(struct radeon_device *rdev)
5874 {
5875 	u32 ih_cntl = RREG32(IH_CNTL);
5876 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5877 
5878 	ih_cntl |= ENABLE_INTR;
5879 	ih_rb_cntl |= IH_RB_ENABLE;
5880 	WREG32(IH_CNTL, ih_cntl);
5881 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5882 	rdev->ih.enabled = true;
5883 }
5884 
5885 /**
5886  * cik_disable_interrupts - Disable the interrupt ring buffer
5887  *
5888  * @rdev: radeon_device pointer
5889  *
5890  * Disable the interrupt ring buffer (CIK).
5891  */
5892 static void cik_disable_interrupts(struct radeon_device *rdev)
5893 {
5894 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5895 	u32 ih_cntl = RREG32(IH_CNTL);
5896 
5897 	ih_rb_cntl &= ~IH_RB_ENABLE;
5898 	ih_cntl &= ~ENABLE_INTR;
5899 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5900 	WREG32(IH_CNTL, ih_cntl);
5901 	/* set rptr, wptr to 0 */
5902 	WREG32(IH_RB_RPTR, 0);
5903 	WREG32(IH_RB_WPTR, 0);
5904 	rdev->ih.enabled = false;
5905 	rdev->ih.rptr = 0;
5906 }
5907 
5908 /**
5909  * cik_disable_interrupt_state - Disable all interrupt sources
5910  *
5911  * @rdev: radeon_device pointer
5912  *
5913  * Clear all interrupt enable bits used by the driver (CIK).
5914  */
5915 static void cik_disable_interrupt_state(struct radeon_device *rdev)
5916 {
5917 	u32 tmp;
5918 
5919 	/* gfx ring */
5920 	tmp = RREG32(CP_INT_CNTL_RING0) &
5921 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5922 	WREG32(CP_INT_CNTL_RING0, tmp);
5923 	/* sdma */
5924 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5925 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5926 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5927 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5928 	/* compute queues */
5929 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
5930 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
5931 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
5932 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
5933 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
5934 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
5935 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
5936 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
5937 	/* grbm */
5938 	WREG32(GRBM_INT_CNTL, 0);
5939 	/* vline/vblank, etc. */
5940 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5941 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5942 	if (rdev->num_crtc >= 4) {
5943 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5944 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5945 	}
5946 	if (rdev->num_crtc >= 6) {
5947 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5948 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5949 	}
5950 
5951 	/* dac hotplug */
5952 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5953 
5954 	/* digital hotplug */
5955 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5956 	WREG32(DC_HPD1_INT_CONTROL, tmp);
5957 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5958 	WREG32(DC_HPD2_INT_CONTROL, tmp);
5959 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5960 	WREG32(DC_HPD3_INT_CONTROL, tmp);
5961 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5962 	WREG32(DC_HPD4_INT_CONTROL, tmp);
5963 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5964 	WREG32(DC_HPD5_INT_CONTROL, tmp);
5965 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5966 	WREG32(DC_HPD6_INT_CONTROL, tmp);
5967 
5968 }
5969 
5970 /**
5971  * cik_irq_init - init and enable the interrupt ring
5972  *
5973  * @rdev: radeon_device pointer
5974  *
5975  * Allocate a ring buffer for the interrupt controller,
5976  * enable the RLC, disable interrupts, set up the IH
5977  * ring buffer and enable it (CIK).
5978  * Called at device load and resume.
5979  * Returns 0 for success, errors for failure.
5980  */
5981 static int cik_irq_init(struct radeon_device *rdev)
5982 {
5983 	int ret = 0;
5984 	int rb_bufsz;
5985 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5986 
5987 	/* allocate ring */
5988 	ret = r600_ih_ring_alloc(rdev);
5989 	if (ret)
5990 		return ret;
5991 
5992 	/* disable irqs */
5993 	cik_disable_interrupts(rdev);
5994 
5995 	/* init rlc */
5996 	ret = cik_rlc_resume(rdev);
5997 	if (ret) {
5998 		r600_ih_ring_fini(rdev);
5999 		return ret;
6000 	}
6001 
6002 	/* setup interrupt control */
6003 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
6004 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6005 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6006 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6007 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6008 	 */
6009 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6010 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6011 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6012 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6013 
6014 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6015 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
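	/* the ring size field is the log2 of the ring size in dwords */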
6016 
6017 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6018 		      IH_WPTR_OVERFLOW_CLEAR |
6019 		      (rb_bufsz << 1));
6020 
6021 	if (rdev->wb.enabled)
6022 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6023 
6024 	/* set the writeback address whether it's enabled or not */
6025 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6026 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6027 
6028 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6029 
6030 	/* set rptr, wptr to 0 */
6031 	WREG32(IH_RB_RPTR, 0);
6032 	WREG32(IH_RB_WPTR, 0);
6033 
6034 	/* Default settings for IH_CNTL (disabled at first) */
6035 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6036 	/* RPTR_REARM only works if msi's are enabled */
6037 	if (rdev->msi_enabled)
6038 		ih_cntl |= RPTR_REARM;
6039 	WREG32(IH_CNTL, ih_cntl);
6040 
6041 	/* force the active interrupt state to all disabled */
6042 	cik_disable_interrupt_state(rdev);
6043 
6044 	pci_set_master(rdev->pdev);
6045 
6046 	/* enable irqs */
6047 	cik_enable_interrupts(rdev);
6048 
6049 	return ret;
6050 }
6051 
6052 /**
6053  * cik_irq_set - enable/disable interrupt sources
6054  *
6055  * @rdev: radeon_device pointer
6056  *
6057  * Enable interrupt sources on the GPU (vblanks, hpd,
6058  * etc.) (CIK).
6059  * Returns 0 for success, errors for failure.
6060  */
6061 int cik_irq_set(struct radeon_device *rdev)
6062 {
6063 	u32 cp_int_cntl;
6064 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6065 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6066 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6067 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6068 	u32 grbm_int_cntl = 0;
6069 	u32 dma_cntl, dma_cntl1;
6070 	u32 thermal_int;
6071 
6072 	if (!rdev->irq.installed) {
6073 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6074 		return -EINVAL;
6075 	}
6076 	/* don't enable anything if the ih is disabled */
6077 	if (!rdev->ih.enabled) {
6078 		cik_disable_interrupts(rdev);
6079 		/* force the active interrupt state to all disabled */
6080 		cik_disable_interrupt_state(rdev);
6081 		return 0;
6082 	}
6083 
6084 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6085 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6086 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6087 
6088 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6089 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6090 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6091 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6092 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6093 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6094 
6095 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6096 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6097 
6098 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6099 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6100 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6101 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6102 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6103 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6104 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6105 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6106 
6107 	if (rdev->flags & RADEON_IS_IGP)
6108 		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6109 			~(THERM_INTH_MASK | THERM_INTL_MASK);
6110 	else
6111 		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6112 			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6113 
6114 	/* enable CP interrupts on all rings */
6115 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6116 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
6117 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6118 	}
6119 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6120 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6121 		DRM_DEBUG("si_irq_set: sw int cp1\n");
6122 		if (ring->me == 1) {
6123 			switch (ring->pipe) {
6124 			case 0:
6125 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6126 				break;
6127 			case 1:
6128 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6129 				break;
6130 			case 2:
6131 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6132 				break;
6133 			case 3:
6134 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6135 				break;
6136 			default:
6137 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6138 				break;
6139 			}
6140 		} else if (ring->me == 2) {
6141 			switch (ring->pipe) {
6142 			case 0:
6143 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6144 				break;
6145 			case 1:
6146 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6147 				break;
6148 			case 2:
6149 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6150 				break;
6151 			case 3:
6152 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6153 				break;
6154 			default:
6155 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6156 				break;
6157 			}
6158 		} else {
6159 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6160 		}
6161 	}
6162 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6163 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6164 		DRM_DEBUG("si_irq_set: sw int cp2\n");
6165 		if (ring->me == 1) {
6166 			switch (ring->pipe) {
6167 			case 0:
6168 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6169 				break;
6170 			case 1:
6171 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6172 				break;
6173 			case 2:
6174 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6175 				break;
6176 			case 3:
6177 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6178 				break;
6179 			default:
6180 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6181 				break;
6182 			}
6183 		} else if (ring->me == 2) {
6184 			switch (ring->pipe) {
6185 			case 0:
6186 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6187 				break;
6188 			case 1:
6189 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6190 				break;
6191 			case 2:
6192 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6193 				break;
6194 			case 3:
6195 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6196 				break;
6197 			default:
6198 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6199 				break;
6200 			}
6201 		} else {
6202 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6203 		}
6204 	}
6205 
6206 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6207 		DRM_DEBUG("cik_irq_set: sw int dma\n");
6208 		dma_cntl |= TRAP_ENABLE;
6209 	}
6210 
6211 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6212 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
6213 		dma_cntl1 |= TRAP_ENABLE;
6214 	}
6215 
6216 	if (rdev->irq.crtc_vblank_int[0] ||
6217 	    atomic_read(&rdev->irq.pflip[0])) {
6218 		DRM_DEBUG("cik_irq_set: vblank 0\n");
6219 		crtc1 |= VBLANK_INTERRUPT_MASK;
6220 	}
6221 	if (rdev->irq.crtc_vblank_int[1] ||
6222 	    atomic_read(&rdev->irq.pflip[1])) {
6223 		DRM_DEBUG("cik_irq_set: vblank 1\n");
6224 		crtc2 |= VBLANK_INTERRUPT_MASK;
6225 	}
6226 	if (rdev->irq.crtc_vblank_int[2] ||
6227 	    atomic_read(&rdev->irq.pflip[2])) {
6228 		DRM_DEBUG("cik_irq_set: vblank 2\n");
6229 		crtc3 |= VBLANK_INTERRUPT_MASK;
6230 	}
6231 	if (rdev->irq.crtc_vblank_int[3] ||
6232 	    atomic_read(&rdev->irq.pflip[3])) {
6233 		DRM_DEBUG("cik_irq_set: vblank 3\n");
6234 		crtc4 |= VBLANK_INTERRUPT_MASK;
6235 	}
6236 	if (rdev->irq.crtc_vblank_int[4] ||
6237 	    atomic_read(&rdev->irq.pflip[4])) {
6238 		DRM_DEBUG("cik_irq_set: vblank 4\n");
6239 		crtc5 |= VBLANK_INTERRUPT_MASK;
6240 	}
6241 	if (rdev->irq.crtc_vblank_int[5] ||
6242 	    atomic_read(&rdev->irq.pflip[5])) {
6243 		DRM_DEBUG("cik_irq_set: vblank 5\n");
6244 		crtc6 |= VBLANK_INTERRUPT_MASK;
6245 	}
6246 	if (rdev->irq.hpd[0]) {
6247 		DRM_DEBUG("cik_irq_set: hpd 1\n");
6248 		hpd1 |= DC_HPDx_INT_EN;
6249 	}
6250 	if (rdev->irq.hpd[1]) {
6251 		DRM_DEBUG("cik_irq_set: hpd 2\n");
6252 		hpd2 |= DC_HPDx_INT_EN;
6253 	}
6254 	if (rdev->irq.hpd[2]) {
6255 		DRM_DEBUG("cik_irq_set: hpd 3\n");
6256 		hpd3 |= DC_HPDx_INT_EN;
6257 	}
6258 	if (rdev->irq.hpd[3]) {
6259 		DRM_DEBUG("cik_irq_set: hpd 4\n");
6260 		hpd4 |= DC_HPDx_INT_EN;
6261 	}
6262 	if (rdev->irq.hpd[4]) {
6263 		DRM_DEBUG("cik_irq_set: hpd 5\n");
6264 		hpd5 |= DC_HPDx_INT_EN;
6265 	}
6266 	if (rdev->irq.hpd[5]) {
6267 		DRM_DEBUG("cik_irq_set: hpd 6\n");
6268 		hpd6 |= DC_HPDx_INT_EN;
6269 	}
6270 
6271 	if (rdev->irq.dpm_thermal) {
6272 		DRM_DEBUG("dpm thermal\n");
6273 		if (rdev->flags & RADEON_IS_IGP)
6274 			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6275 		else
6276 			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6277 	}
6278 
6279 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6280 
6281 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6282 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6283 
6284 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6285 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6286 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6287 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6288 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6289 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6290 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6291 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6292 
6293 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6294 
6295 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6296 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6297 	if (rdev->num_crtc >= 4) {
6298 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6299 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6300 	}
6301 	if (rdev->num_crtc >= 6) {
6302 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6303 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6304 	}
6305 
6306 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
6307 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
6308 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
6309 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
6310 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
6311 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
6312 
6313 	if (rdev->flags & RADEON_IS_IGP)
6314 		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6315 	else
6316 		WREG32_SMC(CG_THERMAL_INT, thermal_int);
6317 
6318 	return 0;
6319 }
6320 
6321 /**
6322  * cik_irq_ack - ack interrupt sources
6323  *
6324  * @rdev: radeon_device pointer
6325  *
6326  * Ack interrupt sources on the GPU (vblanks, hpd,
6327  * etc.) (CIK).  Certain interrupt sources are sw
6328  * generated and do not require an explicit ack.
6329  */
6330 static inline void cik_irq_ack(struct radeon_device *rdev)
6331 {
6332 	u32 tmp;
6333 
6334 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6335 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6336 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6337 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6338 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6339 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6340 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6341 
6342 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6343 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6344 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6345 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6346 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6347 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6348 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6349 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6350 
6351 	if (rdev->num_crtc >= 4) {
6352 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6353 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6354 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6355 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6356 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6357 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6358 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6359 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6360 	}
6361 
6362 	if (rdev->num_crtc >= 6) {
6363 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6364 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6365 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6366 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6367 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6368 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6369 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6370 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6371 	}
6372 
6373 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6374 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6375 		tmp |= DC_HPDx_INT_ACK;
6376 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6377 	}
6378 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6379 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6380 		tmp |= DC_HPDx_INT_ACK;
6381 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6382 	}
6383 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6384 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6385 		tmp |= DC_HPDx_INT_ACK;
6386 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6387 	}
6388 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6389 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6390 		tmp |= DC_HPDx_INT_ACK;
6391 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6392 	}
6393 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6394 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6395 		tmp |= DC_HPDx_INT_ACK;
6396 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6397 	}
6398 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6399 		tmp = RREG32(DC_HPD6_INT_CONTROL);
6400 		tmp |= DC_HPDx_INT_ACK;
6401 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6402 	}
6403 }
6404 
6405 /**
6406  * cik_irq_disable - disable interrupts
6407  *
6408  * @rdev: radeon_device pointer
6409  *
6410  * Disable interrupts on the hw (CIK).
6411  */
6412 static void cik_irq_disable(struct radeon_device *rdev)
6413 {
6414 	cik_disable_interrupts(rdev);
6415 	/* Wait and acknowledge irq */
6416 	mdelay(1);
6417 	cik_irq_ack(rdev);
6418 	cik_disable_interrupt_state(rdev);
6419 }
6420 
6421 /**
6422  * cik_irq_suspend - disable interrupts for suspend
6423  *
6424  * @rdev: radeon_device pointer
6425  *
6426  * Disable interrupts and stop the RLC (CIK).
6427  * Used for suspend.
6428  */
6429 static void cik_irq_suspend(struct radeon_device *rdev)
6430 {
6431 	cik_irq_disable(rdev);
6432 	cik_rlc_stop(rdev);
6433 }
6434 
6435 /**
6436  * cik_irq_fini - tear down interrupt support
6437  *
6438  * @rdev: radeon_device pointer
6439  *
6440  * Disable interrupts on the hw and free the IH ring
6441  * buffer (CIK).
6442  * Used for driver unload.
6443  */
6444 static void cik_irq_fini(struct radeon_device *rdev)
6445 {
6446 	cik_irq_suspend(rdev);
6447 	r600_ih_ring_fini(rdev);
6448 }
6449 
6450 /**
6451  * cik_get_ih_wptr - get the IH ring buffer wptr
6452  *
6453  * @rdev: radeon_device pointer
6454  *
6455  * Get the IH ring buffer wptr from either the register
6456  * or the writeback memory buffer (CIK).  Also check for
6457  * ring buffer overflow and deal with it.
6458  * Used by cik_irq_process().
6459  * Returns the value of the wptr.
6460  */
6461 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
6462 {
6463 	u32 wptr, tmp;
6464 
6465 	if (rdev->wb.enabled)
6466 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6467 	else
6468 		wptr = RREG32(IH_RB_WPTR);
6469 
6470 	if (wptr & RB_OVERFLOW) {
6471 		/* When a ring buffer overflow happens, start parsing interrupts
6472 		 * from the last not-overwritten vector (wptr + 16), which
6473 		 * should allow us to catch up.
6474 		 */
6475 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6476 			wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6477 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6478 		tmp = RREG32(IH_RB_CNTL);
6479 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6480 		WREG32(IH_RB_CNTL, tmp);
6481 	}
6482 	return (wptr & rdev->ih.ptr_mask);
6483 }
6484 
6485 /*        CIK IV Ring
6486  * Each IV ring entry is 128 bits:
6487  * [7:0]    - interrupt source id
6488  * [31:8]   - reserved
6489  * [59:32]  - interrupt source data
6490  * [63:60]  - reserved
6491  * [71:64]  - RINGID
6492  *            CP:
6493  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
6494  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
6495  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
6496  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
6497  *            PIPE_ID - ME0 0=3D
6498  *                    - ME1&2 compute dispatcher (4 pipes each)
6499  *            SDMA:
6500  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
6501  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
6502  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
6503  * [79:72]  - VMID
6504  * [95:80]  - PASID
6505  * [127:96] - reserved
6506  */
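
/*
 * A minimal decode sketch for one IV ring entry (illustrative only;
 * "example_decode_iv" is a hypothetical name).  "dw" points at the four
 * little-endian dwords of a single 128-bit entry, matching the field
 * extraction done in cik_irq_process() below.
 */
static void example_decode_iv(const u32 *dw, u32 *src_id, u32 *src_data,
			      u32 *ring_id, u32 *vmid, u32 *pasid)
{
	*src_id   = le32_to_cpu(dw[0]) & 0xff;           /* [7:0] */
	*src_data = le32_to_cpu(dw[1]) & 0xfffffff;      /* [59:32] */
	*ring_id  = le32_to_cpu(dw[2]) & 0xff;           /* [71:64] */
	*vmid     = (le32_to_cpu(dw[2]) >> 8) & 0xff;    /* [79:72] */
	*pasid    = (le32_to_cpu(dw[2]) >> 16) & 0xffff; /* [95:80] */
}
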
6507 /**
6508  * cik_irq_process - interrupt handler
6509  *
6510  * @rdev: radeon_device pointer
6511  *
6512  * Interrupt handler (CIK).  Walk the IH ring,
6513  * ack interrupts and schedule work to handle
6514  * interrupt events.
6515  * Returns irq process return code.
6516  */
6517 int cik_irq_process(struct radeon_device *rdev)
6518 {
6519 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6520 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6521 	u32 wptr;
6522 	u32 rptr;
6523 	u32 src_id, src_data, ring_id;
6524 	u8 me_id, pipe_id, queue_id;
6525 	u32 ring_index;
6526 	bool queue_hotplug = false;
6527 	bool queue_reset = false;
6528 	u32 addr, status, mc_client;
6529 	bool queue_thermal = false;
6530 
6531 	if (!rdev->ih.enabled || rdev->shutdown)
6532 		return IRQ_NONE;
6533 
6534 	wptr = cik_get_ih_wptr(rdev);
6535 
6536 restart_ih:
6537 	/* is somebody else already processing irqs? */
6538 	if (atomic_xchg(&rdev->ih.lock, 1))
6539 		return IRQ_NONE;
6540 
6541 	rptr = rdev->ih.rptr;
6542 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6543 
6544 	/* Order reading of wptr vs. reading of IH ring data */
6545 	rmb();
6546 
6547 	/* display interrupts */
6548 	cik_irq_ack(rdev);
6549 
6550 	while (rptr != wptr) {
6551 		/* wptr/rptr are in bytes! */
6552 		ring_index = rptr / 4;
6553 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6554 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6555 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6556 
6557 		switch (src_id) {
6558 		case 1: /* D1 vblank/vline */
6559 			switch (src_data) {
6560 			case 0: /* D1 vblank */
6561 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
6562 					if (rdev->irq.crtc_vblank_int[0]) {
6563 						drm_handle_vblank(rdev->ddev, 0);
6564 						rdev->pm.vblank_sync = true;
6565 						wake_up(&rdev->irq.vblank_queue);
6566 					}
6567 					if (atomic_read(&rdev->irq.pflip[0]))
6568 						radeon_crtc_handle_flip(rdev, 0);
6569 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6570 					DRM_DEBUG("IH: D1 vblank\n");
6571 				}
6572 				break;
6573 			case 1: /* D1 vline */
6574 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
6575 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6576 					DRM_DEBUG("IH: D1 vline\n");
6577 				}
6578 				break;
6579 			default:
6580 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6581 				break;
6582 			}
6583 			break;
6584 		case 2: /* D2 vblank/vline */
6585 			switch (src_data) {
6586 			case 0: /* D2 vblank */
6587 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6588 					if (rdev->irq.crtc_vblank_int[1]) {
6589 						drm_handle_vblank(rdev->ddev, 1);
6590 						rdev->pm.vblank_sync = true;
6591 						wake_up(&rdev->irq.vblank_queue);
6592 					}
6593 					if (atomic_read(&rdev->irq.pflip[1]))
6594 						radeon_crtc_handle_flip(rdev, 1);
6595 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6596 					DRM_DEBUG("IH: D2 vblank\n");
6597 				}
6598 				break;
6599 			case 1: /* D2 vline */
6600 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6601 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6602 					DRM_DEBUG("IH: D2 vline\n");
6603 				}
6604 				break;
6605 			default:
6606 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6607 				break;
6608 			}
6609 			break;
6610 		case 3: /* D3 vblank/vline */
6611 			switch (src_data) {
6612 			case 0: /* D3 vblank */
6613 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6614 					if (rdev->irq.crtc_vblank_int[2]) {
6615 						drm_handle_vblank(rdev->ddev, 2);
6616 						rdev->pm.vblank_sync = true;
6617 						wake_up(&rdev->irq.vblank_queue);
6618 					}
6619 					if (atomic_read(&rdev->irq.pflip[2]))
6620 						radeon_crtc_handle_flip(rdev, 2);
6621 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6622 					DRM_DEBUG("IH: D3 vblank\n");
6623 				}
6624 				break;
6625 			case 1: /* D3 vline */
6626 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6627 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6628 					DRM_DEBUG("IH: D3 vline\n");
6629 				}
6630 				break;
6631 			default:
6632 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6633 				break;
6634 			}
6635 			break;
6636 		case 4: /* D4 vblank/vline */
6637 			switch (src_data) {
6638 			case 0: /* D4 vblank */
6639 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6640 					if (rdev->irq.crtc_vblank_int[3]) {
6641 						drm_handle_vblank(rdev->ddev, 3);
6642 						rdev->pm.vblank_sync = true;
6643 						wake_up(&rdev->irq.vblank_queue);
6644 					}
6645 					if (atomic_read(&rdev->irq.pflip[3]))
6646 						radeon_crtc_handle_flip(rdev, 3);
6647 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6648 					DRM_DEBUG("IH: D4 vblank\n");
6649 				}
6650 				break;
6651 			case 1: /* D4 vline */
6652 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6653 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6654 					DRM_DEBUG("IH: D4 vline\n");
6655 				}
6656 				break;
6657 			default:
6658 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6659 				break;
6660 			}
6661 			break;
6662 		case 5: /* D5 vblank/vline */
6663 			switch (src_data) {
6664 			case 0: /* D5 vblank */
6665 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6666 					if (rdev->irq.crtc_vblank_int[4]) {
6667 						drm_handle_vblank(rdev->ddev, 4);
6668 						rdev->pm.vblank_sync = true;
6669 						wake_up(&rdev->irq.vblank_queue);
6670 					}
6671 					if (atomic_read(&rdev->irq.pflip[4]))
6672 						radeon_crtc_handle_flip(rdev, 4);
6673 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6674 					DRM_DEBUG("IH: D5 vblank\n");
6675 				}
6676 				break;
6677 			case 1: /* D5 vline */
6678 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6679 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6680 					DRM_DEBUG("IH: D5 vline\n");
6681 				}
6682 				break;
6683 			default:
6684 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6685 				break;
6686 			}
6687 			break;
6688 		case 6: /* D6 vblank/vline */
6689 			switch (src_data) {
6690 			case 0: /* D6 vblank */
6691 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6692 					if (rdev->irq.crtc_vblank_int[5]) {
6693 						drm_handle_vblank(rdev->ddev, 5);
6694 						rdev->pm.vblank_sync = true;
6695 						wake_up(&rdev->irq.vblank_queue);
6696 					}
6697 					if (atomic_read(&rdev->irq.pflip[5]))
6698 						radeon_crtc_handle_flip(rdev, 5);
6699 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6700 					DRM_DEBUG("IH: D6 vblank\n");
6701 				}
6702 				break;
6703 			case 1: /* D6 vline */
6704 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6705 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6706 					DRM_DEBUG("IH: D6 vline\n");
6707 				}
6708 				break;
6709 			default:
6710 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6711 				break;
6712 			}
6713 			break;
6714 		case 42: /* HPD hotplug */
6715 			switch (src_data) {
6716 			case 0:
6717 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6718 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
6719 					queue_hotplug = true;
6720 					DRM_DEBUG("IH: HPD1\n");
6721 				}
6722 				break;
6723 			case 1:
6724 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6725 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6726 					queue_hotplug = true;
6727 					DRM_DEBUG("IH: HPD2\n");
6728 				}
6729 				break;
6730 			case 2:
6731 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6732 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6733 					queue_hotplug = true;
6734 					DRM_DEBUG("IH: HPD3\n");
6735 				}
6736 				break;
6737 			case 3:
6738 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6739 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6740 					queue_hotplug = true;
6741 					DRM_DEBUG("IH: HPD4\n");
6742 				}
6743 				break;
6744 			case 4:
6745 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6746 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6747 					queue_hotplug = true;
6748 					DRM_DEBUG("IH: HPD5\n");
6749 				}
6750 				break;
6751 			case 5:
6752 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6753 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6754 					queue_hotplug = true;
6755 					DRM_DEBUG("IH: HPD6\n");
6756 				}
6757 				break;
6758 			default:
6759 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6760 				break;
6761 			}
6762 			break;
6763 		case 124: /* UVD */
6764 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6765 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6766 			break;
6767 		case 146:
6768 		case 147:
6769 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6770 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6771 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
6772 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6773 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6774 				addr);
6775 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6776 				status);
6777 			cik_vm_decode_fault(rdev, status, addr, mc_client);
6778 			/* reset addr and status */
6779 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6780 			break;
6781 		case 176: /* GFX RB CP_INT */
6782 		case 177: /* GFX IB CP_INT */
6783 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6784 			break;
6785 		case 181: /* CP EOP event */
6786 			DRM_DEBUG("IH: CP EOP\n");
6787 			/* XXX check the bitfield order! */
6788 			me_id = (ring_id & 0x60) >> 5;
6789 			pipe_id = (ring_id & 0x18) >> 3;
6790 			queue_id = (ring_id & 0x7) >> 0;
6791 			switch (me_id) {
6792 			case 0:
6793 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6794 				break;
6795 			case 1:
6796 			case 2:
6797 				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
6798 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6799 				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
6800 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6801 				break;
6802 			}
6803 			break;
6804 		case 184: /* CP Privileged reg access */
6805 			DRM_ERROR("Illegal register access in command stream\n");
6806 			/* XXX check the bitfield order! */
6807 			me_id = (ring_id & 0x60) >> 5;
6808 			pipe_id = (ring_id & 0x18) >> 3;
6809 			queue_id = (ring_id & 0x7) >> 0;
6810 			switch (me_id) {
6811 			case 0:
6812 				/* This results in a full GPU reset, but all we need to do is soft
6813 				 * reset the CP for gfx
6814 				 */
6815 				queue_reset = true;
6816 				break;
6817 			case 1:
6818 				/* XXX compute */
6819 				queue_reset = true;
6820 				break;
6821 			case 2:
6822 				/* XXX compute */
6823 				queue_reset = true;
6824 				break;
6825 			}
6826 			break;
6827 		case 185: /* CP Privileged inst */
6828 			DRM_ERROR("Illegal instruction in command stream\n");
6829 			/* XXX check the bitfield order! */
6830 			me_id = (ring_id & 0x60) >> 5;
6831 			pipe_id = (ring_id & 0x18) >> 3;
6832 			queue_id = (ring_id & 0x7) >> 0;
6833 			switch (me_id) {
6834 			case 0:
6835 				/* This results in a full GPU reset, but all we need to do is soft
6836 				 * reset the CP for gfx
6837 				 */
6838 				queue_reset = true;
6839 				break;
6840 			case 1:
6841 				/* XXX compute */
6842 				queue_reset = true;
6843 				break;
6844 			case 2:
6845 				/* XXX compute */
6846 				queue_reset = true;
6847 				break;
6848 			}
6849 			break;
6850 		case 224: /* SDMA trap event */
6851 			/* XXX check the bitfield order! */
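			/* as written, bits [1:0] of ring_id select the SDMA
			 * instance and bits [3:2] the queue */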
6852 			me_id = (ring_id & 0x3) >> 0;
6853 			queue_id = (ring_id & 0xc) >> 2;
6854 			DRM_DEBUG("IH: SDMA trap\n");
6855 			switch (me_id) {
6856 			case 0:
6857 				switch (queue_id) {
6858 				case 0:
6859 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6860 					break;
6861 				case 1:
6862 					/* XXX compute */
6863 					break;
6864 				case 2:
6865 					/* XXX compute */
6866 					break;
6867 				}
6868 				break;
6869 			case 1:
6870 				switch (queue_id) {
6871 				case 0:
6872 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6873 					break;
6874 				case 1:
6875 					/* XXX compute */
6876 					break;
6877 				case 2:
6878 					/* XXX compute */
6879 					break;
6880 				}
6881 				break;
6882 			}
6883 			break;
6884 		case 230: /* thermal low to high */
6885 			DRM_DEBUG("IH: thermal low to high\n");
6886 			rdev->pm.dpm.thermal.high_to_low = false;
6887 			queue_thermal = true;
6888 			break;
6889 		case 231: /* thermal high to low */
6890 			DRM_DEBUG("IH: thermal high to low\n");
6891 			rdev->pm.dpm.thermal.high_to_low = true;
6892 			queue_thermal = true;
6893 			break;
6894 		case 233: /* GUI IDLE */
6895 			DRM_DEBUG("IH: GUI idle\n");
6896 			break;
6897 		case 241: /* SDMA Privileged inst */
6898 		case 247: /* SDMA Privileged inst */
6899 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
6900 			/* XXX check the bitfield order! */
6901 			me_id = (ring_id & 0x3) >> 0;
6902 			queue_id = (ring_id & 0xc) >> 2;
6903 			switch (me_id) {
6904 			case 0:
6905 				switch (queue_id) {
6906 				case 0:
6907 					queue_reset = true;
6908 					break;
6909 				case 1:
6910 					/* XXX compute */
6911 					queue_reset = true;
6912 					break;
6913 				case 2:
6914 					/* XXX compute */
6915 					queue_reset = true;
6916 					break;
6917 				}
6918 				break;
6919 			case 1:
6920 				switch (queue_id) {
6921 				case 0:
6922 					queue_reset = true;
6923 					break;
6924 				case 1:
6925 					/* XXX compute */
6926 					queue_reset = true;
6927 					break;
6928 				case 2:
6929 					/* XXX compute */
6930 					queue_reset = true;
6931 					break;
6932 				}
6933 				break;
6934 			}
6935 			break;
6936 		default:
6937 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6938 			break;
6939 		}
6940 
6941 		/* wptr/rptr are in bytes! */
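		/* each IH vector is 128 bits (four dwords), hence the
		 * 16-byte step */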
6942 		rptr += 16;
6943 		rptr &= rdev->ih.ptr_mask;
6944 	}
6945 	if (queue_hotplug)
6946 		schedule_work(&rdev->hotplug_work);
6947 	if (queue_reset)
6948 		schedule_work(&rdev->reset_work);
6949 	if (queue_thermal)
6950 		schedule_work(&rdev->pm.dpm.thermal.work);
6951 	rdev->ih.rptr = rptr;
6952 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
6953 	atomic_set(&rdev->ih.lock, 0);
6954 
6955 	/* make sure wptr hasn't changed while processing */
6956 	wptr = cik_get_ih_wptr(rdev);
6957 	if (wptr != rptr)
6958 		goto restart_ih;
6959 
6960 	return IRQ_HANDLED;
6961 }
6962 
6963 /*
6964  * startup/shutdown callbacks
6965  */
6966 /**
6967  * cik_startup - program the asic to a functional state
6968  *
6969  * @rdev: radeon_device pointer
6970  *
6971  * Programs the asic to a functional state (CIK).
6972  * Called by cik_init() and cik_resume().
6973  * Returns 0 for success, error for failure.
6974  */
6975 static int cik_startup(struct radeon_device *rdev)
6976 {
6977 	struct radeon_ring *ring;
6978 	int r;
6979 
6980 	/* enable pcie gen2/3 link */
6981 	cik_pcie_gen3_enable(rdev);
6982 	/* enable aspm */
6983 	cik_program_aspm(rdev);
6984 
6985 	/* scratch needs to be initialized before MC */
6986 	r = r600_vram_scratch_init(rdev);
6987 	if (r)
6988 		return r;
6989 
6990 	cik_mc_program(rdev);
6991 
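	/* IGPs share the system memory controller and so need no MC
	 * microcode; only the dGPU path below checks and loads mc_fw */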
6992 	if (rdev->flags & RADEON_IS_IGP) {
6993 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6994 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
6995 			r = cik_init_microcode(rdev);
6996 			if (r) {
6997 				DRM_ERROR("Failed to load firmware!\n");
6998 				return r;
6999 			}
7000 		}
7001 	} else {
7002 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7003 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
7004 		    !rdev->mc_fw) {
7005 			r = cik_init_microcode(rdev);
7006 			if (r) {
7007 				DRM_ERROR("Failed to load firmware!\n");
7008 				return r;
7009 			}
7010 		}
7011 
7012 		r = ci_mc_load_microcode(rdev);
7013 		if (r) {
7014 			DRM_ERROR("Failed to load MC firmware!\n");
7015 			return r;
7016 		}
7017 	}
7018 
7019 	r = cik_pcie_gart_enable(rdev);
7020 	if (r)
7021 		return r;
7022 	cik_gpu_init(rdev);
7023 
7024 	/* allocate rlc buffers */
7025 	if (rdev->flags & RADEON_IS_IGP) {
7026 		if (rdev->family == CHIP_KAVERI) {
7027 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7028 			rdev->rlc.reg_list_size =
7029 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7030 		} else {
7031 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7032 			rdev->rlc.reg_list_size =
7033 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7034 		}
7035 	}
7036 	rdev->rlc.cs_data = ci_cs_data;
7037 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
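	/* presumably one table of 4-byte entries per CP micro engine
	 * (PFP, CE, ME and the two MECs), hence the factor of 5 * 4 */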
7038 	r = sumo_rlc_init(rdev);
7039 	if (r) {
7040 		DRM_ERROR("Failed to init rlc BOs!\n");
7041 		return r;
7042 	}
7043 
7044 	/* allocate wb buffer */
7045 	r = radeon_wb_init(rdev);
7046 	if (r)
7047 		return r;
7048 
7049 	/* allocate mec buffers */
7050 	r = cik_mec_init(rdev);
7051 	if (r) {
7052 		DRM_ERROR("Failed to init MEC BOs!\n");
7053 		return r;
7054 	}
7055 
7056 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7057 	if (r) {
7058 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7059 		return r;
7060 	}
7061 
7062 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7063 	if (r) {
7064 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7065 		return r;
7066 	}
7067 
7068 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7069 	if (r) {
7070 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7071 		return r;
7072 	}
7073 
7074 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7075 	if (r) {
7076 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7077 		return r;
7078 	}
7079 
7080 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7081 	if (r) {
7082 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7083 		return r;
7084 	}
7085 
7086 	r = radeon_uvd_resume(rdev);
7087 	if (!r) {
7088 		r = uvd_v4_2_resume(rdev);
7089 		if (!r) {
7090 			r = radeon_fence_driver_start_ring(rdev,
7091 							   R600_RING_TYPE_UVD_INDEX);
7092 			if (r)
7093 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7094 		}
7095 	}
7096 	if (r)
7097 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7098 
7099 	/* Enable IRQ */
7100 	if (!rdev->irq.installed) {
7101 		r = radeon_irq_kms_init(rdev);
7102 		if (r)
7103 			return r;
7104 	}
7105 
7106 	r = cik_irq_init(rdev);
7107 	if (r) {
7108 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
7109 		radeon_irq_kms_fini(rdev);
7110 		return r;
7111 	}
7112 	cik_irq_set(rdev);
7113 
7114 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7115 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7116 			     CP_RB0_RPTR, CP_RB0_WPTR,
7117 			     RADEON_CP_PACKET2);
7118 	if (r)
7119 		return r;
7120 
7121 	/* set up the compute queues */
7122 	/* type-2 packets are deprecated on MEC, use type-3 instead */
7123 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7124 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7125 			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7126 			     PACKET3(PACKET3_NOP, 0x3FFF));
7127 	if (r)
7128 		return r;
7129 	ring->me = 1; /* first MEC */
7130 	ring->pipe = 0; /* first pipe */
7131 	ring->queue = 0; /* first queue */
7132 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7133 
7134 	/* type-2 packets are deprecated on MEC, use type-3 instead */
7135 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7136 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7137 			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7138 			     PACKET3(PACKET3_NOP, 0x3FFF));
7139 	if (r)
7140 		return r;
7141 	/* dGPUs only have 1 MEC */
7142 	ring->me = 1; /* first MEC */
7143 	ring->pipe = 0; /* first pipe */
7144 	ring->queue = 1; /* second queue */
7145 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
7146 
7147 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7148 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7149 			     SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
7150 			     SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
7151 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7152 	if (r)
7153 		return r;
7154 
7155 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7156 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7157 			     SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
7158 			     SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
7159 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7160 	if (r)
7161 		return r;
7162 
7163 	r = cik_cp_resume(rdev);
7164 	if (r)
7165 		return r;
7166 
7167 	r = cik_sdma_resume(rdev);
7168 	if (r)
7169 		return r;
7170 
7171 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7172 	if (ring->ring_size) {
7173 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7174 				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
7175 				     RADEON_CP_PACKET2);
7176 		if (!r)
7177 			r = uvd_v1_0_init(rdev);
7178 		if (r)
7179 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7180 	}
7181 
7182 	r = radeon_ib_pool_init(rdev);
7183 	if (r) {
7184 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7185 		return r;
7186 	}
7187 
7188 	r = radeon_vm_manager_init(rdev);
7189 	if (r) {
7190 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7191 		return r;
7192 	}
7193 
7194 	r = dce6_audio_init(rdev);
7195 	if (r)
7196 		return r;
7197 
7198 	return 0;
7199 }
7200 
7201 /**
7202  * cik_resume - resume the asic to a functional state
7203  *
7204  * @rdev: radeon_device pointer
7205  *
7206  * Programs the asic to a functional state (CIK).
7207  * Called at resume.
7208  * Returns 0 for success, error for failure.
7209  */
7210 int cik_resume(struct radeon_device *rdev)
7211 {
7212 	int r;
7213 
7214 	/* post card */
7215 	atom_asic_init(rdev->mode_info.atom_context);
7216 
7217 	/* init golden registers */
7218 	cik_init_golden_registers(rdev);
7219 
7220 	rdev->accel_working = true;
7221 	r = cik_startup(rdev);
7222 	if (r) {
7223 		DRM_ERROR("cik startup failed on resume\n");
7224 		rdev->accel_working = false;
7225 		return r;
7226 	}
7227 
7228 	return r;
7230 }
7231 
7232 /**
7233  * cik_suspend - suspend the asic
7234  *
7235  * @rdev: radeon_device pointer
7236  *
7237  * Bring the chip into a state suitable for suspend (CIK).
7238  * Called at suspend.
7239  * Returns 0 for success.
7240  */
7241 int cik_suspend(struct radeon_device *rdev)
7242 {
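	/* tear down in roughly the reverse order of cik_startup(): stop
	 * the engines first, then power/clock gating, interrupts,
	 * writeback and finally the GART */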
7243 	dce6_audio_fini(rdev);
7244 	radeon_vm_manager_fini(rdev);
7245 	cik_cp_enable(rdev, false);
7246 	cik_sdma_enable(rdev, false);
7247 	uvd_v1_0_fini(rdev);
7248 	radeon_uvd_suspend(rdev);
7249 	cik_fini_pg(rdev);
7250 	cik_fini_cg(rdev);
7251 	cik_irq_suspend(rdev);
7252 	radeon_wb_disable(rdev);
7253 	cik_pcie_gart_disable(rdev);
7254 	return 0;
7255 }
7256 
7257 /* The plan is to move initialization into this function and to use
7258  * helper functions so that radeon_device_init does little more
7259  * than call the asic-specific functions. This should also allow
7260  * us to remove a number of callbacks, such as
7261  * vram_info.
7262  */
7263 /**
7264  * cik_init - asic specific driver and hw init
7265  *
7266  * @rdev: radeon_device pointer
7267  *
7268  * Setup asic specific driver variables and program the hw
7269  * to a functional state (CIK).
7270  * Called at driver startup.
7271  * Returns 0 for success, errors for failure.
7272  */
7273 int cik_init(struct radeon_device *rdev)
7274 {
7275 	struct radeon_ring *ring;
7276 	int r;
7277 
7278 	/* Read BIOS */
7279 	if (!radeon_get_bios(rdev)) {
7280 		if (ASIC_IS_AVIVO(rdev))
7281 			return -EINVAL;
7282 	}
7283 	/* Must be an ATOMBIOS */
7284 	if (!rdev->is_atom_bios) {
7285 		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
7286 		return -EINVAL;
7287 	}
7288 	r = radeon_atombios_init(rdev);
7289 	if (r)
7290 		return r;
7291 
7292 	/* Post card if necessary */
7293 	if (!radeon_card_posted(rdev)) {
7294 		if (!rdev->bios) {
7295 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7296 			return -EINVAL;
7297 		}
7298 		DRM_INFO("GPU not posted. posting now...\n");
7299 		atom_asic_init(rdev->mode_info.atom_context);
7300 	}
7301 	/* init golden registers */
7302 	cik_init_golden_registers(rdev);
7303 	/* Initialize scratch registers */
7304 	cik_scratch_init(rdev);
7305 	/* Initialize surface registers */
7306 	radeon_surface_init(rdev);
7307 	/* Initialize clocks */
7308 	radeon_get_clock_info(rdev->ddev);
7309 
7310 	/* Fence driver */
7311 	r = radeon_fence_driver_init(rdev);
7312 	if (r)
7313 		return r;
7314 
7315 	/* initialize memory controller */
7316 	r = cik_mc_init(rdev);
7317 	if (r)
7318 		return r;
7319 	/* Memory manager */
7320 	r = radeon_bo_init(rdev);
7321 	if (r)
7322 		return r;
7323 
7324 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7325 	ring->ring_obj = NULL;
7326 	r600_ring_init(rdev, ring, 1024 * 1024);
7327 
7328 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7329 	ring->ring_obj = NULL;
7330 	r600_ring_init(rdev, ring, 1024 * 1024);
7331 	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7332 	if (r)
7333 		return r;
7334 
7335 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7336 	ring->ring_obj = NULL;
7337 	r600_ring_init(rdev, ring, 1024 * 1024);
7338 	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7339 	if (r)
7340 		return r;
7341 
7342 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7343 	ring->ring_obj = NULL;
7344 	r600_ring_init(rdev, ring, 256 * 1024);
7345 
7346 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7347 	ring->ring_obj = NULL;
7348 	r600_ring_init(rdev, ring, 256 * 1024);
7349 
7350 	r = radeon_uvd_init(rdev);
7351 	if (!r) {
7352 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7353 		ring->ring_obj = NULL;
7354 		r600_ring_init(rdev, ring, 4096);
7355 	}
7356 
7357 	rdev->ih.ring_obj = NULL;
7358 	r600_ih_ring_init(rdev, 64 * 1024);
7359 
7360 	r = r600_pcie_gart_init(rdev);
7361 	if (r)
7362 		return r;
7363 
7364 	rdev->accel_working = true;
7365 	r = cik_startup(rdev);
7366 	if (r) {
7367 		dev_err(rdev->dev, "disabling GPU acceleration\n");
7368 		cik_cp_fini(rdev);
7369 		cik_sdma_fini(rdev);
7370 		cik_irq_fini(rdev);
7371 		sumo_rlc_fini(rdev);
7372 		cik_mec_fini(rdev);
7373 		radeon_wb_fini(rdev);
7374 		radeon_ib_pool_fini(rdev);
7375 		radeon_vm_manager_fini(rdev);
7376 		radeon_irq_kms_fini(rdev);
7377 		cik_pcie_gart_fini(rdev);
7378 		rdev->accel_working = false;
7379 	}
7380 
7381 	/* Don't start up if the MC ucode is missing.
7382 	 * The default clocks and voltages before the MC ucode
7383 	 * is loaded are not sufficient for advanced operations.
7384 	 */
7385 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7386 		DRM_ERROR("radeon: MC ucode required for CIK+.\n");
7387 		return -EINVAL;
7388 	}
7389 
7390 	return 0;
7391 }
7392 
7393 /**
7394  * cik_fini - asic specific driver and hw fini
7395  *
7396  * @rdev: radeon_device pointer
7397  *
7398  * Tear down the asic specific driver variables and program the hw
7399  * to an idle state (CIK).
7400  * Called at driver unload.
7401  */
7402 void cik_fini(struct radeon_device *rdev)
7403 {
7404 	cik_cp_fini(rdev);
7405 	cik_sdma_fini(rdev);
7406 	cik_fini_pg(rdev);
7407 	cik_fini_cg(rdev);
7408 	cik_irq_fini(rdev);
7409 	sumo_rlc_fini(rdev);
7410 	cik_mec_fini(rdev);
7411 	radeon_wb_fini(rdev);
7412 	radeon_vm_manager_fini(rdev);
7413 	radeon_ib_pool_fini(rdev);
7414 	radeon_irq_kms_fini(rdev);
7415 	uvd_v1_0_fini(rdev);
7416 	radeon_uvd_fini(rdev);
7417 	cik_pcie_gart_fini(rdev);
7418 	r600_vram_scratch_fini(rdev);
7419 	radeon_gem_fini(rdev);
7420 	radeon_fence_driver_fini(rdev);
7421 	radeon_bo_fini(rdev);
7422 	radeon_atombios_fini(rdev);
7423 	kfree(rdev->bios);
7424 	rdev->bios = NULL;
7425 }
7426 
7427 /* display watermark setup */
7428 /**
7429  * dce8_line_buffer_adjust - Set up the line buffer
7430  *
7431  * @rdev: radeon_device pointer
7432  * @radeon_crtc: the selected display controller
7433  * @mode: the current display mode on the selected display
7434  * controller
7435  *
7436  * Set up the line buffer allocation for
7437  * the selected display controller (CIK).
7438  * Returns the line buffer size in pixels.
7439  */
7440 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
7441 				   struct radeon_crtc *radeon_crtc,
7442 				   struct drm_display_mode *mode)
7443 {
7444 	u32 tmp, buffer_alloc, i;
7445 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
7446 	/*
7447 	 * Line Buffer Setup
7448 	 * There are 6 line buffers, one for each display controller.
7449 	 * There are 3 partitions per LB. Select the number of partitions
7450 	 * to enable based on the display width.  For display widths larger
7451 	 * than 4096, you need to use 2 display controllers and combine
7452 	 * them using the stereo blender.
7453 	 */
7454 	if (radeon_crtc->base.enabled && mode) {
7455 		if (mode->crtc_hdisplay < 1920) {
7456 			tmp = 1;
7457 			buffer_alloc = 2;
7458 		} else if (mode->crtc_hdisplay < 2560) {
7459 			tmp = 2;
7460 			buffer_alloc = 2;
7461 		} else if (mode->crtc_hdisplay < 4096) {
7462 			tmp = 0;
7463 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
7464 		} else {
7465 			DRM_DEBUG_KMS("Mode too big for LB!\n");
7466 			tmp = 0;
7467 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
7468 		}
7469 	} else {
7470 		tmp = 1;
7471 		buffer_alloc = 0;
7472 	}
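	/* tmp picks the LB partition config programmed below; the usable
	 * sizes it yields are the ones returned at the end of this
	 * function (4096, 1920 or 2560 pixels, times two) */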
7473 
7474 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
7475 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
7476 
7477 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
7478 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
7479 	for (i = 0; i < rdev->usec_timeout; i++) {
7480 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
7481 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
7482 			break;
7483 		udelay(1);
7484 	}
7485 
7486 	if (radeon_crtc->base.enabled && mode) {
7487 		switch (tmp) {
7488 		case 0:
7489 		default:
7490 			return 4096 * 2;
7491 		case 1:
7492 			return 1920 * 2;
7493 		case 2:
7494 			return 2560 * 2;
7495 		}
7496 	}
7497 
7498 	/* controller not enabled, so no lb used */
7499 	return 0;
7500 }
7501 
7502 /**
7503  * cik_get_number_of_dram_channels - get the number of dram channels
7504  *
7505  * @rdev: radeon_device pointer
7506  *
7507  * Look up the number of video ram channels (CIK).
7508  * Used for display watermark bandwidth calculations
7509  * Returns the number of dram channels
7510  */
7511 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
7512 {
7513 	u32 tmp = RREG32(MC_SHARED_CHMAP);
7514 
7515 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
7516 	case 0:
7517 	default:
7518 		return 1;
7519 	case 1:
7520 		return 2;
7521 	case 2:
7522 		return 4;
7523 	case 3:
7524 		return 8;
7525 	case 4:
7526 		return 3;
7527 	case 5:
7528 		return 6;
7529 	case 6:
7530 		return 10;
7531 	case 7:
7532 		return 12;
7533 	case 8:
7534 		return 16;
7535 	}
7536 }
7537 
7538 struct dce8_wm_params {
7539 	u32 dram_channels; /* number of dram channels */
7540 	u32 yclk;          /* bandwidth per dram data pin in kHz */
7541 	u32 sclk;          /* engine clock in kHz */
7542 	u32 disp_clk;      /* display clock in kHz */
7543 	u32 src_width;     /* viewport width */
7544 	u32 active_time;   /* active display time in ns */
7545 	u32 blank_time;    /* blank time in ns */
7546 	bool interlaced;    /* mode is interlaced */
7547 	fixed20_12 vsc;    /* vertical scale ratio */
7548 	u32 num_heads;     /* number of active crtcs */
7549 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
7550 	u32 lb_size;       /* line buffer allocated to pipe */
7551 	u32 vtaps;         /* vertical scaler taps */
7552 };
7553 
7554 /**
7555  * dce8_dram_bandwidth - get the dram bandwidth
7556  *
7557  * @wm: watermark calculation data
7558  *
7559  * Calculate the raw dram bandwidth (CIK).
7560  * Used for display watermark bandwidth calculations
7561  * Returns the dram bandwidth in MBytes/s
7562  */
7563 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
7564 {
7565 	/* Calculate raw DRAM Bandwidth */
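	/* i.e. bandwidth (MB/s) = (yclk / 1000) * dram_channels * 4 bytes * 0.7 */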
7566 	fixed20_12 dram_efficiency; /* 0.7 */
7567 	fixed20_12 yclk, dram_channels, bandwidth;
7568 	fixed20_12 a;
7569 
7570 	a.full = dfixed_const(1000);
7571 	yclk.full = dfixed_const(wm->yclk);
7572 	yclk.full = dfixed_div(yclk, a);
7573 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
7574 	a.full = dfixed_const(10);
7575 	dram_efficiency.full = dfixed_const(7);
7576 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
7577 	bandwidth.full = dfixed_mul(dram_channels, yclk);
7578 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
7579 
7580 	return dfixed_trunc(bandwidth);
7581 }
7582 
7583 /**
7584  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
7585  *
7586  * @wm: watermark calculation data
7587  *
7588  * Calculate the dram bandwidth used for display (CIK).
7589  * Used for display watermark bandwidth calculations
7590  * Returns the dram bandwidth for display in MBytes/s
7591  */
7592 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7593 {
7594 	/* Calculate DRAM Bandwidth and the part allocated to display. */
7595 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
7596 	fixed20_12 yclk, dram_channels, bandwidth;
7597 	fixed20_12 a;
7598 
7599 	a.full = dfixed_const(1000);
7600 	yclk.full = dfixed_const(wm->yclk);
7601 	yclk.full = dfixed_div(yclk, a);
7602 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
7603 	a.full = dfixed_const(10);
7604 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
7605 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
7606 	bandwidth.full = dfixed_mul(dram_channels, yclk);
7607 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
7608 
7609 	return dfixed_trunc(bandwidth);
7610 }
7611 
7612 /**
7613  * dce8_data_return_bandwidth - get the data return bandwidth
7614  *
7615  * @wm: watermark calculation data
7616  *
7617  * Calculate the data return bandwidth used for display (CIK).
7618  * Used for display watermark bandwidth calculations
7619  * Returns the data return bandwidth in MBytes/s
7620  */
7621 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
7622 {
7623 	/* Calculate the display Data return Bandwidth */
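	/* i.e. bandwidth (MB/s) = (sclk / 1000) * 32 bytes * 0.8 */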
7624 	fixed20_12 return_efficiency; /* 0.8 */
7625 	fixed20_12 sclk, bandwidth;
7626 	fixed20_12 a;
7627 
7628 	a.full = dfixed_const(1000);
7629 	sclk.full = dfixed_const(wm->sclk);
7630 	sclk.full = dfixed_div(sclk, a);
7631 	a.full = dfixed_const(10);
7632 	return_efficiency.full = dfixed_const(8);
7633 	return_efficiency.full = dfixed_div(return_efficiency, a);
7634 	a.full = dfixed_const(32);
7635 	bandwidth.full = dfixed_mul(a, sclk);
7636 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
7637 
7638 	return dfixed_trunc(bandwidth);
7639 }
7640 
7641 /**
7642  * dce8_dmif_request_bandwidth - get the dmif bandwidth
7643  *
7644  * @wm: watermark calculation data
7645  *
7646  * Calculate the dmif bandwidth used for display (CIK).
7647  * Used for display watermark bandwidth calculations
7648  * Returns the dmif bandwidth in MBytes/s
7649  */
7650 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
7651 {
7652 	/* Calculate the DMIF Request Bandwidth */
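	/* i.e. bandwidth (MB/s) = (disp_clk / 1000) * 32 bytes * 0.8 */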
7653 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
7654 	fixed20_12 disp_clk, bandwidth;
7655 	fixed20_12 a, b;
7656 
7657 	a.full = dfixed_const(1000);
7658 	disp_clk.full = dfixed_const(wm->disp_clk);
7659 	disp_clk.full = dfixed_div(disp_clk, a);
7660 	a.full = dfixed_const(32);
7661 	b.full = dfixed_mul(a, disp_clk);
7662 
7663 	a.full = dfixed_const(10);
7664 	disp_clk_request_efficiency.full = dfixed_const(8);
7665 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
7666 
7667 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
7668 
7669 	return dfixed_trunc(bandwidth);
7670 }
7671 
7672 /**
7673  * dce8_available_bandwidth - get the min available bandwidth
7674  *
7675  * @wm: watermark calculation data
7676  *
7677  * Calculate the min available bandwidth used for display (CIK).
7678  * Used for display watermark bandwidth calculations
7679  * Returns the min available bandwidth in MBytes/s
7680  */
7681 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
7682 {
7683 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
7684 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
7685 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
7686 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
7687 
7688 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
7689 }
7690 
7691 /**
7692  * dce8_average_bandwidth - get the average available bandwidth
7693  *
7694  * @wm: watermark calculation data
7695  *
7696  * Calculate the average available bandwidth used for display (CIK).
7697  * Used for display watermark bandwidth calculations
7698  * Returns the average available bandwidth in MBytes/s
7699  */
7700 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
7701 {
7702 	/* Calculate the display mode Average Bandwidth
7703 	 * DisplayMode should contain the source and destination dimensions,
7704 	 * timing, etc.
7705 	 */
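	/* i.e. bandwidth (MB/s) = src_width * bytes_per_pixel * vsc /
	 * line_time, with line_time in microseconds */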
7706 	fixed20_12 bpp;
7707 	fixed20_12 line_time;
7708 	fixed20_12 src_width;
7709 	fixed20_12 bandwidth;
7710 	fixed20_12 a;
7711 
7712 	a.full = dfixed_const(1000);
7713 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
7714 	line_time.full = dfixed_div(line_time, a);
7715 	bpp.full = dfixed_const(wm->bytes_per_pixel);
7716 	src_width.full = dfixed_const(wm->src_width);
7717 	bandwidth.full = dfixed_mul(src_width, bpp);
7718 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
7719 	bandwidth.full = dfixed_div(bandwidth, line_time);
7720 
7721 	return dfixed_trunc(bandwidth);
7722 }
7723 
7724 /**
7725  * dce8_latency_watermark - get the latency watermark
7726  *
7727  * @wm: watermark calculation data
7728  *
7729  * Calculate the latency watermark (CIK).
7730  * Used for display watermark bandwidth calculations
7731  * Returns the latency watermark in ns
7732  */
7733 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
7734 {
7735 	/* First calculate the latency in ns */
7736 	u32 mc_latency = 2000; /* 2000 ns. */
7737 	u32 available_bandwidth = dce8_available_bandwidth(wm);
7738 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
7739 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
7740 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
7741 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
7742 		(wm->num_heads * cursor_line_pair_return_time);
7743 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
7744 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
7745 	u32 tmp, dmif_size = 12288;
7746 	fixed20_12 a, b, c;
7747 
7748 	if (wm->num_heads == 0)
7749 		return 0;
7750 
7751 	a.full = dfixed_const(2);
7752 	b.full = dfixed_const(1);
7753 	if ((wm->vsc.full > a.full) ||
7754 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
7755 	    (wm->vtaps >= 5) ||
7756 	    ((wm->vsc.full >= a.full) && wm->interlaced))
7757 		max_src_lines_per_dst_line = 4;
7758 	else
7759 		max_src_lines_per_dst_line = 2;
7760 
7761 	a.full = dfixed_const(available_bandwidth);
7762 	b.full = dfixed_const(wm->num_heads);
7763 	a.full = dfixed_div(a, b);
7764 
7765 	b.full = dfixed_const(mc_latency + 512);
7766 	c.full = dfixed_const(wm->disp_clk);
7767 	b.full = dfixed_div(b, c);
7768 
7769 	c.full = dfixed_const(dmif_size);
7770 	b.full = dfixed_div(c, b);
7771 
7772 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
7773 
7774 	b.full = dfixed_const(1000);
7775 	c.full = dfixed_const(wm->disp_clk);
7776 	b.full = dfixed_div(c, b);
7777 	c.full = dfixed_const(wm->bytes_per_pixel);
7778 	b.full = dfixed_mul(b, c);
7779 
7780 	lb_fill_bw = min(tmp, dfixed_trunc(b));
7781 
7782 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
7783 	b.full = dfixed_const(1000);
7784 	c.full = dfixed_const(lb_fill_bw);
7785 	b.full = dfixed_div(c, b);
7786 	a.full = dfixed_div(a, b);
7787 	line_fill_time = dfixed_trunc(a);
7788 
7789 	if (line_fill_time < wm->active_time)
7790 		return latency;
7791 	else
7792 		return latency + (line_fill_time - wm->active_time);
7794 }
7795 
7796 /**
7797  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
7798  * average and available dram bandwidth
7799  *
7800  * @wm: watermark calculation data
7801  *
7802  * Check if the display average bandwidth fits in the display
7803  * dram bandwidth (CIK).
7804  * Used for display watermark bandwidth calculations
7805  * Returns true if the display fits, false if not.
7806  */
7807 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7808 {
7809 	if (dce8_average_bandwidth(wm) <=
7810 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
7811 		return true;
7812 	else
7813 		return false;
7814 }
7815 
7816 /**
7817  * dce8_average_bandwidth_vs_available_bandwidth - check
7818  * average and available bandwidth
7819  *
7820  * @wm: watermark calculation data
7821  *
7822  * Check if the display average bandwidth fits in the display
7823  * available bandwidth (CIK).
7824  * Used for display watermark bandwidth calculations
7825  * Returns true if the display fits, false if not.
7826  */
7827 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
7828 {
7829 	if (dce8_average_bandwidth(wm) <=
7830 	    (dce8_available_bandwidth(wm) / wm->num_heads))
7831 		return true;
7832 	else
7833 		return false;
7834 }
7835 
7836 /**
7837  * dce8_check_latency_hiding - check latency hiding
7838  *
7839  * @wm: watermark calculation data
7840  *
7841  * Check latency hiding (CIK).
7842  * Used for display watermark bandwidth calculations
7843  * Returns true if the display fits, false if not.
7844  */
7845 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
7846 {
7847 	u32 lb_partitions = wm->lb_size / wm->src_width;
7848 	u32 line_time = wm->active_time + wm->blank_time;
7849 	u32 latency_tolerant_lines;
7850 	u32 latency_hiding;
7851 	fixed20_12 a;
7852 
7853 	a.full = dfixed_const(1);
7854 	if (wm->vsc.full > a.full)
7855 		latency_tolerant_lines = 1;
7856 	else {
7857 		if (lb_partitions <= (wm->vtaps + 1))
7858 			latency_tolerant_lines = 1;
7859 		else
7860 			latency_tolerant_lines = 2;
7861 	}
7862 
7863 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
7864 
7865 	if (dce8_latency_watermark(wm) <= latency_hiding)
7866 		return true;
7867 	else
7868 		return false;
7869 }
7870 
7871 /**
7872  * dce8_program_watermarks - program display watermarks
7873  *
7874  * @rdev: radeon_device pointer
7875  * @radeon_crtc: the selected display controller
7876  * @lb_size: line buffer size
7877  * @num_heads: number of display controllers in use
7878  *
7879  * Calculate and program the display watermarks for the
7880  * selected display controller (CIK).
7881  */
7882 static void dce8_program_watermarks(struct radeon_device *rdev,
7883 				    struct radeon_crtc *radeon_crtc,
7884 				    u32 lb_size, u32 num_heads)
7885 {
7886 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
7887 	struct dce8_wm_params wm_low, wm_high;
7888 	u32 pixel_period;
7889 	u32 line_time = 0;
7890 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
7891 	u32 tmp, wm_mask;
7892 
7893 	if (radeon_crtc->base.enabled && num_heads && mode) {
7894 		pixel_period = 1000000 / (u32)mode->clock;
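		/* mode->clock is in kHz, so pixel_period and the
		 * line_time below are in ns; line_time is capped at
		 * 65535 to fit the watermark fields */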
7895 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
7896 
7897 		/* watermark for high clocks */
7898 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
7899 		    rdev->pm.dpm_enabled) {
7900 			wm_high.yclk =
7901 				radeon_dpm_get_mclk(rdev, false) * 10;
7902 			wm_high.sclk =
7903 				radeon_dpm_get_sclk(rdev, false) * 10;
7904 		} else {
7905 			wm_high.yclk = rdev->pm.current_mclk * 10;
7906 			wm_high.sclk = rdev->pm.current_sclk * 10;
7907 		}
7908 
7909 		wm_high.disp_clk = mode->clock;
7910 		wm_high.src_width = mode->crtc_hdisplay;
7911 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
7912 		wm_high.blank_time = line_time - wm_high.active_time;
7913 		wm_high.interlaced = false;
7914 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
7915 			wm_high.interlaced = true;
7916 		wm_high.vsc = radeon_crtc->vsc;
7917 		wm_high.vtaps = 1;
7918 		if (radeon_crtc->rmx_type != RMX_OFF)
7919 			wm_high.vtaps = 2;
7920 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
7921 		wm_high.lb_size = lb_size;
7922 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
7923 		wm_high.num_heads = num_heads;
7924 
7925 		/* set for high clocks */
7926 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
7927 
7928 		/* possibly force display priority to high */
7929 		/* should really do this at mode validation time... */
7930 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
7931 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
7932 		    !dce8_check_latency_hiding(&wm_high) ||
7933 		    (rdev->disp_priority == 2)) {
7934 			DRM_DEBUG_KMS("force priority to high\n");
7935 		}
7936 
7937 		/* watermark for low clocks */
7938 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
7939 		    rdev->pm.dpm_enabled) {
7940 			wm_low.yclk =
7941 				radeon_dpm_get_mclk(rdev, true) * 10;
7942 			wm_low.sclk =
7943 				radeon_dpm_get_sclk(rdev, true) * 10;
7944 		} else {
7945 			wm_low.yclk = rdev->pm.current_mclk * 10;
7946 			wm_low.sclk = rdev->pm.current_sclk * 10;
7947 		}
7948 
7949 		wm_low.disp_clk = mode->clock;
7950 		wm_low.src_width = mode->crtc_hdisplay;
7951 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
7952 		wm_low.blank_time = line_time - wm_low.active_time;
7953 		wm_low.interlaced = false;
7954 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
7955 			wm_low.interlaced = true;
7956 		wm_low.vsc = radeon_crtc->vsc;
7957 		wm_low.vtaps = 1;
7958 		if (radeon_crtc->rmx_type != RMX_OFF)
7959 			wm_low.vtaps = 2;
7960 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
7961 		wm_low.lb_size = lb_size;
7962 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
7963 		wm_low.num_heads = num_heads;
7964 
7965 		/* set for low clocks */
7966 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
7967 
7968 		/* possibly force display priority to high */
7969 		/* should really do this at mode validation time... */
7970 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
7971 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
7972 		    !dce8_check_latency_hiding(&wm_low) ||
7973 		    (rdev->disp_priority == 2)) {
7974 			DRM_DEBUG_KMS("force priority to high\n");
7975 		}
7976 	}
7977 
7978 	/* select wm A */
7979 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
7980 	tmp = wm_mask;
7981 	tmp &= ~LATENCY_WATERMARK_MASK(3);
7982 	tmp |= LATENCY_WATERMARK_MASK(1);
7983 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
7984 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
7985 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
7986 		LATENCY_HIGH_WATERMARK(line_time)));
7987 	/* select wm B */
7988 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
7989 	tmp &= ~LATENCY_WATERMARK_MASK(3);
7990 	tmp |= LATENCY_WATERMARK_MASK(2);
7991 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
7992 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
7993 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
7994 		LATENCY_HIGH_WATERMARK(line_time)));
7995 	/* restore original selection */
7996 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
7997 
7998 	/* save values for DPM */
7999 	radeon_crtc->line_time = line_time;
8000 	radeon_crtc->wm_high = latency_watermark_a;
8001 	radeon_crtc->wm_low = latency_watermark_b;
8002 }
8003 
8004 /**
8005  * dce8_bandwidth_update - program display watermarks
8006  *
8007  * @rdev: radeon_device pointer
8008  *
8009  * Calculate and program the display watermarks and line
8010  * buffer allocation (CIK).
8011  */
8012 void dce8_bandwidth_update(struct radeon_device *rdev)
8013 {
8014 	struct drm_display_mode *mode = NULL;
8015 	u32 num_heads = 0, lb_size;
8016 	int i;
8017 
8018 	radeon_update_display_priority(rdev);
8019 
8020 	for (i = 0; i < rdev->num_crtc; i++) {
8021 		if (rdev->mode_info.crtcs[i]->base.enabled)
8022 			num_heads++;
8023 	}
8024 	for (i = 0; i < rdev->num_crtc; i++) {
8025 		mode = &rdev->mode_info.crtcs[i]->base.mode;
8026 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8027 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8028 	}
8029 }
8030 
8031 /**
8032  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8033  *
8034  * @rdev: radeon_device pointer
8035  *
8036  * Fetches a GPU clock counter snapshot (CIK).
8037  * Returns the 64 bit clock counter snapshot.
8038  */
8039 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8040 {
8041 	uint64_t clock;
8042 
8043 	mutex_lock(&rdev->gpu_clock_mutex);
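	/* writing the capture bit presumably latches the full 64-bit
	 * count so the two 32-bit halves read back consistently; the
	 * mutex serializes concurrent captures */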
8044 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8045 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8046 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8047 	mutex_unlock(&rdev->gpu_clock_mutex);
8048 	return clock;
8049 }
8050 
8051 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8052                               u32 cntl_reg, u32 status_reg)
8053 {
8054 	int r, i;
8055 	struct atom_clock_dividers dividers;
8056 	uint32_t tmp;
8057 
8058 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8059 					   clock, false, &dividers);
8060 	if (r)
8061 		return r;
8062 
8063 	tmp = RREG32_SMC(cntl_reg);
8064 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8065 	tmp |= dividers.post_divider;
8066 	WREG32_SMC(cntl_reg, tmp);
8067 
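	/* wait up to one second (100 * 10 ms) for DCLK_STATUS to assert */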
8068 	for (i = 0; i < 100; i++) {
8069 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
8070 			break;
8071 		mdelay(10);
8072 	}
8073 	if (i == 100)
8074 		return -ETIMEDOUT;
8075 
8076 	return 0;
8077 }
8078 
8079 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8080 {
8081 	int r = 0;
8082 
8083 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8084 	if (r)
8085 		return r;
8086 
8087 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8088 	return r;
8089 }
8090 
8091 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
8092 {
8093 	struct pci_dev *root = rdev->pdev->bus->self;
8094 	int bridge_pos, gpu_pos;
8095 	u32 speed_cntl, mask, current_data_rate;
8096 	int ret, i;
8097 	u16 tmp16;
8098 
8099 	if (radeon_pcie_gen2 == 0)
8100 		return;
8101 
8102 	if (rdev->flags & RADEON_IS_IGP)
8103 		return;
8104 
8105 	if (!(rdev->flags & RADEON_IS_PCIE))
8106 		return;
8107 
8108 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
8109 	if (ret != 0)
8110 		return;
8111 
8112 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
8113 		return;
8114 
8115 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8116 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
8117 		LC_CURRENT_DATA_RATE_SHIFT;
8118 	if (mask & DRM_PCIE_SPEED_80) {
8119 		if (current_data_rate == 2) {
8120 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
8121 			return;
8122 		}
8123 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
8124 	} else if (mask & DRM_PCIE_SPEED_50) {
8125 		if (current_data_rate == 1) {
8126 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
8127 			return;
8128 		}
8129 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
8130 	}
8131 
8132 	bridge_pos = pci_pcie_cap(root);
8133 	if (!bridge_pos)
8134 		return;
8135 
8136 	gpu_pos = pci_pcie_cap(rdev->pdev);
8137 	if (!gpu_pos)
8138 		return;
8139 
8140 	if (mask & DRM_PCIE_SPEED_80) {
8141 		/* re-try equalization if gen3 is not already enabled */
8142 		if (current_data_rate != 2) {
8143 			u16 bridge_cfg, gpu_cfg;
8144 			u16 bridge_cfg2, gpu_cfg2;
8145 			u32 max_lw, current_lw, tmp;
8146 
8147 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8148 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8149 
8150 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
8151 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8152 
8153 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
8154 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8155 
8156 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8157 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
8158 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
8159 
8160 			if (current_lw < max_lw) {
8161 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8162 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
8163 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
8164 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
8165 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
8166 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
8167 				}
8168 			}
8169 
8170 			for (i = 0; i < 10; i++) {
8171 				/* check status */
8172 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
8173 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
8174 					break;
8175 
8176 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8177 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8178 
8179 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
8180 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
8181 
8182 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8183 				tmp |= LC_SET_QUIESCE;
8184 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8185 
8186 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8187 				tmp |= LC_REDO_EQ;
8188 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8189 
8190 				mdelay(100);
8191 
8192 				/* linkctl */
8193 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
8194 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8195 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
8196 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8197 
8198 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
8199 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8200 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
8201 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8202 
8203 				/* linkctl2 */
8204 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
8205 				tmp16 &= ~((1 << 4) | (7 << 9));
8206 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
8207 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
8208 
8209 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8210 				tmp16 &= ~((1 << 4) | (7 << 9));
8211 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
8212 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8213 
8214 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8215 				tmp &= ~LC_SET_QUIESCE;
8216 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8217 			}
8218 		}
8219 	}
8220 
8221 	/* set the link speed */
8222 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
8223 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
8224 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8225 
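	/* LNKCTL2 bits [3:0] are the target link speed: 1 = 2.5GT/s,
	 * 2 = 5.0GT/s, 3 = 8.0GT/s */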
8226 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8227 	tmp16 &= ~0xf;
8228 	if (mask & DRM_PCIE_SPEED_80)
8229 		tmp16 |= 3; /* gen3 */
8230 	else if (mask & DRM_PCIE_SPEED_50)
8231 		tmp16 |= 2; /* gen2 */
8232 	else
8233 		tmp16 |= 1; /* gen1 */
8234 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8235 
8236 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8237 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
8238 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8239 
8240 	for (i = 0; i < rdev->usec_timeout; i++) {
8241 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8242 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
8243 			break;
8244 		udelay(1);
8245 	}
8246 }
8247 
8248 static void cik_program_aspm(struct radeon_device *rdev)
8249 {
8250 	u32 data, orig;
8251 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
8252 	bool disable_clkreq = false;
8253 
8254 	if (radeon_aspm == 0)
8255 		return;
8256 
8257 	/* XXX double check IGPs */
8258 	if (rdev->flags & RADEON_IS_IGP)
8259 		return;
8260 
8261 	if (!(rdev->flags & RADEON_IS_PCIE))
8262 		return;
8263 
8264 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8265 	data &= ~LC_XMIT_N_FTS_MASK;
8266 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
8267 	if (orig != data)
8268 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
8269 
8270 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
8271 	data |= LC_GO_TO_RECOVERY;
8272 	if (orig != data)
8273 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
8274 
8275 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
8276 	data |= P_IGNORE_EDB_ERR;
8277 	if (orig != data)
8278 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
8279 
8280 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8281 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
8282 	data |= LC_PMI_TO_L1_DIS;
8283 	if (!disable_l0s)
8284 		data |= LC_L0S_INACTIVITY(7);
8285 
8286 	if (!disable_l1) {
8287 		data |= LC_L1_INACTIVITY(7);
8288 		data &= ~LC_PMI_TO_L1_DIS;
8289 		if (orig != data)
8290 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8291 
8292 		if (!disable_plloff_in_l1) {
8293 			bool clk_req_support;
8294 
8295 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
8296 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8297 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8298 			if (orig != data)
8299 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
8300 
8301 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
8302 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8303 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8304 			if (orig != data)
8305 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
8306 
8307 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
8308 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8309 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8310 			if (orig != data)
8311 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
8312 
8313 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
8314 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8315 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8316 			if (orig != data)
8317 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
8318 
8319 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8320 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
8321 			data |= LC_DYN_LANES_PWR_STATE(3);
8322 			if (orig != data)
8323 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
8324 
8325 			if (!disable_clkreq) {
8326 				struct pci_dev *root = rdev->pdev->bus->self;
8327 				u32 lnkcap;
8328 
8329 				clk_req_support = false;
8330 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
8331 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
8332 					clk_req_support = true;
8333 			} else {
8334 				clk_req_support = false;
8335 			}
8336 
8337 			if (clk_req_support) {
8338 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
8339 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
8340 				if (orig != data)
8341 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
8342 
8343 				orig = data = RREG32_SMC(THM_CLK_CNTL);
8344 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
8345 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
8346 				if (orig != data)
8347 					WREG32_SMC(THM_CLK_CNTL, data);
8348 
8349 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
8350 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
8351 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
8352 				if (orig != data)
8353 					WREG32_SMC(MISC_CLK_CTRL, data);
8354 
8355 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
8356 				data &= ~BCLK_AS_XCLK;
8357 				if (orig != data)
8358 					WREG32_SMC(CG_CLKPIN_CNTL, data);
8359 
8360 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
8361 				data &= ~FORCE_BIF_REFCLK_EN;
8362 				if (orig != data)
8363 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
8364 
8365 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
8366 				data &= ~MPLL_CLKOUT_SEL_MASK;
8367 				data |= MPLL_CLKOUT_SEL(4);
8368 				if (orig != data)
8369 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
8370 			}
8371 		}
8372 	} else {
8373 		if (orig != data)
8374 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8375 	}
8376 
8377 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
8378 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
8379 	if (orig != data)
8380 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
8381 
8382 	if (!disable_l0s) {
8383 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8384 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
8385 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8386 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
8387 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8388 				data &= ~LC_L0S_INACTIVITY_MASK;
8389 				if (orig != data)
8390 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8391 			}
8392 		}
8393 	}
8394 }
8395