/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void cik_sdma_vm_set_page(struct radeon_device *rdev,
				 struct radeon_ib *ib,
				 uint64_t pe,
				 uint64_t addr, unsigned count,
				 uint32_t incr, uint32_t flags);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/**
 * ci_get_temp - get the current GPU temperature
 *
 * @rdev: radeon_device pointer
 *
 * Returns the current temperature of the GPU in millidegrees
 * Celsius (CIK, dGPU).
 */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	/* CTF_TEMP is a 9-bit value; treat readings with bit 9 set as
	 * out of range and clamp them to 255C.
	 */
	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/**
 * kv_get_temp - get the current GPU temperature
 *
 * @rdev: radeon_device pointer
 *
 * Returns the current temperature of the GPU in millidegrees
 * Celsius (CIK, APU).
 */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	/* raw SMC address of the current-temp register (no symbolic define) */
	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/*
 * Indirect register accessors
 *
 * The PCIE port registers sit behind an index/data pair: each accessor
 * programs PCIE_INDEX and then reads or writes PCIE_DATA.  The dummy
 * readbacks post the writes before the lock is released.
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}

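/*
 * RLC save/restore register lists.  Entries pair an encoded register
 * address (a select/broadcast field in the high 16 bits, the register
 * dword offset in the low 16 bits) with a zero placeholder for the
 * saved value; the bare 0x3/0x5 words appear to delimit sub-lists
 * consumed by the RLC ucode.  The kalindi list below uses the same
 * layout.
 */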
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

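/*
 * "Golden" register tables.  Each table is a sequence of
 * {offset, and_mask, or_mask} triples consumed by
 * radeon_program_register_sequence(): the register is read, the bits in
 * and_mask are cleared, or_mask is ORed in, and the result is written
 * back (an and_mask of 0xffffffff writes or_mask directly).
 */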
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28355, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

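/*
 * Note: kalindi and spectre are the GPU codenames for KABINI and
 * KAVERI respectively, hence the table selection below.
 */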
static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	default:
		break;
	}
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 *
 * Returns the value in the doorbell aperture at the
 * requested offset (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
{
	if (offset < rdev->doorbell.size) {
		return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested offset (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
{
	if (offset < rdev->doorbell.size) {
		writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
	}
}

#define BONAIRE_IO_MC_REGS_SIZE 36

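/* {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs programmed
 * before the MC ucode is loaded (see ci_mc_load_microcode()).
 */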
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances.  Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}
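
/*
 * Callers are expected to serialize SRBM selection (the driver uses
 * rdev->srbm_mutex for this) and to restore the default instance with
 * cik_srbm_select(rdev, 0, 0, 0, 0) when done.
 */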

/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	/* only load the ucode if the MC sequencer is not already running */
	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}

/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size,
		sdma_req_size, smc_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = CIK_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
		goto out;	/* don't let a later request_firmware() clobber the error */
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->mec_fw->size != mec_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mec_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->sdma_fw->size != sdma_req_size) {
		printk(KERN_ERR
		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
		       rdev->sdma_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	/* No SMC, MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mc_fw->size != mc_req_size) {
			printk(KERN_ERR
			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}

		/* SMC firmware is optional; fall back gracefully if it is missing */
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
		if (err) {
			printk(KERN_ERR
			       "smc: error loading firmware \"%s\"\n",
			       fw_name);
			release_firmware(rdev->smc_fw);
			rdev->smc_fw = NULL;
			err = 0;
		} else if (rdev->smc_fw->size != smc_req_size) {
			printk(KERN_ERR
			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->smc_fw->size, fw_name);
			err = -EINVAL;
		}
	}

out:
	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
		release_firmware(rdev->smc_fw);
		rdev->smc_fw = NULL;
	}
	return err;
}

/*
 * Core functions
 */
/**
 * cik_tiling_mode_table_init - init the hw tiling table
 *
 * @rdev: radeon_device pointer
 *
 * Starting with SI, the tiling setup is done globally in a
 * set of 32 tiling modes.  Rather than selecting each set of
 * parameters per surface as on older asics, we just select
 * which index in the tiling table we want to use, and the
 * surface uses those parameters (CIK).
 */
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
{
	const u32 num_tile_mode_states = 32;
	const u32 num_secondary_tile_mode_states = 16;
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
	u32 num_pipe_configs;
	u32 num_rbs = rdev->config.cik.max_backends_per_se *
		rdev->config.cik.max_shader_engines;

	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
		break;
	case 2:
	default:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
		break;
	case 4:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
		break;
	}

	num_pipe_configs = rdev->config.cik.max_tile_pipes;
	if (num_pipe_configs > 8)
		num_pipe_configs = 8; /* ??? */

	if (num_pipe_configs == 8) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
				break;
			case 1:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
				break;
			case 2:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 3:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
				break;
			case 4:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 5:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
				break;
			case 6:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 7:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 8:
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
				break;
			case 9:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
				break;
			case 10:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 11:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 12:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 13:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
				break;
			case 14:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 16:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
1888 				gb_tile_moden = 0;
1889 				break;
1890 			}
1891 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1892 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1893 		}
1894 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1895 			switch (reg_offset) {
1896 			case 0:
1897 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1898 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1899 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1900 						 NUM_BANKS(ADDR_SURF_16_BANK));
1901 				break;
1902 			case 1:
1903 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1904 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1905 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1906 						 NUM_BANKS(ADDR_SURF_16_BANK));
1907 				break;
1908 			case 2:
1909 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1910 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1911 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1912 						 NUM_BANKS(ADDR_SURF_16_BANK));
1913 				break;
1914 			case 3:
1915 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1916 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1917 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1918 						 NUM_BANKS(ADDR_SURF_16_BANK));
1919 				break;
1920 			case 4:
1921 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1922 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1923 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1924 						 NUM_BANKS(ADDR_SURF_8_BANK));
1925 				break;
1926 			case 5:
1927 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1928 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1929 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1930 						 NUM_BANKS(ADDR_SURF_4_BANK));
1931 				break;
1932 			case 6:
1933 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1934 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1935 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1936 						 NUM_BANKS(ADDR_SURF_2_BANK));
1937 				break;
1938 			case 8:
1939 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1940 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1941 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1942 						 NUM_BANKS(ADDR_SURF_16_BANK));
1943 				break;
1944 			case 9:
1945 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1946 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1947 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1948 						 NUM_BANKS(ADDR_SURF_16_BANK));
1949 				break;
1950 			case 10:
1951 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1952 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1953 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1954 						 NUM_BANKS(ADDR_SURF_16_BANK));
1955 				break;
1956 			case 11:
1957 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1958 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1959 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1960 						 NUM_BANKS(ADDR_SURF_16_BANK));
1961 				break;
1962 			case 12:
1963 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1964 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1965 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1966 						 NUM_BANKS(ADDR_SURF_8_BANK));
1967 				break;
1968 			case 13:
1969 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1970 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1971 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1972 						 NUM_BANKS(ADDR_SURF_4_BANK));
1973 				break;
1974 			case 14:
1975 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1976 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1977 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1978 						 NUM_BANKS(ADDR_SURF_2_BANK));
1979 				break;
1980 			default:
1981 				gb_tile_moden = 0;
1982 				break;
1983 			}
1984 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1985 		}
1986 	} else if (num_pipe_configs == 4) {
1987 		if (num_rbs == 4) {
1988 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1989 				switch (reg_offset) {
1990 				case 0:
1991 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1992 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1993 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1994 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1995 					break;
1996 				case 1:
1997 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1998 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1999 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2000 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2001 					break;
2002 				case 2:
2003 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2004 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2005 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2006 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2007 					break;
2008 				case 3:
2009 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2010 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2011 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2012 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2013 					break;
2014 				case 4:
2015 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2016 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2017 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2018 							 TILE_SPLIT(split_equal_to_row_size));
2019 					break;
2020 				case 5:
2021 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2022 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2023 					break;
2024 				case 6:
2025 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2026 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2027 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2028 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2029 					break;
2030 				case 7:
2031 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2032 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2033 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2034 							 TILE_SPLIT(split_equal_to_row_size));
2035 					break;
2036 				case 8:
2037 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2038 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2039 					break;
2040 				case 9:
2041 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2042 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2043 					break;
2044 				case 10:
2045 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2046 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2047 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2048 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2049 					break;
2050 				case 11:
2051 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2052 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2053 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2054 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2055 					break;
2056 				case 12:
2057 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2058 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2059 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2060 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2061 					break;
2062 				case 13:
2063 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2064 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2065 					break;
2066 				case 14:
2067 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2068 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2069 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2070 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2071 					break;
2072 				case 16:
2073 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2074 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2075 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2076 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2077 					break;
2078 				case 17:
2079 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2080 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2081 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2082 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2083 					break;
2084 				case 27:
2085 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2086 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2087 					break;
2088 				case 28:
2089 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2090 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2091 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2092 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2093 					break;
2094 				case 29:
2095 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2096 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2097 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2098 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2099 					break;
2100 				case 30:
2101 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2102 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2103 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2104 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2105 					break;
2106 				default:
2107 					gb_tile_moden = 0;
2108 					break;
2109 				}
2110 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2111 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2112 			}
2113 		} else if (num_rbs < 4) {
2114 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2115 				switch (reg_offset) {
2116 				case 0:
2117 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2118 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2119 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2120 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2121 					break;
2122 				case 1:
2123 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2124 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2125 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2126 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2127 					break;
2128 				case 2:
2129 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2130 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2131 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2132 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2133 					break;
2134 				case 3:
2135 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2136 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2137 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2138 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2139 					break;
2140 				case 4:
2141 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2142 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2143 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2144 							 TILE_SPLIT(split_equal_to_row_size));
2145 					break;
2146 				case 5:
2147 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2148 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2149 					break;
2150 				case 6:
2151 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2152 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2153 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2154 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2155 					break;
2156 				case 7:
2157 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2158 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2159 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2160 							 TILE_SPLIT(split_equal_to_row_size));
2161 					break;
2162 				case 8:
2163 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2164 							 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2165 					break;
2166 				case 9:
2167 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2168 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2169 					break;
2170 				case 10:
2171 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2172 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2173 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2174 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2175 					break;
2176 				case 11:
2177 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2178 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2179 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2180 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2181 					break;
2182 				case 12:
2183 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2184 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2185 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2186 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2187 					break;
2188 				case 13:
2189 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2190 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2191 					break;
2192 				case 14:
2193 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2194 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2195 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2196 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2197 					break;
2198 				case 16:
2199 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2200 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2201 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2202 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2203 					break;
2204 				case 17:
2205 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2206 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2207 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2208 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2209 					break;
2210 				case 27:
2211 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2212 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2213 					break;
2214 				case 28:
2215 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2216 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2217 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2218 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2219 					break;
2220 				case 29:
2221 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2222 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2223 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2224 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2225 					break;
2226 				case 30:
2227 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2228 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2229 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2230 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2231 					break;
2232 				default:
2233 					gb_tile_moden = 0;
2234 					break;
2235 				}
2236 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2237 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2238 			}
2239 		}
2240 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2241 			switch (reg_offset) {
2242 			case 0:
2243 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2244 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2245 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2246 						 NUM_BANKS(ADDR_SURF_16_BANK));
2247 				break;
2248 			case 1:
2249 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2250 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2251 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2252 						 NUM_BANKS(ADDR_SURF_16_BANK));
2253 				break;
2254 			case 2:
2255 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2256 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2257 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2258 						 NUM_BANKS(ADDR_SURF_16_BANK));
2259 				break;
2260 			case 3:
2261 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2262 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2263 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2264 						 NUM_BANKS(ADDR_SURF_16_BANK));
2265 				break;
2266 			case 4:
2267 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2268 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2269 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2270 						 NUM_BANKS(ADDR_SURF_16_BANK));
2271 				break;
2272 			case 5:
2273 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2274 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2275 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2276 						 NUM_BANKS(ADDR_SURF_8_BANK));
2277 				break;
2278 			case 6:
2279 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2280 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2281 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2282 						 NUM_BANKS(ADDR_SURF_4_BANK));
2283 				break;
2284 			case 8:
2285 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2286 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2287 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2288 						 NUM_BANKS(ADDR_SURF_16_BANK));
2289 				break;
2290 			case 9:
2291 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2292 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2293 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2294 						 NUM_BANKS(ADDR_SURF_16_BANK));
2295 				break;
2296 			case 10:
2297 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2298 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2299 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2300 						 NUM_BANKS(ADDR_SURF_16_BANK));
2301 				break;
2302 			case 11:
2303 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2304 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2305 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2306 						 NUM_BANKS(ADDR_SURF_16_BANK));
2307 				break;
2308 			case 12:
2309 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2310 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2311 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2312 						 NUM_BANKS(ADDR_SURF_16_BANK));
2313 				break;
2314 			case 13:
2315 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2316 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2317 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2318 						 NUM_BANKS(ADDR_SURF_8_BANK));
2319 				break;
2320 			case 14:
2321 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2322 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2323 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2324 						 NUM_BANKS(ADDR_SURF_4_BANK));
2325 				break;
2326 			default:
2327 				gb_tile_moden = 0;
2328 				break;
2329 			}
2330 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2331 		}
2332 	} else if (num_pipe_configs == 2) {
2333 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2334 			switch (reg_offset) {
2335 			case 0:
2336 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2338 						 PIPE_CONFIG(ADDR_SURF_P2) |
2339 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2340 				break;
2341 			case 1:
2342 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2343 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2344 						 PIPE_CONFIG(ADDR_SURF_P2) |
2345 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2346 				break;
2347 			case 2:
2348 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2349 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2350 						 PIPE_CONFIG(ADDR_SURF_P2) |
2351 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2352 				break;
2353 			case 3:
2354 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2355 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2356 						 PIPE_CONFIG(ADDR_SURF_P2) |
2357 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2358 				break;
2359 			case 4:
2360 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2361 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2362 						 PIPE_CONFIG(ADDR_SURF_P2) |
2363 						 TILE_SPLIT(split_equal_to_row_size));
2364 				break;
2365 			case 5:
2366 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2367 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2368 				break;
2369 			case 6:
2370 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2371 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2372 						 PIPE_CONFIG(ADDR_SURF_P2) |
2373 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2374 				break;
2375 			case 7:
2376 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2377 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2378 						 PIPE_CONFIG(ADDR_SURF_P2) |
2379 						 TILE_SPLIT(split_equal_to_row_size));
2380 				break;
2381 			case 8:
2382 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2383 				break;
2384 			case 9:
2385 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2386 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2387 				break;
2388 			case 10:
2389 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2390 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2391 						 PIPE_CONFIG(ADDR_SURF_P2) |
2392 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2393 				break;
2394 			case 11:
2395 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2396 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2397 						 PIPE_CONFIG(ADDR_SURF_P2) |
2398 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2399 				break;
2400 			case 12:
2401 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2402 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2403 						 PIPE_CONFIG(ADDR_SURF_P2) |
2404 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2405 				break;
2406 			case 13:
2407 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2408 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2409 				break;
2410 			case 14:
2411 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2412 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2413 						 PIPE_CONFIG(ADDR_SURF_P2) |
2414 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2415 				break;
2416 			case 16:
2417 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2418 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2419 						 PIPE_CONFIG(ADDR_SURF_P2) |
2420 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421 				break;
2422 			case 17:
2423 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2424 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2425 						 PIPE_CONFIG(ADDR_SURF_P2) |
2426 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2427 				break;
2428 			case 27:
2429 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2430 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2431 				break;
2432 			case 28:
2433 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2434 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2435 						 PIPE_CONFIG(ADDR_SURF_P2) |
2436 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2437 				break;
2438 			case 29:
2439 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2440 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2441 						 PIPE_CONFIG(ADDR_SURF_P2) |
2442 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2443 				break;
2444 			case 30:
2445 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2446 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2447 						 PIPE_CONFIG(ADDR_SURF_P2) |
2448 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2449 				break;
2450 			default:
2451 				gb_tile_moden = 0;
2452 				break;
2453 			}
2454 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2455 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2456 		}
2457 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2458 			switch (reg_offset) {
2459 			case 0:
2460 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2461 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2462 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2463 						 NUM_BANKS(ADDR_SURF_16_BANK));
2464 				break;
2465 			case 1:
2466 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2467 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2468 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2469 						 NUM_BANKS(ADDR_SURF_16_BANK));
2470 				break;
2471 			case 2:
2472 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2474 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2475 						 NUM_BANKS(ADDR_SURF_16_BANK));
2476 				break;
2477 			case 3:
2478 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2480 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2481 						 NUM_BANKS(ADDR_SURF_16_BANK));
2482 				break;
2483 			case 4:
2484 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2487 						 NUM_BANKS(ADDR_SURF_16_BANK));
2488 				break;
2489 			case 5:
2490 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2492 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2493 						 NUM_BANKS(ADDR_SURF_16_BANK));
2494 				break;
2495 			case 6:
2496 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2498 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2499 						 NUM_BANKS(ADDR_SURF_8_BANK));
2500 				break;
2501 			case 8:
2502 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2503 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2504 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2505 						 NUM_BANKS(ADDR_SURF_16_BANK));
2506 				break;
2507 			case 9:
2508 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2509 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2510 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2511 						 NUM_BANKS(ADDR_SURF_16_BANK));
2512 				break;
2513 			case 10:
2514 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2515 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2516 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2517 						 NUM_BANKS(ADDR_SURF_16_BANK));
2518 				break;
2519 			case 11:
2520 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2521 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2522 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2523 						 NUM_BANKS(ADDR_SURF_16_BANK));
2524 				break;
2525 			case 12:
2526 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2528 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2529 						 NUM_BANKS(ADDR_SURF_16_BANK));
2530 				break;
2531 			case 13:
2532 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2533 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2534 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2535 						 NUM_BANKS(ADDR_SURF_16_BANK));
2536 				break;
2537 			case 14:
2538 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2540 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2541 						 NUM_BANKS(ADDR_SURF_8_BANK));
2542 				break;
2543 			default:
2544 				gb_tile_moden = 0;
2545 				break;
2546 			}
2547 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2548 		}
2549 	} else
2550 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2551 }
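
/*
 * Usage sketch (illustrative, not part of the driver): consumers pick
 * tiling by table index rather than by re-encoding parameters, and a
 * mode can be read back just as it was written above:
 *
 *	u32 moden = RREG32(GB_TILE_MODE0 + (tile_index * 4));
 *
 * which should match rdev->config.cik.tile_mode_array[tile_index] for
 * the primary tile mode table.
 */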
2552 
2553 /**
2554  * cik_select_se_sh - select which SE, SH to address
2555  *
2556  * @rdev: radeon_device pointer
2557  * @se_num: shader engine to address
2558  * @sh_num: sh block to address
2559  *
2560  * Select which SE, SH combinations to address. Certain
2561  * registers are instanced per SE or SH.  0xffffffff means
2562  * broadcast to all SEs or SHs (CIK).
2563  */
2564 static void cik_select_se_sh(struct radeon_device *rdev,
2565 			     u32 se_num, u32 sh_num)
2566 {
2567 	u32 data = INSTANCE_BROADCAST_WRITES;
2568 
2569 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2570 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2571 	else if (se_num == 0xffffffff)
2572 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2573 	else if (sh_num == 0xffffffff)
2574 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2575 	else
2576 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2577 	WREG32(GRBM_GFX_INDEX, data);
2578 }
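
/*
 * Typical usage pattern (see cik_setup_rb() below): select one SE/SH
 * instance, access the instanced registers, then restore broadcast so
 * that later writes reach every instance:
 *
 *	cik_select_se_sh(rdev, se, sh);
 *	... read/write instanced registers ...
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 */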
2579 
2580 /**
2581  * cik_create_bitmask - create a bitmask
2582  *
2583  * @bit_width: length of the mask
2584  *
2585  * Create a variable-length bit mask (CIK).
2586  * Returns the bitmask.
2587  */
2588 static u32 cik_create_bitmask(u32 bit_width)
2589 {
2590 	u32 i, mask = 0;
2591 
2592 	for (i = 0; i < bit_width; i++) {
2593 		mask <<= 1;
2594 		mask |= 1;
2595 	}
2596 	return mask;
2597 }
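
/*
 * For bit_width < 32 the loop above is equivalent to the closed form
 *
 *	mask = (1u << bit_width) - 1;
 *
 * e.g. cik_create_bitmask(4) == 0xf.  The loop form additionally
 * handles bit_width == 32 without an undefined 32-bit shift.
 */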
2598 
2599 /**
2600  * cik_get_rb_disabled - get the disabled render backends
2601  *
2602  * @rdev: radeon_device pointer
2603  * @max_rb_num: max RBs (render backends) for the asic
2604  * @se_num: number of SEs (shader engines) for the asic
2605  * @sh_per_se: number of SH blocks per SE for the asic
2606  *
2607  * Calculates the bitmask of disabled RBs (CIK).
2608  * Returns the disabled RB bitmask.
2609  */
2610 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
2611 			      u32 max_rb_num, u32 se_num,
2612 			      u32 sh_per_se)
2613 {
2614 	u32 data, mask;
2615 
2616 	data = RREG32(CC_RB_BACKEND_DISABLE);
2617 	if (data & 1)
2618 		data &= BACKEND_DISABLE_MASK;
2619 	else
2620 		data = 0;
2621 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2622 
2623 	data >>= BACKEND_DISABLE_SHIFT;
2624 
2625 	mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
2626 
2627 	return data & mask;
2628 }
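
/*
 * Worked example (hypothetical values): with max_rb_num = 4,
 * se_num = 2 and sh_per_se = 1, the mask covers 4 / 2 / 1 = 2 RBs
 * per SH, i.e. cik_create_bitmask(2) == 0x3, so only those two
 * disable bits are reported back to the caller.
 */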
2629 
2630 /**
2631  * cik_setup_rb - setup the RBs on the asic
2632  *
2633  * @rdev: radeon_device pointer
2634  * @se_num: number of SEs (shader engines) for the asic
2635  * @sh_per_se: number of SH blocks per SE for the asic
2636  * @max_rb_num: max RBs (render backends) for the asic
2637  *
2638  * Configures per-SE/SH RB registers (CIK).
2639  */
2640 static void cik_setup_rb(struct radeon_device *rdev,
2641 			 u32 se_num, u32 sh_per_se,
2642 			 u32 max_rb_num)
2643 {
2644 	int i, j;
2645 	u32 data, mask;
2646 	u32 disabled_rbs = 0;
2647 	u32 enabled_rbs = 0;
2648 
2649 	for (i = 0; i < se_num; i++) {
2650 		for (j = 0; j < sh_per_se; j++) {
2651 			cik_select_se_sh(rdev, i, j);
2652 			data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
2653 			disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
2654 		}
2655 	}
2656 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2657 
2658 	mask = 1;
2659 	for (i = 0; i < max_rb_num; i++) {
2660 		if (!(disabled_rbs & mask))
2661 			enabled_rbs |= mask;
2662 		mask <<= 1;
2663 	}
2664 
2665 	for (i = 0; i < se_num; i++) {
2666 		cik_select_se_sh(rdev, i, 0xffffffff);
2667 		data = 0;
2668 		for (j = 0; j < sh_per_se; j++) {
2669 			switch (enabled_rbs & 3) {
2670 			case 1:
2671 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2672 				break;
2673 			case 2:
2674 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2675 				break;
2676 			case 3:
2677 			default:
2678 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2679 				break;
2680 			}
2681 			enabled_rbs >>= 2;
2682 		}
2683 		WREG32(PA_SC_RASTER_CONFIG, data);
2684 	}
2685 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2686 }
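
/*
 * Worked example (hypothetical values) for the enable-mask inversion
 * in cik_setup_rb() above: with max_rb_num = 4 and
 * disabled_rbs = 0b0010, the loop produces enabled_rbs = 0b1101,
 * i.e. every RB whose disable bit is clear.
 */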
2687 
2688 /**
2689  * cik_gpu_init - setup the 3D engine
2690  *
2691  * @rdev: radeon_device pointer
2692  *
2693  * Configures the 3D engine and tiling configuration
2694  * registers so that the 3D engine is usable.
2695  */
2696 static void cik_gpu_init(struct radeon_device *rdev)
2697 {
2698 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
2699 	u32 mc_shared_chmap, mc_arb_ramcfg;
2700 	u32 hdp_host_path_cntl;
2701 	u32 tmp;
2702 	int i, j;
2703 
2704 	switch (rdev->family) {
2705 	case CHIP_BONAIRE:
2706 		rdev->config.cik.max_shader_engines = 2;
2707 		rdev->config.cik.max_tile_pipes = 4;
2708 		rdev->config.cik.max_cu_per_sh = 7;
2709 		rdev->config.cik.max_sh_per_se = 1;
2710 		rdev->config.cik.max_backends_per_se = 2;
2711 		rdev->config.cik.max_texture_channel_caches = 4;
2712 		rdev->config.cik.max_gprs = 256;
2713 		rdev->config.cik.max_gs_threads = 32;
2714 		rdev->config.cik.max_hw_contexts = 8;
2715 
2716 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2717 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2718 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2719 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2720 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2721 		break;
2722 	case CHIP_KAVERI:
2723 		rdev->config.cik.max_shader_engines = 1;
2724 		rdev->config.cik.max_tile_pipes = 4;
2725 		if ((rdev->pdev->device == 0x1304) ||
2726 		    (rdev->pdev->device == 0x1305) ||
2727 		    (rdev->pdev->device == 0x130C) ||
2728 		    (rdev->pdev->device == 0x130F) ||
2729 		    (rdev->pdev->device == 0x1310) ||
2730 		    (rdev->pdev->device == 0x1311) ||
2731 		    (rdev->pdev->device == 0x131C)) {
2732 			rdev->config.cik.max_cu_per_sh = 8;
2733 			rdev->config.cik.max_backends_per_se = 2;
2734 		} else if ((rdev->pdev->device == 0x1309) ||
2735 			   (rdev->pdev->device == 0x130A) ||
2736 			   (rdev->pdev->device == 0x130D) ||
2737 			   (rdev->pdev->device == 0x1313) ||
2738 			   (rdev->pdev->device == 0x131D)) {
2739 			rdev->config.cik.max_cu_per_sh = 6;
2740 			rdev->config.cik.max_backends_per_se = 2;
2741 		} else if ((rdev->pdev->device == 0x1306) ||
2742 			   (rdev->pdev->device == 0x1307) ||
2743 			   (rdev->pdev->device == 0x130B) ||
2744 			   (rdev->pdev->device == 0x130E) ||
2745 			   (rdev->pdev->device == 0x1315) ||
2746 			   (rdev->pdev->device == 0x131B)) {
2747 			rdev->config.cik.max_cu_per_sh = 4;
2748 			rdev->config.cik.max_backends_per_se = 1;
2749 		} else {
2750 			rdev->config.cik.max_cu_per_sh = 3;
2751 			rdev->config.cik.max_backends_per_se = 1;
2752 		}
2753 		rdev->config.cik.max_sh_per_se = 1;
2754 		rdev->config.cik.max_texture_channel_caches = 4;
2755 		rdev->config.cik.max_gprs = 256;
2756 		rdev->config.cik.max_gs_threads = 16;
2757 		rdev->config.cik.max_hw_contexts = 8;
2758 
2759 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2760 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2761 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2762 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2763 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2764 		break;
2765 	case CHIP_KABINI:
2766 	default:
2767 		rdev->config.cik.max_shader_engines = 1;
2768 		rdev->config.cik.max_tile_pipes = 2;
2769 		rdev->config.cik.max_cu_per_sh = 2;
2770 		rdev->config.cik.max_sh_per_se = 1;
2771 		rdev->config.cik.max_backends_per_se = 1;
2772 		rdev->config.cik.max_texture_channel_caches = 2;
2773 		rdev->config.cik.max_gprs = 256;
2774 		rdev->config.cik.max_gs_threads = 16;
2775 		rdev->config.cik.max_hw_contexts = 8;
2776 
2777 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2778 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2779 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2780 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2781 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2782 		break;
2783 	}
2784 
2785 	/* Initialize HDP */
2786 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2787 		WREG32((0x2c14 + j), 0x00000000);
2788 		WREG32((0x2c18 + j), 0x00000000);
2789 		WREG32((0x2c1c + j), 0x00000000);
2790 		WREG32((0x2c20 + j), 0x00000000);
2791 		WREG32((0x2c24 + j), 0x00000000);
2792 	}
2793 
2794 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2795 
2796 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2797 
2798 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2799 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2800 
2801 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
2802 	rdev->config.cik.mem_max_burst_length_bytes = 256;
2803 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
2804 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2805 	if (rdev->config.cik.mem_row_size_in_kb > 4)
2806 		rdev->config.cik.mem_row_size_in_kb = 4;
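	/*
	 * Worked example: NOOFCOLS = 0 gives (4 * (1 << 8)) / 1024 = 1 KB
	 * rows; NOOFCOLS = 2 gives 4 KB, the maximum the clamp above allows.
	 */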
2807 	/* XXX use MC settings? */
2808 	rdev->config.cik.shader_engine_tile_size = 32;
2809 	rdev->config.cik.num_gpus = 1;
2810 	rdev->config.cik.multi_gpu_tile_size = 64;
2811 
2812 	/* fix up row size */
2813 	gb_addr_config &= ~ROW_SIZE_MASK;
2814 	switch (rdev->config.cik.mem_row_size_in_kb) {
2815 	case 1:
2816 	default:
2817 		gb_addr_config |= ROW_SIZE(0);
2818 		break;
2819 	case 2:
2820 		gb_addr_config |= ROW_SIZE(1);
2821 		break;
2822 	case 4:
2823 		gb_addr_config |= ROW_SIZE(2);
2824 		break;
2825 	}
2826 
2827 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
2828 	 * not have bank info, so create a custom tiling dword.
2829 	 * bits 3:0   num_pipes
2830 	 * bits 7:4   num_banks
2831 	 * bits 11:8  group_size
2832 	 * bits 15:12 row_size
2833 	 */
2834 	rdev->config.cik.tile_config = 0;
2835 	switch (rdev->config.cik.num_tile_pipes) {
2836 	case 1:
2837 		rdev->config.cik.tile_config |= (0 << 0);
2838 		break;
2839 	case 2:
2840 		rdev->config.cik.tile_config |= (1 << 0);
2841 		break;
2842 	case 4:
2843 		rdev->config.cik.tile_config |= (2 << 0);
2844 		break;
2845 	case 8:
2846 	default:
2847 		/* XXX what about 12? */
2848 		rdev->config.cik.tile_config |= (3 << 0);
2849 		break;
2850 	}
2851 	rdev->config.cik.tile_config |=
2852 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
2853 	rdev->config.cik.tile_config |=
2854 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
2855 	rdev->config.cik.tile_config |=
2856 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
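
	/*
	 * Packing sketch (field values are hypothetical): with num_pipes
	 * encoded as 0x2 (4 pipes), banks b, group size g and row size r,
	 * the dword reads
	 *
	 *	tile_config = 0x2 | (b << 4) | (g << 8) | (r << 12);
	 */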
2857 
2858 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
2859 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2860 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
2861 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
2862 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
2863 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
2864 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2865 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2866 
2867 	cik_tiling_mode_table_init(rdev);
2868 
2869 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
2870 		     rdev->config.cik.max_sh_per_se,
2871 		     rdev->config.cik.max_backends_per_se);
2872 
2873 	/* set HW defaults for 3D engine */
2874 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2875 
2876 	WREG32(SX_DEBUG_1, 0x20);
2877 
2878 	WREG32(TA_CNTL_AUX, 0x00010000);
2879 
2880 	tmp = RREG32(SPI_CONFIG_CNTL);
2881 	tmp |= 0x03000000;
2882 	WREG32(SPI_CONFIG_CNTL, tmp);
2883 
2884 	WREG32(SQ_CONFIG, 1);
2885 
2886 	WREG32(DB_DEBUG, 0);
2887 
2888 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2889 	tmp |= 0x00000400;
2890 	WREG32(DB_DEBUG2, tmp);
2891 
2892 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2893 	tmp |= 0x00020200;
2894 	WREG32(DB_DEBUG3, tmp);
2895 
2896 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2897 	tmp |= 0x00018208;
2898 	WREG32(CB_HW_CONTROL, tmp);
2899 
2900 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2901 
2902 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2903 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2904 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2905 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2906 
2907 	WREG32(VGT_NUM_INSTANCES, 1);
2908 
2909 	WREG32(CP_PERFMON_CNTL, 0);
2910 
2911 	WREG32(SQ_CONFIG, 0);
2912 
2913 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2914 					  FORCE_EOV_MAX_REZ_CNT(255)));
2915 
2916 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2917 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
2918 
2919 	WREG32(VGT_GS_VERTEX_REUSE, 16);
2920 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2921 
2922 	tmp = RREG32(HDP_MISC_CNTL);
2923 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2924 	WREG32(HDP_MISC_CNTL, tmp);
2925 
2926 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2927 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2928 
2929 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2930 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2931 
2932 	udelay(50);
2933 }
2934 
2935 /*
2936  * GPU scratch registers helpers function.
2937  */
2938 /**
2939  * cik_scratch_init - setup driver info for CP scratch regs
2940  *
2941  * @rdev: radeon_device pointer
2942  *
2943  * Set up the number and offset of the CP scratch registers.
2944  * NOTE: use of CP scratch registers is a legacy interface and
2945  * is not used by default on newer asics (r6xx+); those asics
2946  * use memory buffers for fences rather than scratch regs.
2947  */
2948 static void cik_scratch_init(struct radeon_device *rdev)
2949 {
2950 	int i;
2951 
2952 	rdev->scratch.num_reg = 7;
2953 	rdev->scratch.reg_base = SCRATCH_REG0;
2954 	for (i = 0; i < rdev->scratch.num_reg; i++) {
2955 		rdev->scratch.free[i] = true;
2956 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2957 	}
2958 }
2959 
2960 /**
2961  * cik_ring_test - basic gfx ring test
2962  *
2963  * @rdev: radeon_device pointer
2964  * @ring: radeon_ring structure holding ring information
2965  *
2966  * Allocate a scratch register and write to it using the gfx ring (CIK).
2967  * Provides a basic gfx ring test to verify that the ring is working.
2968  * Used by cik_cp_gfx_resume().
2969  * Returns 0 on success, error on failure.
2970  */
2971 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2972 {
2973 	uint32_t scratch;
2974 	uint32_t tmp = 0;
2975 	unsigned i;
2976 	int r;
2977 
2978 	r = radeon_scratch_get(rdev, &scratch);
2979 	if (r) {
2980 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2981 		return r;
2982 	}
2983 	WREG32(scratch, 0xCAFEDEAD);
2984 	r = radeon_ring_lock(rdev, ring, 3);
2985 	if (r) {
2986 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2987 		radeon_scratch_free(rdev, scratch);
2988 		return r;
2989 	}
2990 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2991 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2992 	radeon_ring_write(ring, 0xDEADBEEF);
2993 	radeon_ring_unlock_commit(rdev, ring);
2994 
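	/*
	 * The scratch register was seeded with 0xCAFEDEAD above; poll
	 * until the ring's 0xDEADBEEF write lands or rdev->usec_timeout
	 * microseconds elapse.
	 */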
2995 	for (i = 0; i < rdev->usec_timeout; i++) {
2996 		tmp = RREG32(scratch);
2997 		if (tmp == 0xDEADBEEF)
2998 			break;
2999 		DRM_UDELAY(1);
3000 	}
3001 	if (i < rdev->usec_timeout) {
3002 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3003 	} else {
3004 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3005 			  ring->idx, scratch, tmp);
3006 		r = -EINVAL;
3007 	}
3008 	radeon_scratch_free(rdev, scratch);
3009 	return r;
3010 }
3011 
3012 /**
3013  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3014  *
3015  * @rdev: radeon_device pointer
3016  * @fence: radeon fence object
3017  *
3018  * Emits a fence sequence number on the gfx ring and flushes
3019  * GPU caches.
3020  */
3021 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3022 			     struct radeon_fence *fence)
3023 {
3024 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3025 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3026 
3027 	/* EVENT_WRITE_EOP - flush caches, send int */
3028 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3029 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3030 				 EOP_TC_ACTION_EN |
3031 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3032 				 EVENT_INDEX(5)));
3033 	radeon_ring_write(ring, addr & 0xfffffffc);
3034 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3035 	radeon_ring_write(ring, fence->seq);
3036 	radeon_ring_write(ring, 0);
3037 	/* HDP flush */
3038 	/* We should be using the new WAIT_REG_MEM special op packet here
3039 	 * but it causes the CP to hang
3040 	 */
3041 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3042 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3043 				 WRITE_DATA_DST_SEL(0)));
3044 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3045 	radeon_ring_write(ring, 0);
3046 	radeon_ring_write(ring, 0);
3047 }
3048 
3049 /**
3050  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3051  *
3052  * @rdev: radeon_device pointer
3053  * @fence: radeon fence object
3054  *
3055  * Emits a fence sequence number on the compute ring and flushes
3056  * GPU caches.
3057  */
3058 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3059 				 struct radeon_fence *fence)
3060 {
3061 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3062 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3063 
3064 	/* RELEASE_MEM - flush caches, send int */
3065 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3066 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3067 				 EOP_TC_ACTION_EN |
3068 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3069 				 EVENT_INDEX(5)));
3070 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3071 	radeon_ring_write(ring, addr & 0xfffffffc);
3072 	radeon_ring_write(ring, upper_32_bits(addr));
3073 	radeon_ring_write(ring, fence->seq);
3074 	radeon_ring_write(ring, 0);
3075 	/* HDP flush */
3076 	/* We should be using the new WAIT_REG_MEM special op packet here
3077 	 * but it causes the CP to hang
3078 	 */
3079 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3080 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3081 				 WRITE_DATA_DST_SEL(0)));
3082 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3083 	radeon_ring_write(ring, 0);
3084 	radeon_ring_write(ring, 0);
3085 }
3086 
3087 void cik_semaphore_ring_emit(struct radeon_device *rdev,
3088 			     struct radeon_ring *ring,
3089 			     struct radeon_semaphore *semaphore,
3090 			     bool emit_wait)
3091 {
3092 	uint64_t addr = semaphore->gpu_addr;
3093 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3094 
3095 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3096 	radeon_ring_write(ring, addr & 0xffffffff);
3097 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3098 }
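
/*
 * Usage sketch (illustrative): a semaphore is signalled on one ring
 * and waited on from another to order work between them:
 *
 *	cik_semaphore_ring_emit(rdev, ring_a, sem, false);	(signal)
 *	cik_semaphore_ring_emit(rdev, ring_b, sem, true);	(wait)
 */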
3099 
3100 /*
3101  * IB stuff
3102  */
3103 /**
3104  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3105  *
3106  * @rdev: radeon_device pointer
3107  * @ib: radeon indirect buffer object
3108  *
3109  * Emits a DE (drawing engine) or CE (constant engine) IB
3110  * on the gfx ring.  IBs are usually generated by userspace
3111  * acceleration drivers and submitted to the kernel for
3112  * scheduling on the ring.  This function schedules the IB
3113  * on the gfx ring for execution by the GPU.
3114  */
3115 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3116 {
3117 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3118 	u32 header, control = INDIRECT_BUFFER_VALID;
3119 
3120 	if (ib->is_const_ib) {
3121 		/* set switch buffer packet before const IB */
3122 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3123 		radeon_ring_write(ring, 0);
3124 
3125 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3126 	} else {
3127 		u32 next_rptr;
3128 		if (ring->rptr_save_reg) {
3129 			next_rptr = ring->wptr + 3 + 4;
3130 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3131 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3132 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3133 			radeon_ring_write(ring, next_rptr);
3134 		} else if (rdev->wb.enabled) {
3135 			next_rptr = ring->wptr + 5 + 4;
3136 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3137 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3138 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3139 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3140 			radeon_ring_write(ring, next_rptr);
3141 		}
3142 
3143 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3144 	}
3145 
3146 	control |= ib->length_dw |
3147 		(ib->vm ? (ib->vm->id << 24) : 0);
3148 
3149 	radeon_ring_write(ring, header);
3150 	radeon_ring_write(ring,
3151 #ifdef __BIG_ENDIAN
3152 			  (2 << 0) |
3153 #endif
3154 			  (ib->gpu_addr & 0xFFFFFFFC));
3155 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3156 	radeon_ring_write(ring, control);
3157 }
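
/*
 * Control dword sketch (hypothetical values): for a 64-dword IB bound
 * to VM id 3, the final write above carries
 *
 *	control = INDIRECT_BUFFER_VALID | 64 | (3 << 24);
 *
 * i.e. the IB length in dwords in the low bits and the VM id shifted
 * to bit 24.
 */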
3158 
3159 /**
3160  * cik_ib_test - basic gfx ring IB test
3161  *
3162  * @rdev: radeon_device pointer
3163  * @ring: radeon_ring structure holding ring information
3164  *
3165  * Allocate an IB and execute it on the gfx ring (CIK).
3166  * Provides a basic gfx ring test to verify that IBs are working.
3167  * Returns 0 on success, error on failure.
3168  */
3169 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3170 {
3171 	struct radeon_ib ib;
3172 	uint32_t scratch;
3173 	uint32_t tmp = 0;
3174 	unsigned i;
3175 	int r;
3176 
3177 	r = radeon_scratch_get(rdev, &scratch);
3178 	if (r) {
3179 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3180 		return r;
3181 	}
3182 	WREG32(scratch, 0xCAFEDEAD);
3183 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3184 	if (r) {
3185 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3186 		radeon_scratch_free(rdev, scratch);
3187 		return r;
3188 	}
3189 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3190 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3191 	ib.ptr[2] = 0xDEADBEEF;
3192 	ib.length_dw = 3;
3193 	r = radeon_ib_schedule(rdev, &ib, NULL);
3194 	if (r) {
3195 		radeon_scratch_free(rdev, scratch);
3196 		radeon_ib_free(rdev, &ib);
3197 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3198 		return r;
3199 	}
3200 	r = radeon_fence_wait(ib.fence, false);
3201 	if (r) {
3202 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3203 		radeon_scratch_free(rdev, scratch);
3204 		radeon_ib_free(rdev, &ib);
3205 		return r;
3206 	}
3207 	for (i = 0; i < rdev->usec_timeout; i++) {
3208 		tmp = RREG32(scratch);
3209 		if (tmp == 0xDEADBEEF)
3210 			break;
3211 		DRM_UDELAY(1);
3212 	}
3213 	if (i < rdev->usec_timeout) {
3214 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3215 	} else {
3216 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3217 			  scratch, tmp);
3218 		r = -EINVAL;
3219 	}
3220 	radeon_scratch_free(rdev, scratch);
3221 	radeon_ib_free(rdev, &ib);
3222 	return r;
3223 }
3224 
3225 /*
3226  * CP.
3227  * On CIK, gfx and compute now have independent command processors.
3228  *
3229  * GFX
3230  * Gfx consists of a single ring and can process both gfx jobs and
3231  * compute jobs.  The gfx CP consists of three microengines (ME):
3232  * PFP - Pre-Fetch Parser
3233  * ME - Micro Engine
3234  * CE - Constant Engine
3235  * The PFP and ME make up what is considered the Drawing Engine (DE).
3236  * The CE is an asynchronous engine used for updating buffer descriptors
3237  * used by the DE so that they can be loaded into cache in parallel
3238  * while the DE is processing state update packets.
3239  *
3240  * Compute
3241  * The compute CP consists of two microengines (ME):
3242  * MEC1 - Compute MicroEngine 1
3243  * MEC2 - Compute MicroEngine 2
3244  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3245  * The queues are exposed to userspace and are programmed directly
3246  * by the compute runtime.
3247  */
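/*
 * In total that is 2 MECs * 4 pipes * 8 queues = 64 compute queues,
 * each identified by an (ME, pipe, queue) triplet.
 */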
3248 /**
3249  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3250  *
3251  * @rdev: radeon_device pointer
3252  * @enable: enable or disable the MEs
3253  *
3254  * Halts or unhalts the gfx MEs.
3255  */
3256 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3257 {
3258 	if (enable)
3259 		WREG32(CP_ME_CNTL, 0);
3260 	else {
3261 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3262 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3263 	}
3264 	udelay(50);
3265 }
3266 
3267 /**
3268  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3269  *
3270  * @rdev: radeon_device pointer
3271  *
3272  * Loads the gfx PFP, ME, and CE ucode.
3273  * Returns 0 for success, -EINVAL if the ucode is not available.
3274  */
3275 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3276 {
3277 	const __be32 *fw_data;
3278 	int i;
3279 
3280 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3281 		return -EINVAL;
3282 
3283 	cik_cp_gfx_enable(rdev, false);
3284 
3285 	/* PFP */
3286 	fw_data = (const __be32 *)rdev->pfp_fw->data;
3287 	WREG32(CP_PFP_UCODE_ADDR, 0);
3288 	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3289 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3290 	WREG32(CP_PFP_UCODE_ADDR, 0);
3291 
3292 	/* CE */
3293 	fw_data = (const __be32 *)rdev->ce_fw->data;
3294 	WREG32(CP_CE_UCODE_ADDR, 0);
3295 	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3296 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3297 	WREG32(CP_CE_UCODE_ADDR, 0);
3298 
3299 	/* ME */
3300 	fw_data = (const __be32 *)rdev->me_fw->data;
3301 	WREG32(CP_ME_RAM_WADDR, 0);
3302 	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3303 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3304 	WREG32(CP_ME_RAM_WADDR, 0);
3305 
3306 	WREG32(CP_PFP_UCODE_ADDR, 0);
3307 	WREG32(CP_CE_UCODE_ADDR, 0);
3308 	WREG32(CP_ME_RAM_WADDR, 0);
3309 	WREG32(CP_ME_RAM_RADDR, 0);
3310 	return 0;
3311 }
3312 
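/*
 * Minimal sketch, disabled: all three engines above are loaded with the
 * same pattern -- zero the auto-incrementing address register, stream the
 * big-endian ucode words through the data register, then zero the address
 * register again.  A hypothetical helper could factor that out:
 */
#if 0
static void cik_example_load_ucode(struct radeon_device *rdev,
				   u32 addr_reg, u32 data_reg,
				   const __be32 *fw_data, unsigned words)
{
	unsigned i;

	WREG32(addr_reg, 0);		/* rewind the write pointer */
	for (i = 0; i < words; i++)
		WREG32(data_reg, be32_to_cpup(fw_data++));
	WREG32(addr_reg, 0);		/* rewind again when done */
}
#endif
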
3313 /**
3314  * cik_cp_gfx_start - start the gfx ring
3315  *
3316  * @rdev: radeon_device pointer
3317  *
3318  * Enables the ring and loads the clear state context and other
3319  * packets required to init the ring.
3320  * Returns 0 for success, error for failure.
3321  */
3322 static int cik_cp_gfx_start(struct radeon_device *rdev)
3323 {
3324 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3325 	int r, i;
3326 
3327 	/* init the CP */
3328 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3329 	WREG32(CP_ENDIAN_SWAP, 0);
3330 	WREG32(CP_DEVICE_ID, 1);
3331 
3332 	cik_cp_gfx_enable(rdev, true);
3333 
3334 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3335 	if (r) {
3336 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3337 		return r;
3338 	}
3339 
3340 	/* init the CE partitions.  CE only used for gfx on CIK */
3341 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3342 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3343 	radeon_ring_write(ring, 0xc000);
3344 	radeon_ring_write(ring, 0xc000);
3345 
3346 	/* setup clear context state */
3347 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3348 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3349 
3350 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3351 	radeon_ring_write(ring, 0x80000000);
3352 	radeon_ring_write(ring, 0x80000000);
3353 
3354 	for (i = 0; i < cik_default_size; i++)
3355 		radeon_ring_write(ring, cik_default_state[i]);
3356 
3357 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3358 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3359 
3360 	/* set clear context state */
3361 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3362 	radeon_ring_write(ring, 0);
3363 
3364 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3365 	radeon_ring_write(ring, 0x00000316);
3366 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3367 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3368 
3369 	radeon_ring_unlock_commit(rdev, ring);
3370 
3371 	return 0;
3372 }
3373 
3374 /**
3375  * cik_cp_gfx_fini - stop the gfx ring
3376  *
3377  * @rdev: radeon_device pointer
3378  *
3379  * Stop the gfx ring and tear down the driver ring
3380  * info.
3381  */
3382 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3383 {
3384 	cik_cp_gfx_enable(rdev, false);
3385 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3386 }
3387 
3388 /**
3389  * cik_cp_gfx_resume - setup the gfx ring buffer registers
3390  *
3391  * @rdev: radeon_device pointer
3392  *
3393  * Program the location and size of the gfx ring buffer
3394  * and test it to make sure it's working.
3395  * Returns 0 for success, error for failure.
3396  */
3397 static int cik_cp_gfx_resume(struct radeon_device *rdev)
3398 {
3399 	struct radeon_ring *ring;
3400 	u32 tmp;
3401 	u32 rb_bufsz;
3402 	u64 rb_addr;
3403 	int r;
3404 
3405 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3406 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3407 
3408 	/* Set the write pointer delay */
3409 	WREG32(CP_RB_WPTR_DELAY, 0);
3410 
3411 	/* set the RB to use vmid 0 */
3412 	WREG32(CP_RB_VMID, 0);
3413 
3414 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3415 
3416 	/* ring 0 - compute and gfx */
3417 	/* Set ring buffer size */
3418 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3419 	rb_bufsz = order_base_2(ring->ring_size / 8);
3420 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3421 #ifdef __BIG_ENDIAN
3422 	tmp |= BUF_SWAP_32BIT;
3423 #endif
3424 	WREG32(CP_RB0_CNTL, tmp);
3425 
3426 	/* Initialize the ring buffer's read and write pointers */
3427 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3428 	ring->wptr = 0;
3429 	WREG32(CP_RB0_WPTR, ring->wptr);
3430 
3431 	/* set the wb address whether it's enabled or not */
3432 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3433 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3434 
3435 	/* scratch register shadowing is no longer supported */
3436 	WREG32(SCRATCH_UMSK, 0);
3437 
3438 	if (!rdev->wb.enabled)
3439 		tmp |= RB_NO_UPDATE;
3440 
3441 	mdelay(1);
3442 	WREG32(CP_RB0_CNTL, tmp);
3443 
3444 	rb_addr = ring->gpu_addr >> 8;
3445 	WREG32(CP_RB0_BASE, rb_addr);
3446 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
3447 
3448 	ring->rptr = RREG32(CP_RB0_RPTR);
3449 
3450 	/* start the ring */
3451 	cik_cp_gfx_start(rdev);
3452 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3453 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3454 	if (r) {
3455 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3456 		return r;
3457 	}
3458 	return 0;
3459 }
3460 
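/*
 * Worked example, illustrative only: for a hypothetical 1 MiB ring,
 * ring_size / 8 = 131072 = 2^17, so the order_base_2() above yields an
 * RB_BUFSZ field of 17 for CP_RB0_CNTL.
 */
#if 0
static u32 cik_example_rb_bufsz(void)
{
	u32 ring_size = 1024 * 1024;		/* 1 MiB, hypothetical */

	return order_base_2(ring_size / 8);	/* == 17 */
}
#endif
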
3461 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3462 			      struct radeon_ring *ring)
3463 {
3464 	u32 rptr;
3465 
3468 	if (rdev->wb.enabled) {
3469 		rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3470 	} else {
3471 		mutex_lock(&rdev->srbm_mutex);
3472 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3473 		rptr = RREG32(CP_HQD_PQ_RPTR);
3474 		cik_srbm_select(rdev, 0, 0, 0, 0);
3475 		mutex_unlock(&rdev->srbm_mutex);
3476 	}
3477 
3478 	return rptr;
3479 }
3480 
3481 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3482 			      struct radeon_ring *ring)
3483 {
3484 	u32 wptr;
3485 
3486 	if (rdev->wb.enabled) {
3487 		wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3488 	} else {
3489 		mutex_lock(&rdev->srbm_mutex);
3490 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3491 		wptr = RREG32(CP_HQD_PQ_WPTR);
3492 		cik_srbm_select(rdev, 0, 0, 0, 0);
3493 		mutex_unlock(&rdev->srbm_mutex);
3494 	}
3495 
3496 	return wptr;
3497 }
3498 
3499 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
3500 			       struct radeon_ring *ring)
3501 {
3502 	rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
3503 	WDOORBELL32(ring->doorbell_offset, ring->wptr);
3504 }
3505 
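/*
 * Usage sketch, illustrative only: a submission path ends by advancing
 * the cached wptr and calling the set_wptr hook above, which publishes
 * the new value to the write-back slot before ringing the doorbell so
 * the CP fetches a consistent write pointer.
 */
#if 0
static void cik_example_commit(struct radeon_device *rdev,
			       struct radeon_ring *ring)
{
	ring->wptr += 1;	/* hypothetical: one dword was queued */
	cik_compute_ring_set_wptr(rdev, ring);
}
#endif
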
3506 /**
3507  * cik_cp_compute_enable - enable/disable the compute CP MEs
3508  *
3509  * @rdev: radeon_device pointer
3510  * @enable: enable or disable the MEs
3511  *
3512  * Halts or unhalts the compute MEs.
3513  */
3514 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
3515 {
3516 	if (enable)
3517 		WREG32(CP_MEC_CNTL, 0);
3518 	else
3519 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3520 	udelay(50);
3521 }
3522 
3523 /**
3524  * cik_cp_compute_load_microcode - load the compute CP ME ucode
3525  *
3526  * @rdev: radeon_device pointer
3527  *
3528  * Loads the compute MEC1&2 ucode.
3529  * Returns 0 for success, -EINVAL if the ucode is not available.
3530  */
3531 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
3532 {
3533 	const __be32 *fw_data;
3534 	int i;
3535 
3536 	if (!rdev->mec_fw)
3537 		return -EINVAL;
3538 
3539 	cik_cp_compute_enable(rdev, false);
3540 
3541 	/* MEC1 */
3542 	fw_data = (const __be32 *)rdev->mec_fw->data;
3543 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3544 	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3545 		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
3546 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3547 
3548 	if (rdev->family == CHIP_KAVERI) {
3549 		/* MEC2 */
3550 		fw_data = (const __be32 *)rdev->mec_fw->data;
3551 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3552 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3553 			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
3554 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3555 	}
3556 
3557 	return 0;
3558 }
3559 
3560 /**
3561  * cik_cp_compute_start - start the compute queues
3562  *
3563  * @rdev: radeon_device pointer
3564  *
3565  * Enable the compute queues.
3566  * Returns 0 for success, error for failure.
3567  */
3568 static int cik_cp_compute_start(struct radeon_device *rdev)
3569 {
3570 	cik_cp_compute_enable(rdev, true);
3571 
3572 	return 0;
3573 }
3574 
3575 /**
3576  * cik_cp_compute_fini - stop the compute queues
3577  *
3578  * @rdev: radeon_device pointer
3579  *
3580  * Stop the compute queues and tear down the driver queue
3581  * info.
3582  */
3583 static void cik_cp_compute_fini(struct radeon_device *rdev)
3584 {
3585 	int i, idx, r;
3586 
3587 	cik_cp_compute_enable(rdev, false);
3588 
3589 	for (i = 0; i < 2; i++) {
3590 		if (i == 0)
3591 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
3592 		else
3593 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
3594 
3595 		if (rdev->ring[idx].mqd_obj) {
3596 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3597 			if (unlikely(r != 0))
3598 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
3599 
3600 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3601 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3602 
3603 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3604 			rdev->ring[idx].mqd_obj = NULL;
3605 		}
3606 	}
3607 }
3608 
3609 static void cik_mec_fini(struct radeon_device *rdev)
3610 {
3611 	int r;
3612 
3613 	if (rdev->mec.hpd_eop_obj) {
3614 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3615 		if (unlikely(r != 0))
3616 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
3617 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
3618 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3619 
3620 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
3621 		rdev->mec.hpd_eop_obj = NULL;
3622 	}
3623 }
3624 
3625 #define MEC_HPD_SIZE 2048
3626 
3627 static int cik_mec_init(struct radeon_device *rdev)
3628 {
3629 	int r;
3630 	u32 *hpd;
3631 
3632 	/*
3633 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
3634 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
3635 	 */
3636 	if (rdev->family == CHIP_KAVERI)
3637 		rdev->mec.num_mec = 2;
3638 	else
3639 		rdev->mec.num_mec = 1;
3640 	rdev->mec.num_pipe = 4;
3641 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
3642 
3643 	if (rdev->mec.hpd_eop_obj == NULL) {
3644 		r = radeon_bo_create(rdev,
3645 				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
3646 				     PAGE_SIZE, true,
3647 				     RADEON_GEM_DOMAIN_GTT, NULL,
3648 				     &rdev->mec.hpd_eop_obj);
3649 		if (r) {
3650 			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
3651 			return r;
3652 		}
3653 	}
3654 
3655 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3656 	if (unlikely(r != 0)) {
3657 		cik_mec_fini(rdev);
3658 		return r;
3659 	}
3660 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
3661 			  &rdev->mec.hpd_eop_gpu_addr);
3662 	if (r) {
3663 		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
3664 		cik_mec_fini(rdev);
3665 		return r;
3666 	}
3667 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
3668 	if (r) {
3669 		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
3670 		cik_mec_fini(rdev);
3671 		return r;
3672 	}
3673 
3674 	/* clear memory.  Not sure if this is required or not */
3675 	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
3676 
3677 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
3678 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3679 
3680 	return 0;
3681 }
3682 
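/*
 * Sizing example, illustrative only: the HPD EOP buffer allocated above
 * is num_mec * num_pipe * MEC_HPD_SIZE * 2 bytes, i.e.
 * 2 * 4 * 2048 * 2 = 32768 bytes on Kaveri and 16384 bytes elsewhere.
 */
#if 0
static unsigned long cik_example_hpd_eop_bytes(struct radeon_device *rdev)
{
	return rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2;
}
#endif
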
3683 struct hqd_registers {
3685 	u32 cp_mqd_base_addr;
3686 	u32 cp_mqd_base_addr_hi;
3687 	u32 cp_hqd_active;
3688 	u32 cp_hqd_vmid;
3689 	u32 cp_hqd_persistent_state;
3690 	u32 cp_hqd_pipe_priority;
3691 	u32 cp_hqd_queue_priority;
3692 	u32 cp_hqd_quantum;
3693 	u32 cp_hqd_pq_base;
3694 	u32 cp_hqd_pq_base_hi;
3695 	u32 cp_hqd_pq_rptr;
3696 	u32 cp_hqd_pq_rptr_report_addr;
3697 	u32 cp_hqd_pq_rptr_report_addr_hi;
3698 	u32 cp_hqd_pq_wptr_poll_addr;
3699 	u32 cp_hqd_pq_wptr_poll_addr_hi;
3700 	u32 cp_hqd_pq_doorbell_control;
3701 	u32 cp_hqd_pq_wptr;
3702 	u32 cp_hqd_pq_control;
3703 	u32 cp_hqd_ib_base_addr;
3704 	u32 cp_hqd_ib_base_addr_hi;
3705 	u32 cp_hqd_ib_rptr;
3706 	u32 cp_hqd_ib_control;
3707 	u32 cp_hqd_iq_timer;
3708 	u32 cp_hqd_iq_rptr;
3709 	u32 cp_hqd_dequeue_request;
3710 	u32 cp_hqd_dma_offload;
3711 	u32 cp_hqd_sema_cmd;
3712 	u32 cp_hqd_msg_type;
3713 	u32 cp_hqd_atomic0_preop_lo;
3714 	u32 cp_hqd_atomic0_preop_hi;
3715 	u32 cp_hqd_atomic1_preop_lo;
3716 	u32 cp_hqd_atomic1_preop_hi;
3717 	u32 cp_hqd_hq_scheduler0;
3718 	u32 cp_hqd_hq_scheduler1;
3719 	u32 cp_mqd_control;
3720 };
3721 
3722 struct bonaire_mqd {
3724 	u32 header;
3725 	u32 dispatch_initiator;
3726 	u32 dimensions[3];
3727 	u32 start_idx[3];
3728 	u32 num_threads[3];
3729 	u32 pipeline_stat_enable;
3730 	u32 perf_counter_enable;
3731 	u32 pgm[2];
3732 	u32 tba[2];
3733 	u32 tma[2];
3734 	u32 pgm_rsrc[2];
3735 	u32 vmid;
3736 	u32 resource_limits;
3737 	u32 static_thread_mgmt01[2];
3738 	u32 tmp_ring_size;
3739 	u32 static_thread_mgmt23[2];
3740 	u32 restart[3];
3741 	u32 thread_trace_enable;
3742 	u32 reserved1;
3743 	u32 user_data[16];
3744 	u32 vgtcs_invoke_count[2];
3745 	struct hqd_registers queue_state;
3746 	u32 dequeue_cntr;
3747 	u32 interrupt_queue[64];
3748 };
3749 
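/*
 * Sanity sketch, illustrative only: each compute queue gets its own MQD
 * BO of sizeof(struct bonaire_mqd) with PAGE_SIZE alignment (see
 * cik_cp_compute_resume() below), so the struct is expected to fit in a
 * single page.  A hypothetical compile-time check:
 */
#if 0
static void cik_example_mqd_size_check(void)
{
	BUILD_BUG_ON(sizeof(struct bonaire_mqd) > PAGE_SIZE);
}
#endif
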
3750 /**
3751  * cik_cp_compute_resume - setup the compute queue registers
3752  *
3753  * @rdev: radeon_device pointer
3754  *
3755  * Program the compute queues and test them to make sure they
3756  * are working.
3757  * Returns 0 for success, error for failure.
3758  */
3759 static int cik_cp_compute_resume(struct radeon_device *rdev)
3760 {
3761 	int r, i, j, idx;
3762 	u32 tmp;
3763 	bool use_doorbell = true;
3764 	u64 hqd_gpu_addr;
3765 	u64 mqd_gpu_addr;
3766 	u64 eop_gpu_addr;
3767 	u64 wb_gpu_addr;
3768 	u32 *buf;
3769 	struct bonaire_mqd *mqd;
3770 
3771 	r = cik_cp_compute_start(rdev);
3772 	if (r)
3773 		return r;
3774 
3775 	/* fix up chicken bits */
3776 	tmp = RREG32(CP_CPF_DEBUG);
3777 	tmp |= (1 << 23);
3778 	WREG32(CP_CPF_DEBUG, tmp);
3779 
3780 	/* init the pipes */
3781 	mutex_lock(&rdev->srbm_mutex);
3782 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
3783 		int me = (i < 4) ? 1 : 2;
3784 		int pipe = (i < 4) ? i : (i - 4);
3785 
3786 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
3787 
3788 		cik_srbm_select(rdev, me, pipe, 0, 0);
3789 
3790 		/* write the EOP addr */
3791 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
3792 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
3793 
3794 		/* set the VMID assigned */
3795 		WREG32(CP_HPD_EOP_VMID, 0);
3796 
3797 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3798 		tmp = RREG32(CP_HPD_EOP_CONTROL);
3799 		tmp &= ~EOP_SIZE_MASK;
3800 		tmp |= order_base_2(MEC_HPD_SIZE / 8);
3801 		WREG32(CP_HPD_EOP_CONTROL, tmp);
3802 	}
3803 	cik_srbm_select(rdev, 0, 0, 0, 0);
3804 	mutex_unlock(&rdev->srbm_mutex);
3805 
3806 	/* init the queues.  Just two for now. */
3807 	for (i = 0; i < 2; i++) {
3808 		if (i == 0)
3809 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
3810 		else
3811 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
3812 
3813 		if (rdev->ring[idx].mqd_obj == NULL) {
3814 			r = radeon_bo_create(rdev,
3815 					     sizeof(struct bonaire_mqd),
3816 					     PAGE_SIZE, true,
3817 					     RADEON_GEM_DOMAIN_GTT, NULL,
3818 					     &rdev->ring[idx].mqd_obj);
3819 			if (r) {
3820 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
3821 				return r;
3822 			}
3823 		}
3824 
3825 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3826 		if (unlikely(r != 0)) {
3827 			cik_cp_compute_fini(rdev);
3828 			return r;
3829 		}
3830 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
3831 				  &mqd_gpu_addr);
3832 		if (r) {
3833 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
3834 			cik_cp_compute_fini(rdev);
3835 			return r;
3836 		}
3837 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
3838 		if (r) {
3839 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
3840 			cik_cp_compute_fini(rdev);
3841 			return r;
3842 		}
3843 
3844 		/* doorbell offset */
3845 		rdev->ring[idx].doorbell_offset =
3846 			(rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
3847 
3848 		/* init the mqd struct */
3849 		memset(buf, 0, sizeof(struct bonaire_mqd));
3850 
3851 		mqd = (struct bonaire_mqd *)buf;
3852 		mqd->header = 0xC0310800;
3853 		mqd->static_thread_mgmt01[0] = 0xffffffff;
3854 		mqd->static_thread_mgmt01[1] = 0xffffffff;
3855 		mqd->static_thread_mgmt23[0] = 0xffffffff;
3856 		mqd->static_thread_mgmt23[1] = 0xffffffff;
3857 
3858 		mutex_lock(&rdev->srbm_mutex);
3859 		cik_srbm_select(rdev, rdev->ring[idx].me,
3860 				rdev->ring[idx].pipe,
3861 				rdev->ring[idx].queue, 0);
3862 
3863 		/* disable wptr polling */
3864 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
3865 		tmp &= ~WPTR_POLL_EN;
3866 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
3867 
3868 		/* enable doorbell? */
3869 		mqd->queue_state.cp_hqd_pq_doorbell_control =
3870 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3871 		if (use_doorbell)
3872 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3873 		else
3874 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3875 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3876 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
3877 
3878 		/* disable the queue if it's active */
3879 		mqd->queue_state.cp_hqd_dequeue_request = 0;
3880 		mqd->queue_state.cp_hqd_pq_rptr = 0;
3881 		mqd->queue_state.cp_hqd_pq_wptr = 0;
3882 		if (RREG32(CP_HQD_ACTIVE) & 1) {
3883 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3884 			for (j = 0; j < rdev->usec_timeout; j++) {
3885 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
3886 					break;
3887 				udelay(1);
3888 			}
3889 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3890 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3891 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3892 		}
3893 
3894 		/* set the pointer to the MQD */
3895 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3896 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3897 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3898 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3899 		/* set MQD vmid to 0 */
3900 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3901 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3902 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3903 
3904 		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3905 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3906 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3907 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3908 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3909 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3910 
3911 		/* set up the HQD, this is similar to CP_RB0_CNTL */
3912 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3913 		mqd->queue_state.cp_hqd_pq_control &=
3914 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3915 
3916 		mqd->queue_state.cp_hqd_pq_control |=
3917 			order_base_2(rdev->ring[idx].ring_size / 8);
3918 		mqd->queue_state.cp_hqd_pq_control |=
3919 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
3920 #ifdef __BIG_ENDIAN
3921 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3922 #endif
3923 		mqd->queue_state.cp_hqd_pq_control &=
3924 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3925 		mqd->queue_state.cp_hqd_pq_control |=
3926 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3927 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3928 
3929 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3930 		if (i == 0)
3931 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3932 		else
3933 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3934 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3935 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3936 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3937 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3938 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3939 
3940 		/* set the wb address whether it's enabled or not */
3941 		if (i == 0)
3942 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3943 		else
3944 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3945 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3946 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3947 			upper_32_bits(wb_gpu_addr) & 0xffff;
3948 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3949 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3950 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3951 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3952 
3953 		/* enable the doorbell if requested */
3954 		if (use_doorbell) {
3955 			mqd->queue_state.cp_hqd_pq_doorbell_control =
3956 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3957 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3958 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
3959 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3960 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3961 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
3962 				~(DOORBELL_SOURCE | DOORBELL_HIT);
3963 
3964 		} else {
3965 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3966 		}
3967 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3968 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
3969 
3970 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3971 		rdev->ring[idx].wptr = 0;
3972 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3973 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3974 		rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3975 		mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3976 
3977 		/* set the vmid for the queue */
3978 		mqd->queue_state.cp_hqd_vmid = 0;
3979 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3980 
3981 		/* activate the queue */
3982 		mqd->queue_state.cp_hqd_active = 1;
3983 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3984 
3985 		cik_srbm_select(rdev, 0, 0, 0, 0);
3986 		mutex_unlock(&rdev->srbm_mutex);
3987 
3988 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3989 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3990 
3991 		rdev->ring[idx].ready = true;
3992 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3993 		if (r)
3994 			rdev->ring[idx].ready = false;
3995 	}
3996 
3997 	return 0;
3998 }
3999 
4000 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4001 {
4002 	cik_cp_gfx_enable(rdev, enable);
4003 	cik_cp_compute_enable(rdev, enable);
4004 }
4005 
4006 static int cik_cp_load_microcode(struct radeon_device *rdev)
4007 {
4008 	int r;
4009 
4010 	r = cik_cp_gfx_load_microcode(rdev);
4011 	if (r)
4012 		return r;
4013 	r = cik_cp_compute_load_microcode(rdev);
4014 	if (r)
4015 		return r;
4016 
4017 	return 0;
4018 }
4019 
4020 static void cik_cp_fini(struct radeon_device *rdev)
4021 {
4022 	cik_cp_gfx_fini(rdev);
4023 	cik_cp_compute_fini(rdev);
4024 }
4025 
4026 static int cik_cp_resume(struct radeon_device *rdev)
4027 {
4028 	int r;
4029 
4030 	cik_enable_gui_idle_interrupt(rdev, false);
4031 
4032 	r = cik_cp_load_microcode(rdev);
4033 	if (r)
4034 		return r;
4035 
4036 	r = cik_cp_gfx_resume(rdev);
4037 	if (r)
4038 		return r;
4039 	r = cik_cp_compute_resume(rdev);
4040 	if (r)
4041 		return r;
4042 
4043 	cik_enable_gui_idle_interrupt(rdev, true);
4044 
4045 	return 0;
4046 }
4047 
4048 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4049 {
4050 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4051 		RREG32(GRBM_STATUS));
4052 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4053 		RREG32(GRBM_STATUS2));
4054 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4055 		RREG32(GRBM_STATUS_SE0));
4056 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4057 		RREG32(GRBM_STATUS_SE1));
4058 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4059 		RREG32(GRBM_STATUS_SE2));
4060 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4061 		RREG32(GRBM_STATUS_SE3));
4062 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4063 		RREG32(SRBM_STATUS));
4064 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4065 		RREG32(SRBM_STATUS2));
4066 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4067 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4068 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4069 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4070 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4071 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4072 		 RREG32(CP_STALLED_STAT1));
4073 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4074 		 RREG32(CP_STALLED_STAT2));
4075 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4076 		 RREG32(CP_STALLED_STAT3));
4077 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4078 		 RREG32(CP_CPF_BUSY_STAT));
4079 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4080 		 RREG32(CP_CPF_STALLED_STAT1));
4081 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4082 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4083 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4084 		 RREG32(CP_CPC_STALLED_STAT1));
4085 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4086 }
4087 
4088 /**
4089  * cik_gpu_check_soft_reset - check which blocks are busy
4090  *
4091  * @rdev: radeon_device pointer
4092  *
4093  * Check which blocks are busy and return the relevant reset
4094  * mask to be used by cik_gpu_soft_reset().
4095  * Returns a mask of the blocks to be reset.
4096  */
4097 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4098 {
4099 	u32 reset_mask = 0;
4100 	u32 tmp;
4101 
4102 	/* GRBM_STATUS */
4103 	tmp = RREG32(GRBM_STATUS);
4104 	if (tmp & (PA_BUSY | SC_BUSY |
4105 		   BCI_BUSY | SX_BUSY |
4106 		   TA_BUSY | VGT_BUSY |
4107 		   DB_BUSY | CB_BUSY |
4108 		   GDS_BUSY | SPI_BUSY |
4109 		   IA_BUSY | IA_BUSY_NO_DMA))
4110 		reset_mask |= RADEON_RESET_GFX;
4111 
4112 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4113 		reset_mask |= RADEON_RESET_CP;
4114 
4115 	/* GRBM_STATUS2 */
4116 	tmp = RREG32(GRBM_STATUS2);
4117 	if (tmp & RLC_BUSY)
4118 		reset_mask |= RADEON_RESET_RLC;
4119 
4120 	/* SDMA0_STATUS_REG */
4121 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4122 	if (!(tmp & SDMA_IDLE))
4123 		reset_mask |= RADEON_RESET_DMA;
4124 
4125 	/* SDMA1_STATUS_REG */
4126 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4127 	if (!(tmp & SDMA_IDLE))
4128 		reset_mask |= RADEON_RESET_DMA1;
4129 
4130 	/* SRBM_STATUS2 */
4131 	tmp = RREG32(SRBM_STATUS2);
4132 	if (tmp & SDMA_BUSY)
4133 		reset_mask |= RADEON_RESET_DMA;
4134 
4135 	if (tmp & SDMA1_BUSY)
4136 		reset_mask |= RADEON_RESET_DMA1;
4137 
4138 	/* SRBM_STATUS */
4139 	tmp = RREG32(SRBM_STATUS);
4140 
4141 	if (tmp & IH_BUSY)
4142 		reset_mask |= RADEON_RESET_IH;
4143 
4144 	if (tmp & SEM_BUSY)
4145 		reset_mask |= RADEON_RESET_SEM;
4146 
4147 	if (tmp & GRBM_RQ_PENDING)
4148 		reset_mask |= RADEON_RESET_GRBM;
4149 
4150 	if (tmp & VMC_BUSY)
4151 		reset_mask |= RADEON_RESET_VMC;
4152 
4153 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4154 		   MCC_BUSY | MCD_BUSY))
4155 		reset_mask |= RADEON_RESET_MC;
4156 
4157 	if (evergreen_is_display_hung(rdev))
4158 		reset_mask |= RADEON_RESET_DISPLAY;
4159 
4160 	/* Skip MC reset as it's most likely not hung, just busy */
4161 	if (reset_mask & RADEON_RESET_MC) {
4162 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4163 		reset_mask &= ~RADEON_RESET_MC;
4164 	}
4165 
4166 	return reset_mask;
4167 }
4168 
4169 /**
4170  * cik_gpu_soft_reset - soft reset GPU
4171  *
4172  * @rdev: radeon_device pointer
4173  * @reset_mask: mask of which blocks to reset
4174  *
4175  * Soft reset the blocks specified in @reset_mask.
4176  */
4177 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4178 {
4179 	struct evergreen_mc_save save;
4180 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4181 	u32 tmp;
4182 
4183 	if (reset_mask == 0)
4184 		return;
4185 
4186 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4187 
4188 	cik_print_gpu_status_regs(rdev);
4189 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4190 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4191 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4192 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4193 
4194 	/* disable CG/PG */
4195 	cik_fini_pg(rdev);
4196 	cik_fini_cg(rdev);
4197 
4198 	/* stop the rlc */
4199 	cik_rlc_stop(rdev);
4200 
4201 	/* Disable GFX parsing/prefetching */
4202 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4203 
4204 	/* Disable MEC parsing/prefetching */
4205 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4206 
4207 	if (reset_mask & RADEON_RESET_DMA) {
4208 		/* sdma0 */
4209 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4210 		tmp |= SDMA_HALT;
4211 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4212 	}
4213 	if (reset_mask & RADEON_RESET_DMA1) {
4214 		/* sdma1 */
4215 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4216 		tmp |= SDMA_HALT;
4217 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4218 	}
4219 
4220 	evergreen_mc_stop(rdev, &save);
4221 	if (evergreen_mc_wait_for_idle(rdev)) {
4222 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4223 	}
4224 
4225 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4226 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4227 
4228 	if (reset_mask & RADEON_RESET_CP) {
4229 		grbm_soft_reset |= SOFT_RESET_CP;
4230 
4231 		srbm_soft_reset |= SOFT_RESET_GRBM;
4232 	}
4233 
4234 	if (reset_mask & RADEON_RESET_DMA)
4235 		srbm_soft_reset |= SOFT_RESET_SDMA;
4236 
4237 	if (reset_mask & RADEON_RESET_DMA1)
4238 		srbm_soft_reset |= SOFT_RESET_SDMA1;
4239 
4240 	if (reset_mask & RADEON_RESET_DISPLAY)
4241 		srbm_soft_reset |= SOFT_RESET_DC;
4242 
4243 	if (reset_mask & RADEON_RESET_RLC)
4244 		grbm_soft_reset |= SOFT_RESET_RLC;
4245 
4246 	if (reset_mask & RADEON_RESET_SEM)
4247 		srbm_soft_reset |= SOFT_RESET_SEM;
4248 
4249 	if (reset_mask & RADEON_RESET_IH)
4250 		srbm_soft_reset |= SOFT_RESET_IH;
4251 
4252 	if (reset_mask & RADEON_RESET_GRBM)
4253 		srbm_soft_reset |= SOFT_RESET_GRBM;
4254 
4255 	if (reset_mask & RADEON_RESET_VMC)
4256 		srbm_soft_reset |= SOFT_RESET_VMC;
4257 
4258 	if (!(rdev->flags & RADEON_IS_IGP)) {
4259 		if (reset_mask & RADEON_RESET_MC)
4260 			srbm_soft_reset |= SOFT_RESET_MC;
4261 	}
4262 
4263 	if (grbm_soft_reset) {
4264 		tmp = RREG32(GRBM_SOFT_RESET);
4265 		tmp |= grbm_soft_reset;
4266 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4267 		WREG32(GRBM_SOFT_RESET, tmp);
4268 		tmp = RREG32(GRBM_SOFT_RESET);
4269 
4270 		udelay(50);
4271 
4272 		tmp &= ~grbm_soft_reset;
4273 		WREG32(GRBM_SOFT_RESET, tmp);
4274 		tmp = RREG32(GRBM_SOFT_RESET);
4275 	}
4276 
4277 	if (srbm_soft_reset) {
4278 		tmp = RREG32(SRBM_SOFT_RESET);
4279 		tmp |= srbm_soft_reset;
4280 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4281 		WREG32(SRBM_SOFT_RESET, tmp);
4282 		tmp = RREG32(SRBM_SOFT_RESET);
4283 
4284 		udelay(50);
4285 
4286 		tmp &= ~srbm_soft_reset;
4287 		WREG32(SRBM_SOFT_RESET, tmp);
4288 		tmp = RREG32(SRBM_SOFT_RESET);
4289 	}
4290 
4291 	/* Wait a little for things to settle down */
4292 	udelay(50);
4293 
4294 	evergreen_mc_resume(rdev, &save);
4295 	udelay(50);
4296 
4297 	cik_print_gpu_status_regs(rdev);
4298 }
4299 
4300 /**
4301  * cik_asic_reset - soft reset GPU
4302  *
4303  * @rdev: radeon_device pointer
4304  *
4305  * Look up which blocks are hung and attempt
4306  * to reset them.
4307  * Returns 0 for success.
4308  */
4309 int cik_asic_reset(struct radeon_device *rdev)
4310 {
4311 	u32 reset_mask;
4312 
4313 	reset_mask = cik_gpu_check_soft_reset(rdev);
4314 
4315 	if (reset_mask)
4316 		r600_set_bios_scratch_engine_hung(rdev, true);
4317 
4318 	cik_gpu_soft_reset(rdev, reset_mask);
4319 
4320 	reset_mask = cik_gpu_check_soft_reset(rdev);
4321 
4322 	if (!reset_mask)
4323 		r600_set_bios_scratch_engine_hung(rdev, false);
4324 
4325 	return 0;
4326 }
4327 
4328 /**
4329  * cik_gfx_is_lockup - check if the 3D engine is locked up
4330  *
4331  * @rdev: radeon_device pointer
4332  * @ring: radeon_ring structure holding ring information
4333  *
4334  * Check if the 3D engine is locked up (CIK).
4335  * Returns true if the engine is locked, false if not.
4336  */
4337 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4338 {
4339 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4340 
4341 	if (!(reset_mask & (RADEON_RESET_GFX |
4342 			    RADEON_RESET_COMPUTE |
4343 			    RADEON_RESET_CP))) {
4344 		radeon_ring_lockup_update(ring);
4345 		return false;
4346 	}
4347 	/* force CP activities */
4348 	radeon_ring_force_activity(rdev, ring);
4349 	return radeon_ring_test_lockup(rdev, ring);
4350 }
4351 
4352 /* MC */
4353 /**
4354  * cik_mc_program - program the GPU memory controller
4355  *
4356  * @rdev: radeon_device pointer
4357  *
4358  * Set the location of vram, gart, and AGP in the GPU's
4359  * physical address space (CIK).
4360  */
4361 static void cik_mc_program(struct radeon_device *rdev)
4362 {
4363 	struct evergreen_mc_save save;
4364 	u32 tmp;
4365 	int i, j;
4366 
4367 	/* Initialize HDP */
4368 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4369 		WREG32((0x2c14 + j), 0x00000000);
4370 		WREG32((0x2c18 + j), 0x00000000);
4371 		WREG32((0x2c1c + j), 0x00000000);
4372 		WREG32((0x2c20 + j), 0x00000000);
4373 		WREG32((0x2c24 + j), 0x00000000);
4374 	}
4375 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4376 
4377 	evergreen_mc_stop(rdev, &save);
4378 	if (radeon_mc_wait_for_idle(rdev)) {
4379 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4380 	}
4381 	/* Lock out access through the VGA aperture */
4382 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4383 	/* Update configuration */
4384 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4385 	       rdev->mc.vram_start >> 12);
4386 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4387 	       rdev->mc.vram_end >> 12);
4388 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4389 	       rdev->vram_scratch.gpu_addr >> 12);
4390 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4391 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4392 	WREG32(MC_VM_FB_LOCATION, tmp);
4393 	/* XXX double check these! */
4394 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4395 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4396 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4397 	WREG32(MC_VM_AGP_BASE, 0);
4398 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4399 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4400 	if (radeon_mc_wait_for_idle(rdev)) {
4401 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4402 	}
4403 	evergreen_mc_resume(rdev, &save);
4404 	/* we need to own VRAM, so turn off the VGA renderer here
4405 	 * to stop it from overwriting our objects */
4406 	rv515_vga_render_disable(rdev);
4407 }
4408 
4409 /**
4410  * cik_mc_init - initialize the memory controller driver params
4411  *
4412  * @rdev: radeon_device pointer
4413  *
4414  * Look up the amount of vram, vram width, and decide how to place
4415  * vram and gart within the GPU's physical address space (CIK).
4416  * Returns 0 for success.
4417  */
4418 static int cik_mc_init(struct radeon_device *rdev)
4419 {
4420 	u32 tmp;
4421 	int chansize, numchan;
4422 
4423 	/* Get VRAM information */
4424 	rdev->mc.vram_is_ddr = true;
4425 	tmp = RREG32(MC_ARB_RAMCFG);
4426 	if (tmp & CHANSIZE_MASK) {
4427 		chansize = 64;
4428 	} else {
4429 		chansize = 32;
4430 	}
4431 	tmp = RREG32(MC_SHARED_CHMAP);
4432 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4433 	case 0:
4434 	default:
4435 		numchan = 1;
4436 		break;
4437 	case 1:
4438 		numchan = 2;
4439 		break;
4440 	case 2:
4441 		numchan = 4;
4442 		break;
4443 	case 3:
4444 		numchan = 8;
4445 		break;
4446 	case 4:
4447 		numchan = 3;
4448 		break;
4449 	case 5:
4450 		numchan = 6;
4451 		break;
4452 	case 6:
4453 		numchan = 10;
4454 		break;
4455 	case 7:
4456 		numchan = 12;
4457 		break;
4458 	case 8:
4459 		numchan = 16;
4460 		break;
4461 	}
4462 	rdev->mc.vram_width = numchan * chansize;
4463 	/* Could aper size report 0? */
4464 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4465 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4466 	/* size in MB on CIK */
4467 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4468 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4469 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4470 	si_vram_gtt_location(rdev, &rdev->mc);
4471 	radeon_update_bandwidth_info(rdev);
4472 
4473 	return 0;
4474 }
4475 
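/*
 * Worked example, illustrative only: the NOOFCHAN decoding above
 * combines with the channel size, so a hypothetical board reporting
 * field value 3 (8 channels) with 64-bit channels has a vram_width of
 * 8 * 64 = 512 bits.
 */
#if 0
static int cik_example_vram_width(void)
{
	int chansize = 64;	/* CHANSIZE bit set, hypothetical */
	int numchan = 8;	/* NOOFCHAN field == 3, hypothetical */

	return numchan * chansize;	/* == 512 */
}
#endif
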
4476 /*
4477  * GART
4478  * VMID 0 is the physical GPU addresses as used by the kernel.
4479  * VMIDs 1-15 are used for userspace clients and are handled
4480  * by the radeon vm/hsa code.
4481  */
4482 /**
4483  * cik_pcie_gart_tlb_flush - gart tlb flush callback
4484  *
4485  * @rdev: radeon_device pointer
4486  *
4487  * Flush the TLB for the VMID 0 page table (CIK).
4488  */
4489 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4490 {
4491 	/* flush hdp cache */
4492 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4493 
4494 	/* bits 0-15 are the VM contexts0-15 */
4495 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
4496 }
4497 
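/*
 * Bit-layout sketch, illustrative only: VM_INVALIDATE_REQUEST carries
 * one bit per VM context, so the 0x1 above flushes only VMID 0 (the
 * kernel's GART context).  Flushing a hypothetical VMID n would be:
 */
#if 0
static void cik_example_flush_vmid(struct radeon_device *rdev, unsigned vmid)
{
	WREG32(VM_INVALIDATE_REQUEST, 1 << vmid);	/* vmid in 0..15 */
}
#endif
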
4498 /**
4499  * cik_pcie_gart_enable - gart enable
4500  *
4501  * @rdev: radeon_device pointer
4502  *
4503  * This sets up the TLBs, programs the page tables for VMID0,
4504  * sets up the hw for VMIDs 1-15 which are allocated on
4505  * demand, and sets up the global locations for the LDS, GDS,
4506  * and GPUVM for FSA64 clients (CIK).
4507  * Returns 0 for success, errors for failure.
4508  */
4509 static int cik_pcie_gart_enable(struct radeon_device *rdev)
4510 {
4511 	int r, i;
4512 
4513 	if (rdev->gart.robj == NULL) {
4514 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4515 		return -EINVAL;
4516 	}
4517 	r = radeon_gart_table_vram_pin(rdev);
4518 	if (r)
4519 		return r;
4520 	radeon_gart_restore(rdev);
4521 	/* Setup TLB control */
4522 	WREG32(MC_VM_MX_L1_TLB_CNTL,
4523 	       (0xA << 7) |
4524 	       ENABLE_L1_TLB |
4525 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4526 	       ENABLE_ADVANCED_DRIVER_MODEL |
4527 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4528 	/* Setup L2 cache */
4529 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4530 	       ENABLE_L2_FRAGMENT_PROCESSING |
4531 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4532 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4533 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4534 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4535 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4536 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4537 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4538 	/* setup context0 */
4539 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4540 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4541 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4542 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4543 			(u32)(rdev->dummy_page.addr >> 12));
4544 	WREG32(VM_CONTEXT0_CNTL2, 0);
4545 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4546 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4547 
4548 	WREG32(0x15D4, 0);
4549 	WREG32(0x15D8, 0);
4550 	WREG32(0x15DC, 0);
4551 
4552 	/* empty context1-15 */
4553 	/* FIXME start with 4G, once using 2 level pt switch to full
4554 	 * vm size space
4555 	 */
4556 	/* set vm size, must be a multiple of 4 */
4557 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4558 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4559 	for (i = 1; i < 16; i++) {
4560 		if (i < 8)
4561 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4562 			       rdev->gart.table_addr >> 12);
4563 		else
4564 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4565 			       rdev->gart.table_addr >> 12);
4566 	}
4567 
4568 	/* enable context1-15 */
4569 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4570 	       (u32)(rdev->dummy_page.addr >> 12));
4571 	WREG32(VM_CONTEXT1_CNTL2, 4);
4572 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4573 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4574 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4575 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4576 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4577 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4578 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4579 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4580 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4581 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4582 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4583 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4584 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4585 
4586 	/* TC cache setup ??? */
4587 	WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4588 	WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4589 	WREG32(TC_CFG_L1_STORE_POLICY, 0);
4590 
4591 	WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4592 	WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4593 	WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4594 	WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4595 	WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4596 
4597 	WREG32(TC_CFG_L1_VOLATILE, 0);
4598 	WREG32(TC_CFG_L2_VOLATILE, 0);
4599 
4600 	if (rdev->family == CHIP_KAVERI) {
4601 		u32 tmp = RREG32(CHUB_CONTROL);
4602 		tmp &= ~BYPASS_VM;
4603 		WREG32(CHUB_CONTROL, tmp);
4604 	}
4605 
4606 	/* XXX SH_MEM regs */
4607 	/* where to put LDS, scratch, GPUVM in FSA64 space */
4608 	mutex_lock(&rdev->srbm_mutex);
4609 	for (i = 0; i < 16; i++) {
4610 		cik_srbm_select(rdev, 0, 0, 0, i);
4611 		/* CP and shaders */
4612 		WREG32(SH_MEM_CONFIG, 0);
4613 		WREG32(SH_MEM_APE1_BASE, 1);
4614 		WREG32(SH_MEM_APE1_LIMIT, 0);
4615 		WREG32(SH_MEM_BASES, 0);
4616 		/* SDMA GFX */
4617 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4618 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4619 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4620 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4621 		/* XXX SDMA RLC - todo */
4622 	}
4623 	cik_srbm_select(rdev, 0, 0, 0, 0);
4624 	mutex_unlock(&rdev->srbm_mutex);
4625 
4626 	cik_pcie_gart_tlb_flush(rdev);
4627 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4628 		 (unsigned)(rdev->mc.gtt_size >> 20),
4629 		 (unsigned long long)rdev->gart.table_addr);
4630 	rdev->gart.ready = true;
4631 	return 0;
4632 }
4633 
4634 /**
4635  * cik_pcie_gart_disable - gart disable
4636  *
4637  * @rdev: radeon_device pointer
4638  *
4639  * This disables all VM page tables (CIK).
4640  */
4641 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4642 {
4643 	/* Disable all tables */
4644 	WREG32(VM_CONTEXT0_CNTL, 0);
4645 	WREG32(VM_CONTEXT1_CNTL, 0);
4646 	/* Setup TLB control */
4647 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4648 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4649 	/* Setup L2 cache */
4650 	WREG32(VM_L2_CNTL,
4651 	       ENABLE_L2_FRAGMENT_PROCESSING |
4652 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4653 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4654 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4655 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4656 	WREG32(VM_L2_CNTL2, 0);
4657 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4658 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4659 	radeon_gart_table_vram_unpin(rdev);
4660 }
4661 
4662 /**
4663  * cik_pcie_gart_fini - vm fini callback
4664  *
4665  * @rdev: radeon_device pointer
4666  *
4667  * Tears down the driver GART/VM setup (CIK).
4668  */
4669 static void cik_pcie_gart_fini(struct radeon_device *rdev)
4670 {
4671 	cik_pcie_gart_disable(rdev);
4672 	radeon_gart_table_vram_free(rdev);
4673 	radeon_gart_fini(rdev);
4674 }
4675 
4676 /* vm parser */
4677 /**
4678  * cik_ib_parse - vm ib_parse callback
4679  *
4680  * @rdev: radeon_device pointer
4681  * @ib: indirect buffer pointer
4682  *
4683  * CIK uses hw IB checking so this is a nop (CIK).
4684  */
4685 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4686 {
4687 	return 0;
4688 }
4689 
4690 /*
4691  * vm
4692  * VMID 0 is the physical GPU addresses as used by the kernel.
4693  * VMIDs 1-15 are used for userspace clients and are handled
4694  * by the radeon vm/hsa code.
4695  */
4696 /**
4697  * cik_vm_init - cik vm init callback
4698  *
4699  * @rdev: radeon_device pointer
4700  *
4701  * Inits cik specific vm parameters (number of VMs, base of vram for
4702  * VMIDs 1-15) (CIK).
4703  * Returns 0 for success.
4704  */
4705 int cik_vm_init(struct radeon_device *rdev)
4706 {
4707 	/* number of VMs */
4708 	rdev->vm_manager.nvm = 16;
4709 	/* base offset of vram pages */
4710 	if (rdev->flags & RADEON_IS_IGP) {
4711 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
4712 		tmp <<= 22;
4713 		rdev->vm_manager.vram_base_offset = tmp;
4714 	} else
4715 		rdev->vm_manager.vram_base_offset = 0;
4716 
4717 	return 0;
4718 }
4719 
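/*
 * Worked example, illustrative only: MC_VM_FB_OFFSET is in 4 MiB units,
 * hence the << 22 above; a hypothetical register value of 0x2 maps to a
 * vram_base_offset of 0x2 << 22 = 0x800000 (8 MiB).
 */
#if 0
static u64 cik_example_fb_offset_bytes(u32 fb_offset)
{
	return (u64)fb_offset << 22;	/* 0x2 -> 0x800000 */
}
#endif
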
4720 /**
4721  * cik_vm_fini - cik vm fini callback
4722  *
4723  * @rdev: radeon_device pointer
4724  *
4725  * Tear down any asic specific VM setup (CIK).
4726  */
4727 void cik_vm_fini(struct radeon_device *rdev)
4728 {
4729 }
4730 
4731 /**
4732  * cik_vm_decode_fault - print human readable fault info
4733  *
4734  * @rdev: radeon_device pointer
4735  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4736  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
4737  *
4738  * Print human readable fault information (CIK).
4739  */
4740 static void cik_vm_decode_fault(struct radeon_device *rdev,
4741 				u32 status, u32 addr, u32 mc_client)
4742 {
4743 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4744 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4745 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4746 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
4747 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
4748 
4749 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
4750 	       protections, vmid, addr,
4751 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4752 	       block, mc_client, mc_id);
4753 }
4754 
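/*
 * Decoding sketch, illustrative only: block[] above is simply the four
 * bytes of mc_client interpreted as ASCII, most significant byte first.
 * A hypothetical mc_client of 0x43504700 therefore prints as "CPG".
 */
#if 0
static void cik_example_decode_mc_client(u32 mc_client, char block[5])
{
	block[0] = mc_client >> 24;
	block[1] = (mc_client >> 16) & 0xff;
	block[2] = (mc_client >> 8) & 0xff;
	block[3] = mc_client & 0xff;
	block[4] = 0;
}
#endif
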
4755 /**
4756  * cik_vm_flush - cik vm flush using the CP
4757  *
4758  * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 * @vm: radeon_vm pointer
4759  *
4760  * Update the page table base and flush the VM TLB
4761  * using the CP (CIK).
4762  */
4763 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4764 {
4765 	struct radeon_ring *ring = &rdev->ring[ridx];
4766 
4767 	if (vm == NULL)
4768 		return;
4769 
4770 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4771 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4772 				 WRITE_DATA_DST_SEL(0)));
4773 	if (vm->id < 8) {
4774 		radeon_ring_write(ring,
4775 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4776 	} else {
4777 		radeon_ring_write(ring,
4778 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4779 	}
4780 	radeon_ring_write(ring, 0);
4781 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4782 
4783 	/* update SH_MEM_* regs */
4784 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4785 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4786 				 WRITE_DATA_DST_SEL(0)));
4787 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4788 	radeon_ring_write(ring, 0);
4789 	radeon_ring_write(ring, VMID(vm->id));
4790 
4791 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4792 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4793 				 WRITE_DATA_DST_SEL(0)));
4794 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
4795 	radeon_ring_write(ring, 0);
4796 
4797 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4798 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4799 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4800 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4801 
4802 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4803 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4804 				 WRITE_DATA_DST_SEL(0)));
4805 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4806 	radeon_ring_write(ring, 0);
4807 	radeon_ring_write(ring, VMID(0));
4808 
4809 	/* HDP flush */
4810 	/* We should be using the WAIT_REG_MEM packet here like in
4811 	 * cik_fence_ring_emit(), but it causes the CP to hang in this
4812 	 * context...
4813 	 */
4814 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4815 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4816 				 WRITE_DATA_DST_SEL(0)));
4817 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4818 	radeon_ring_write(ring, 0);
4819 	radeon_ring_write(ring, 0);
4820 
4821 	/* bits 0-15 are the VM contexts0-15 */
4822 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4823 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4824 				 WRITE_DATA_DST_SEL(0)));
4825 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4826 	radeon_ring_write(ring, 0);
4827 	radeon_ring_write(ring, 1 << vm->id);
4828 
4829 	/* compute doesn't have PFP */
4830 	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4831 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
4832 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4833 		radeon_ring_write(ring, 0x0);
4834 	}
4835 }
4836 
4837 /**
4838  * cik_vm_set_page - update the page tables using CP or sDMA
4839  *
4840  * @rdev: radeon_device pointer
4841  * @ib: indirect buffer to fill with commands
4842  * @pe: addr of the page entry
4843  * @addr: dst addr to write into pe
4844  * @count: number of page entries to update
4845  * @incr: increase next addr by incr bytes
4846  * @flags: access flags
4847  *
4848  * Update the page tables using CP or sDMA (CIK).
4849  */
4850 void cik_vm_set_page(struct radeon_device *rdev,
4851 		     struct radeon_ib *ib,
4852 		     uint64_t pe,
4853 		     uint64_t addr, unsigned count,
4854 		     uint32_t incr, uint32_t flags)
4855 {
4856 	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4857 	uint64_t value;
4858 	unsigned ndw;
4859 
4860 	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4861 		/* CP */
4862 		while (count) {
4863 			ndw = 2 + count * 2;
4864 			if (ndw > 0x3FFE)
4865 				ndw = 0x3FFE;
4866 
4867 			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4868 			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4869 						    WRITE_DATA_DST_SEL(1));
4870 			ib->ptr[ib->length_dw++] = pe;
4871 			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4872 			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4873 				if (flags & RADEON_VM_PAGE_SYSTEM) {
4874 					value = radeon_vm_map_gart(rdev, addr);
4875 					value &= 0xFFFFFFFFFFFFF000ULL;
4876 				} else if (flags & RADEON_VM_PAGE_VALID) {
4877 					value = addr;
4878 				} else {
4879 					value = 0;
4880 				}
4881 				addr += incr;
4882 				value |= r600_flags;
4883 				ib->ptr[ib->length_dw++] = value;
4884 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
4885 			}
4886 		}
4887 	} else {
4888 		/* DMA */
4889 		cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
4890 	}
4891 }
4892 
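/*
 * Packet-size arithmetic, illustrative only: on the CP path above each
 * WRITE_DATA packet spends 2 dwords on the destination address and
 * 2 dwords per page entry, with ndw capped at 0x3FFE, so a single
 * packet covers at most (0x3FFE - 2) / 2 = 8190 PTEs before the loop
 * starts the next packet.
 */
#if 0
static unsigned cik_example_ptes_per_packet(void)
{
	return (0x3FFE - 2) / 2;	/* == 8190 */
}
#endif
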
4893 /*
4894  * RLC
4895  * The RLC is a multi-purpose microengine that handles a
4896  * variety of functions, the most important of which is
4897  * the interrupt controller.
4898  */
4899 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
4900 					  bool enable)
4901 {
4902 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
4903 
4904 	if (enable)
4905 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4906 	else
4907 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4908 	WREG32(CP_INT_CNTL_RING0, tmp);
4909 }
4910 
4911 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
4912 {
4913 	u32 tmp;
4914 
4915 	tmp = RREG32(RLC_LB_CNTL);
4916 	if (enable)
4917 		tmp |= LOAD_BALANCE_ENABLE;
4918 	else
4919 		tmp &= ~LOAD_BALANCE_ENABLE;
4920 	WREG32(RLC_LB_CNTL, tmp);
4921 }
4922 
4923 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
4924 {
4925 	u32 i, j, k;
4926 	u32 mask;
4927 
4928 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4929 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4930 			cik_select_se_sh(rdev, i, j);
4931 			for (k = 0; k < rdev->usec_timeout; k++) {
4932 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
4933 					break;
4934 				udelay(1);
4935 			}
4936 		}
4937 	}
4938 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4939 
4940 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4941 	for (k = 0; k < rdev->usec_timeout; k++) {
4942 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4943 			break;
4944 		udelay(1);
4945 	}
4946 }
4947 
4948 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
4949 {
4950 	u32 tmp;
4951 
4952 	tmp = RREG32(RLC_CNTL);
4953 	if (tmp != rlc)
4954 		WREG32(RLC_CNTL, rlc);
4955 }
4956 
4957 static u32 cik_halt_rlc(struct radeon_device *rdev)
4958 {
4959 	u32 data, orig;
4960 
4961 	orig = data = RREG32(RLC_CNTL);
4962 
4963 	if (data & RLC_ENABLE) {
4964 		u32 i;
4965 
4966 		data &= ~RLC_ENABLE;
4967 		WREG32(RLC_CNTL, data);
4968 
4969 		for (i = 0; i < rdev->usec_timeout; i++) {
4970 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
4971 				break;
4972 			udelay(1);
4973 		}
4974 
4975 		cik_wait_for_rlc_serdes(rdev);
4976 	}
4977 
4978 	return orig;
4979 }
4980 
4981 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
4982 {
4983 	u32 tmp, i, mask;
4984 
4985 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
4986 	WREG32(RLC_GPR_REG2, tmp);
4987 
4988 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
4989 	for (i = 0; i < rdev->usec_timeout; i++) {
4990 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
4991 			break;
4992 		udelay(1);
4993 	}
4994 
4995 	for (i = 0; i < rdev->usec_timeout; i++) {
4996 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
4997 			break;
4998 		udelay(1);
4999 	}
5000 }
5001 
5002 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5003 {
5004 	u32 tmp;
5005 
5006 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5007 	WREG32(RLC_GPR_REG2, tmp);
5008 }
5009 
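/*
 * Usage sketch, illustrative only: the enter/exit helpers above are
 * meant to bracket register accesses that must not race the RLC:
 */
#if 0
static void cik_example_poke_under_safe_mode(struct radeon_device *rdev)
{
	cik_enter_rlc_safe_mode(rdev);
	/* ... touch RLC-managed clock/power gating state here ... */
	cik_exit_rlc_safe_mode(rdev);
}
#endif
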
5010 /**
5011  * cik_rlc_stop - stop the RLC ME
5012  *
5013  * @rdev: radeon_device pointer
5014  *
5015  * Halt the RLC ME (MicroEngine) (CIK).
5016  */
5017 static void cik_rlc_stop(struct radeon_device *rdev)
5018 {
5019 	WREG32(RLC_CNTL, 0);
5020 
5021 	cik_enable_gui_idle_interrupt(rdev, false);
5022 
5023 	cik_wait_for_rlc_serdes(rdev);
5024 }
5025 
5026 /**
5027  * cik_rlc_start - start the RLC ME
5028  *
5029  * @rdev: radeon_device pointer
5030  *
5031  * Unhalt the RLC ME (MicroEngine) (CIK).
5032  */
5033 static void cik_rlc_start(struct radeon_device *rdev)
5034 {
5035 	WREG32(RLC_CNTL, RLC_ENABLE);
5036 
5037 	cik_enable_gui_idle_interrupt(rdev, true);
5038 
5039 	udelay(50);
5040 }
5041 
5042 /**
5043  * cik_rlc_resume - setup the RLC hw
5044  *
5045  * @rdev: radeon_device pointer
5046  *
5047  * Initialize the RLC registers, load the ucode,
5048  * and start the RLC (CIK).
5049  * Returns 0 for success, -EINVAL if the ucode is not available.
5050  */
5051 static int cik_rlc_resume(struct radeon_device *rdev)
5052 {
5053 	u32 i, size, tmp;
5054 	const __be32 *fw_data;
5055 
5056 	if (!rdev->rlc_fw)
5057 		return -EINVAL;
5058 
5059 	switch (rdev->family) {
5060 	case CHIP_BONAIRE:
5061 	default:
5062 		size = BONAIRE_RLC_UCODE_SIZE;
5063 		break;
5064 	case CHIP_KAVERI:
5065 		size = KV_RLC_UCODE_SIZE;
5066 		break;
5067 	case CHIP_KABINI:
5068 		size = KB_RLC_UCODE_SIZE;
5069 		break;
5070 	}
5071 
5072 	cik_rlc_stop(rdev);
5073 
5074 	/* disable CG */
5075 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5076 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5077 
5078 	si_rlc_reset(rdev);
5079 
5080 	cik_init_pg(rdev);
5081 
5082 	cik_init_cg(rdev);
5083 
5084 	WREG32(RLC_LB_CNTR_INIT, 0);
5085 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5086 
5087 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5088 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5089 	WREG32(RLC_LB_PARAMS, 0x00600408);
5090 	WREG32(RLC_LB_CNTL, 0x80000004);
5091 
5092 	WREG32(RLC_MC_CNTL, 0);
5093 	WREG32(RLC_UCODE_CNTL, 0);
5094 
5095 	fw_data = (const __be32 *)rdev->rlc_fw->data;
5096 	WREG32(RLC_GPM_UCODE_ADDR, 0);
5097 	for (i = 0; i < size; i++)
5098 		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5099 	WREG32(RLC_GPM_UCODE_ADDR, 0);
5100 
5101 	/* XXX - find out what chips support lbpw */
5102 	cik_enable_lbpw(rdev, false);
5103 
5104 	if (rdev->family == CHIP_BONAIRE)
5105 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
5106 
5107 	cik_rlc_start(rdev);
5108 
5109 	return 0;
5110 }
5111 
5112 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5113 {
5114 	u32 data, orig, tmp, tmp2;
5115 
5116 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5117 
5118 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5119 		cik_enable_gui_idle_interrupt(rdev, true);
5120 
5121 		tmp = cik_halt_rlc(rdev);
5122 
5123 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5124 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5125 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5126 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5127 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
5128 
5129 		cik_update_rlc(rdev, tmp);
5130 
5131 		data |= CGCG_EN | CGLS_EN;
5132 	} else {
5133 		cik_enable_gui_idle_interrupt(rdev, false);
5134 
5135 		RREG32(CB_CGTT_SCLK_CTRL);
5136 		RREG32(CB_CGTT_SCLK_CTRL);
5137 		RREG32(CB_CGTT_SCLK_CTRL);
5138 		RREG32(CB_CGTT_SCLK_CTRL);
5139 
5140 		data &= ~(CGCG_EN | CGLS_EN);
5141 	}
5142 
5143 	if (orig != data)
5144 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5146 }
5147 
5148 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5149 {
5150 	u32 data, orig, tmp = 0;
5151 
5152 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5153 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
5154 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5155 				orig = data = RREG32(CP_MEM_SLP_CNTL);
5156 				data |= CP_MEM_LS_EN;
5157 				if (orig != data)
5158 					WREG32(CP_MEM_SLP_CNTL, data);
5159 			}
5160 		}
5161 
5162 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5163 		data &= 0xfffffffd;
5164 		if (orig != data)
5165 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5166 
5167 		tmp = cik_halt_rlc(rdev);
5168 
5169 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5170 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5171 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5172 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5173 		WREG32(RLC_SERDES_WR_CTRL, data);
5174 
5175 		cik_update_rlc(rdev, tmp);
5176 
5177 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
5178 			orig = data = RREG32(CGTS_SM_CTRL_REG);
5179 			data &= ~SM_MODE_MASK;
5180 			data |= SM_MODE(0x2);
5181 			data |= SM_MODE_ENABLE;
5182 			data &= ~CGTS_OVERRIDE;
5183 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
5184 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
5185 				data &= ~CGTS_LS_OVERRIDE;
5186 			data &= ~ON_MONITOR_ADD_MASK;
5187 			data |= ON_MONITOR_ADD_EN;
5188 			data |= ON_MONITOR_ADD(0x96);
5189 			if (orig != data)
5190 				WREG32(CGTS_SM_CTRL_REG, data);
5191 		}
5192 	} else {
5193 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5194 		data |= 0x00000002;
5195 		if (orig != data)
5196 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5197 
5198 		data = RREG32(RLC_MEM_SLP_CNTL);
5199 		if (data & RLC_MEM_LS_EN) {
5200 			data &= ~RLC_MEM_LS_EN;
5201 			WREG32(RLC_MEM_SLP_CNTL, data);
5202 		}
5203 
5204 		data = RREG32(CP_MEM_SLP_CNTL);
5205 		if (data & CP_MEM_LS_EN) {
5206 			data &= ~CP_MEM_LS_EN;
5207 			WREG32(CP_MEM_SLP_CNTL, data);
5208 		}
5209 
5210 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5211 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
5212 		if (orig != data)
5213 			WREG32(CGTS_SM_CTRL_REG, data);
5214 
5215 		tmp = cik_halt_rlc(rdev);
5216 
5217 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5218 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5219 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5220 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
5221 		WREG32(RLC_SERDES_WR_CTRL, data);
5222 
5223 		cik_update_rlc(rdev, tmp);
5224 	}
5225 }
5226 
5227 static const u32 mc_cg_registers[] =
5228 {
5229 	MC_HUB_MISC_HUB_CG,
5230 	MC_HUB_MISC_SIP_CG,
5231 	MC_HUB_MISC_VM_CG,
5232 	MC_XPB_CLK_GAT,
5233 	ATC_MISC_CG,
5234 	MC_CITF_MISC_WR_CG,
5235 	MC_CITF_MISC_RD_CG,
5236 	MC_CITF_MISC_VM_CG,
5237 	VM_L2_CG,
5238 };
5239 
5240 static void cik_enable_mc_ls(struct radeon_device *rdev,
5241 			     bool enable)
5242 {
5243 	int i;
5244 	u32 orig, data;
5245 
5246 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5247 		orig = data = RREG32(mc_cg_registers[i]);
5248 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5249 			data |= MC_LS_ENABLE;
5250 		else
5251 			data &= ~MC_LS_ENABLE;
5252 		if (data != orig)
5253 			WREG32(mc_cg_registers[i], data);
5254 	}
5255 }
5256 
5257 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5258 			       bool enable)
5259 {
5260 	int i;
5261 	u32 orig, data;
5262 
5263 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5264 		orig = data = RREG32(mc_cg_registers[i]);
5265 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5266 			data |= MC_CG_ENABLE;
5267 		else
5268 			data &= ~MC_CG_ENABLE;
5269 		if (data != orig)
5270 			WREG32(mc_cg_registers[i], data);
5271 	}
5272 }
5273 
5274 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5275 				 bool enable)
5276 {
5277 	u32 orig, data;
5278 
5279 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5280 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5281 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5282 	} else {
5283 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5284 		data |= 0xff000000;
5285 		if (data != orig)
5286 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5287 
5288 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5289 		data |= 0xff000000;
5290 		if (data != orig)
5291 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5292 	}
5293 }
5294 
5295 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5296 				 bool enable)
5297 {
5298 	u32 orig, data;
5299 
5300 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
5301 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5302 		data |= 0x100;
5303 		if (orig != data)
5304 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5305 
5306 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5307 		data |= 0x100;
5308 		if (orig != data)
5309 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5310 	} else {
5311 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5312 		data &= ~0x100;
5313 		if (orig != data)
5314 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5315 
5316 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5317 		data &= ~0x100;
5318 		if (orig != data)
5319 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5320 	}
5321 }
5322 
5323 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
5324 				bool enable)
5325 {
5326 	u32 orig, data;
5327 
5328 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5329 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5330 		data |= 0xfff;
5331 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5332 
5333 		orig = data = RREG32(UVD_CGC_CTRL);
5334 		data |= DCM;
5335 		if (orig != data)
5336 			WREG32(UVD_CGC_CTRL, data);
5337 	} else {
5338 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5339 		data &= ~0xfff;
5340 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5341 
5342 		orig = data = RREG32(UVD_CGC_CTRL);
5343 		data &= ~DCM;
5344 		if (orig != data)
5345 			WREG32(UVD_CGC_CTRL, data);
5346 	}
5347 }
5348 
5349 static void cik_enable_bif_mgls(struct radeon_device *rdev,
5350 			       bool enable)
5351 {
5352 	u32 orig, data;
5353 
5354 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
5355 
5356 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5357 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5358 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5359 	else
5360 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5361 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5362 
5363 	if (orig != data)
5364 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
5365 }
5366 
5367 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5368 				bool enable)
5369 {
5370 	u32 orig, data;
5371 
5372 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5373 
5374 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5375 		data &= ~CLOCK_GATING_DIS;
5376 	else
5377 		data |= CLOCK_GATING_DIS;
5378 
5379 	if (orig != data)
5380 		WREG32(HDP_HOST_PATH_CNTL, data);
5381 }
5382 
5383 static void cik_enable_hdp_ls(struct radeon_device *rdev,
5384 			      bool enable)
5385 {
5386 	u32 orig, data;
5387 
5388 	orig = data = RREG32(HDP_MEM_POWER_LS);
5389 
5390 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5391 		data |= HDP_LS_ENABLE;
5392 	else
5393 		data &= ~HDP_LS_ENABLE;
5394 
5395 	if (orig != data)
5396 		WREG32(HDP_MEM_POWER_LS, data);
5397 }
5398 
5399 void cik_update_cg(struct radeon_device *rdev,
5400 		   u32 block, bool enable)
5401 {
5403 	if (block & RADEON_CG_BLOCK_GFX) {
5404 		cik_enable_gui_idle_interrupt(rdev, false);
5405 		/* order matters! */
5406 		if (enable) {
5407 			cik_enable_mgcg(rdev, true);
5408 			cik_enable_cgcg(rdev, true);
5409 		} else {
5410 			cik_enable_cgcg(rdev, false);
5411 			cik_enable_mgcg(rdev, false);
5412 		}
5413 		cik_enable_gui_idle_interrupt(rdev, true);
5414 	}
5415 
5416 	if (block & RADEON_CG_BLOCK_MC) {
5417 		if (!(rdev->flags & RADEON_IS_IGP)) {
5418 			cik_enable_mc_mgcg(rdev, enable);
5419 			cik_enable_mc_ls(rdev, enable);
5420 		}
5421 	}
5422 
5423 	if (block & RADEON_CG_BLOCK_SDMA) {
5424 		cik_enable_sdma_mgcg(rdev, enable);
5425 		cik_enable_sdma_mgls(rdev, enable);
5426 	}
5427 
5428 	if (block & RADEON_CG_BLOCK_BIF) {
5429 		cik_enable_bif_mgls(rdev, enable);
5430 	}
5431 
5432 	if (block & RADEON_CG_BLOCK_UVD) {
5433 		if (rdev->has_uvd)
5434 			cik_enable_uvd_mgcg(rdev, enable);
5435 	}
5436 
5437 	if (block & RADEON_CG_BLOCK_HDP) {
5438 		cik_enable_hdp_mgcg(rdev, enable);
5439 		cik_enable_hdp_ls(rdev, enable);
5440 	}
5441 }
5442 
5443 static void cik_init_cg(struct radeon_device *rdev)
5444 {
5446 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
5447 
5448 	if (rdev->has_uvd)
5449 		si_init_uvd_internal_cg(rdev);
5450 
5451 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5452 			     RADEON_CG_BLOCK_SDMA |
5453 			     RADEON_CG_BLOCK_BIF |
5454 			     RADEON_CG_BLOCK_UVD |
5455 			     RADEON_CG_BLOCK_HDP), true);
5456 }
5457 
5458 static void cik_fini_cg(struct radeon_device *rdev)
5459 {
5460 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5461 			     RADEON_CG_BLOCK_SDMA |
5462 			     RADEON_CG_BLOCK_BIF |
5463 			     RADEON_CG_BLOCK_UVD |
5464 			     RADEON_CG_BLOCK_HDP), false);
5465 
5466 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
5467 }
5468 
5469 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
5470 					  bool enable)
5471 {
5472 	u32 data, orig;
5473 
5474 	orig = data = RREG32(RLC_PG_CNTL);
5475 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5476 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5477 	else
5478 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5479 	if (orig != data)
5480 		WREG32(RLC_PG_CNTL, data);
5481 }
5482 
5483 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
5484 					  bool enable)
5485 {
5486 	u32 data, orig;
5487 
5488 	orig = data = RREG32(RLC_PG_CNTL);
5489 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5490 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5491 	else
5492 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5493 	if (orig != data)
5494 		WREG32(RLC_PG_CNTL, data);
5495 }
5496 
5497 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
5498 {
5499 	u32 data, orig;
5500 
5501 	orig = data = RREG32(RLC_PG_CNTL);
5502 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
5503 		data &= ~DISABLE_CP_PG;
5504 	else
5505 		data |= DISABLE_CP_PG;
5506 	if (orig != data)
5507 		WREG32(RLC_PG_CNTL, data);
5508 }
5509 
5510 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
5511 {
5512 	u32 data, orig;
5513 
5514 	orig = data = RREG32(RLC_PG_CNTL);
5515 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
5516 		data &= ~DISABLE_GDS_PG;
5517 	else
5518 		data |= DISABLE_GDS_PG;
5519 	if (orig != data)
5520 		WREG32(RLC_PG_CNTL, data);
5521 }
5522 
5523 #define CP_ME_TABLE_SIZE    96
5524 #define CP_ME_TABLE_OFFSET  2048
5525 #define CP_MEC_TABLE_OFFSET 4096
5526 
5527 void cik_init_cp_pg_table(struct radeon_device *rdev)
5528 {
5529 	const __be32 *fw_data;
5530 	volatile u32 *dst_ptr;
5531 	int me, i, max_me = 4;
5532 	u32 bo_offset = 0;
5533 	u32 table_offset;
5534 
5535 	if (rdev->family == CHIP_KAVERI)
5536 		max_me = 5;
5537 
5538 	if (rdev->rlc.cp_table_ptr == NULL)
5539 		return;
5540 
5541 	/* write the cp table buffer */
5542 	dst_ptr = rdev->rlc.cp_table_ptr;
5543 	for (me = 0; me < max_me; me++) {
5544 		if (me == 0) {
5545 			fw_data = (const __be32 *)rdev->ce_fw->data;
5546 			table_offset = CP_ME_TABLE_OFFSET;
5547 		} else if (me == 1) {
5548 			fw_data = (const __be32 *)rdev->pfp_fw->data;
5549 			table_offset = CP_ME_TABLE_OFFSET;
5550 		} else if (me == 2) {
5551 			fw_data = (const __be32 *)rdev->me_fw->data;
5552 			table_offset = CP_ME_TABLE_OFFSET;
5553 		} else {
5554 			fw_data = (const __be32 *)rdev->mec_fw->data;
5555 			table_offset = CP_MEC_TABLE_OFFSET;
5556 		}
5557 
5558 		for (i = 0; i < CP_ME_TABLE_SIZE; i++) {
5559 			dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
5560 		}
5561 		bo_offset += CP_ME_TABLE_SIZE;
5562 	}
5563 }
5564 
5565 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
5566 				bool enable)
5567 {
5568 	u32 data, orig;
5569 
5570 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5571 		orig = data = RREG32(RLC_PG_CNTL);
5572 		data |= GFX_PG_ENABLE;
5573 		if (orig != data)
5574 			WREG32(RLC_PG_CNTL, data);
5575 
5576 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
5577 		data |= AUTO_PG_EN;
5578 		if (orig != data)
5579 			WREG32(RLC_AUTO_PG_CTRL, data);
5580 	} else {
5581 		orig = data = RREG32(RLC_PG_CNTL);
5582 		data &= ~GFX_PG_ENABLE;
5583 		if (orig != data)
5584 			WREG32(RLC_PG_CNTL, data);
5585 
5586 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
5587 		data &= ~AUTO_PG_EN;
5588 		if (orig != data)
5589 			WREG32(RLC_AUTO_PG_CTRL, data);
5590 
5591 		data = RREG32(DB_RENDER_CONTROL);
5592 	}
5593 }
5594 
5595 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5596 {
5597 	u32 mask = 0, tmp, tmp1;
5598 	int i;
5599 
5600 	cik_select_se_sh(rdev, se, sh);
5601 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5602 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5603 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5604 
5605 	tmp &= 0xffff0000;
5606 
5607 	tmp |= tmp1;
5608 	tmp >>= 16;
5609 
5610 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
5611 		mask <<= 1;
5612 		mask |= 1;
5613 	}
5614 
5615 	return (~tmp) & mask;
5616 }
5617 
5618 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
5619 {
5620 	u32 i, j, k, active_cu_number = 0;
5621 	u32 mask, counter, cu_bitmap;
5622 	u32 tmp = 0;
5623 
5624 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5625 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5626 			mask = 1;
5627 			cu_bitmap = 0;
5628 			counter = 0;
5629 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
5630 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
5631 					if (counter < 2)
5632 						cu_bitmap |= mask;
5633 						counter++;
5634 				}
5635 				mask <<= 1;
5636 			}
5637 
5638 			active_cu_number += counter;
5639 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5640 		}
5641 	}
5642 
5643 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5644 
5645 	tmp = RREG32(RLC_MAX_PG_CU);
5646 	tmp &= ~MAX_PU_CU_MASK;
5647 	tmp |= MAX_PU_CU(active_cu_number);
5648 	WREG32(RLC_MAX_PG_CU, tmp);
5649 }
5650 
5651 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
5652 				       bool enable)
5653 {
5654 	u32 data, orig;
5655 
5656 	orig = data = RREG32(RLC_PG_CNTL);
5657 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
5658 		data |= STATIC_PER_CU_PG_ENABLE;
5659 	else
5660 		data &= ~STATIC_PER_CU_PG_ENABLE;
5661 	if (orig != data)
5662 		WREG32(RLC_PG_CNTL, data);
5663 }
5664 
5665 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
5666 					bool enable)
5667 {
5668 	u32 data, orig;
5669 
5670 	orig = data = RREG32(RLC_PG_CNTL);
5671 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
5672 		data |= DYN_PER_CU_PG_ENABLE;
5673 	else
5674 		data &= ~DYN_PER_CU_PG_ENABLE;
5675 	if (orig != data)
5676 		WREG32(RLC_PG_CNTL, data);
5677 }
5678 
5679 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
5680 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
5681 
5682 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
5683 {
5684 	u32 data, orig;
5685 	u32 i;
5686 
5687 	if (rdev->rlc.cs_data) {
5688 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
5689 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
5690 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
5691 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
5692 	} else {
5693 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
5694 		for (i = 0; i < 3; i++)
5695 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
5696 	}
5697 	if (rdev->rlc.reg_list) {
5698 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
5699 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
5700 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
5701 	}
5702 
5703 	orig = data = RREG32(RLC_PG_CNTL);
5704 	data |= GFX_PG_SRC;
5705 	if (orig != data)
5706 		WREG32(RLC_PG_CNTL, data);
5707 
5708 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5709 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
5710 
5711 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
5712 	data &= ~IDLE_POLL_COUNT_MASK;
5713 	data |= IDLE_POLL_COUNT(0x60);
5714 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
5715 
5716 	data = 0x10101010;
5717 	WREG32(RLC_PG_DELAY, data);
5718 
5719 	data = RREG32(RLC_PG_DELAY_2);
5720 	data &= ~0xff;
5721 	data |= 0x3;
5722 	WREG32(RLC_PG_DELAY_2, data);
5723 
5724 	data = RREG32(RLC_AUTO_PG_CTRL);
5725 	data &= ~GRBM_REG_SGIT_MASK;
5726 	data |= GRBM_REG_SGIT(0x700);
5727 	WREG32(RLC_AUTO_PG_CTRL, data);
5729 }
5730 
5731 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
5732 {
5733 	cik_enable_gfx_cgpg(rdev, enable);
5734 	cik_enable_gfx_static_mgpg(rdev, enable);
5735 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
5736 }
5737 
5738 u32 cik_get_csb_size(struct radeon_device *rdev)
5739 {
5740 	u32 count = 0;
5741 	const struct cs_section_def *sect = NULL;
5742 	const struct cs_extent_def *ext = NULL;
5743 
5744 	if (rdev->rlc.cs_data == NULL)
5745 		return 0;
5746 
5747 	/* begin clear state */
5748 	count += 2;
5749 	/* context control state */
5750 	count += 3;
5751 
5752 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5753 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5754 			if (sect->id == SECT_CONTEXT)
5755 				count += 2 + ext->reg_count;
5756 			else
5757 				return 0;
5758 		}
5759 	}
5760 	/* pa_sc_raster_config/pa_sc_raster_config1 */
5761 	count += 4;
5762 	/* end clear state */
5763 	count += 2;
5764 	/* clear state */
5765 	count += 2;
5766 
5767 	return count;
5768 }
5769 
5770 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5771 {
5772 	u32 count = 0, i;
5773 	const struct cs_section_def *sect = NULL;
5774 	const struct cs_extent_def *ext = NULL;
5775 
5776 	if (rdev->rlc.cs_data == NULL)
5777 		return;
5778 	if (buffer == NULL)
5779 		return;
5780 
5781 	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
5782 	buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;
5783 
5784 	buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
5785 	buffer[count++] = 0x80000000;
5786 	buffer[count++] = 0x80000000;
5787 
5788 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5789 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5790 			if (sect->id == SECT_CONTEXT) {
5791 				buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
5792 				buffer[count++] = ext->reg_index - 0xa000;
5793 				for (i = 0; i < ext->reg_count; i++)
5794 					buffer[count++] = ext->extent[i];
5795 			} else {
5796 				return;
5797 			}
5798 		}
5799 	}
5800 
5801 	buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
5802 	buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
5803 	switch (rdev->family) {
5804 	case CHIP_BONAIRE:
5805 		buffer[count++] = 0x16000012;
5806 		buffer[count++] = 0x00000000;
5807 		break;
5808 	case CHIP_KAVERI:
5809 		buffer[count++] = 0x00000000; /* XXX */
5810 		buffer[count++] = 0x00000000;
5811 		break;
5812 	case CHIP_KABINI:
5813 		buffer[count++] = 0x00000000; /* XXX */
5814 		buffer[count++] = 0x00000000;
5815 		break;
5816 	default:
5817 		buffer[count++] = 0x00000000;
5818 		buffer[count++] = 0x00000000;
5819 		break;
5820 	}
5821 
5822 	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
5823 	buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;
5824 
5825 	buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
5826 	buffer[count++] = 0;
5827 }
5828 
5829 static void cik_init_pg(struct radeon_device *rdev)
5830 {
5831 	if (rdev->pg_flags) {
5832 		cik_enable_sck_slowdown_on_pu(rdev, true);
5833 		cik_enable_sck_slowdown_on_pd(rdev, true);
5834 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5835 			cik_init_gfx_cgpg(rdev);
5836 			cik_enable_cp_pg(rdev, true);
5837 			cik_enable_gds_pg(rdev, true);
5838 		}
5839 		cik_init_ao_cu_mask(rdev);
5840 		cik_update_gfx_pg(rdev, true);
5841 	}
5842 }
5843 
5844 static void cik_fini_pg(struct radeon_device *rdev)
5845 {
5846 	if (rdev->pg_flags) {
5847 		cik_update_gfx_pg(rdev, false);
5848 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5849 			cik_enable_cp_pg(rdev, false);
5850 			cik_enable_gds_pg(rdev, false);
5851 		}
5852 	}
5853 }
5854 
5855 /*
5856  * Interrupts
5857  * Starting with r6xx, interrupts are handled via a ring buffer.
5858  * Ring buffers are areas of GPU accessible memory that the GPU
5859  * writes interrupt vectors into and the host reads vectors out of.
5860  * There is a rptr (read pointer) that determines where the
5861  * host is currently reading, and a wptr (write pointer)
5862  * which determines where the GPU has written.  When the
5863  * pointers are equal, the ring is idle.  When the GPU
5864  * writes vectors to the ring buffer, it increments the
5865  * wptr.  When there is an interrupt, the host then starts
5866  * fetching vectors and processing them until the pointers are
5867  * equal again, at which point it updates the rptr.
5868  */
5869 
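/* Editor's note: a minimal, non-compiled sketch (hypothetical helper, not
 * driver API) of the host-side drain loop described above: read the wptr,
 * process 16-byte vectors until the rptr catches up, then publish the new
 * rptr so the hardware knows the ring has been drained.  The real loop
 * lives in cik_irq_process() below.
 */
#if 0
static void ih_drain_sketch(struct radeon_device *rdev)
{
	/* wptr/rptr are byte offsets into the IH ring */
	u32 wptr = RREG32(IH_RB_WPTR) & rdev->ih.ptr_mask;
	u32 rptr = rdev->ih.rptr;

	while (rptr != wptr) {
		/* each IV ring entry is 16 bytes (4 dwords) */
		process_iv_entry(&rdev->ih.ring[rptr / 4]);	/* hypothetical */
		rptr = (rptr + 16) & rdev->ih.ptr_mask;
	}
	WREG32(IH_RB_RPTR, rptr);
	rdev->ih.rptr = rptr;
}
#endif
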
5870 /**
5871  * cik_enable_interrupts - Enable the interrupt ring buffer
5872  *
5873  * @rdev: radeon_device pointer
5874  *
5875  * Enable the interrupt ring buffer (CIK).
5876  */
5877 static void cik_enable_interrupts(struct radeon_device *rdev)
5878 {
5879 	u32 ih_cntl = RREG32(IH_CNTL);
5880 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5881 
5882 	ih_cntl |= ENABLE_INTR;
5883 	ih_rb_cntl |= IH_RB_ENABLE;
5884 	WREG32(IH_CNTL, ih_cntl);
5885 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5886 	rdev->ih.enabled = true;
5887 }
5888 
5889 /**
5890  * cik_disable_interrupts - Disable the interrupt ring buffer
5891  *
5892  * @rdev: radeon_device pointer
5893  *
5894  * Disable the interrupt ring buffer (CIK).
5895  */
5896 static void cik_disable_interrupts(struct radeon_device *rdev)
5897 {
5898 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5899 	u32 ih_cntl = RREG32(IH_CNTL);
5900 
5901 	ih_rb_cntl &= ~IH_RB_ENABLE;
5902 	ih_cntl &= ~ENABLE_INTR;
5903 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5904 	WREG32(IH_CNTL, ih_cntl);
5905 	/* set rptr, wptr to 0 */
5906 	WREG32(IH_RB_RPTR, 0);
5907 	WREG32(IH_RB_WPTR, 0);
5908 	rdev->ih.enabled = false;
5909 	rdev->ih.rptr = 0;
5910 }
5911 
5912 /**
5913  * cik_disable_interrupt_state - Disable all interrupt sources
5914  *
5915  * @rdev: radeon_device pointer
5916  *
5917  * Clear all interrupt enable bits used by the driver (CIK).
5918  */
5919 static void cik_disable_interrupt_state(struct radeon_device *rdev)
5920 {
5921 	u32 tmp;
5922 
5923 	/* gfx ring */
5924 	tmp = RREG32(CP_INT_CNTL_RING0) &
5925 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5926 	WREG32(CP_INT_CNTL_RING0, tmp);
5927 	/* sdma */
5928 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5929 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5930 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5931 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5932 	/* compute queues */
5933 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
5934 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
5935 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
5936 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
5937 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
5938 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
5939 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
5940 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
5941 	/* grbm */
5942 	WREG32(GRBM_INT_CNTL, 0);
5943 	/* vline/vblank, etc. */
5944 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5945 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5946 	if (rdev->num_crtc >= 4) {
5947 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5948 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5949 	}
5950 	if (rdev->num_crtc >= 6) {
5951 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5952 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5953 	}
5954 
5955 	/* dac hotplug */
5956 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5957 
5958 	/* digital hotplug */
5959 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5960 	WREG32(DC_HPD1_INT_CONTROL, tmp);
5961 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5962 	WREG32(DC_HPD2_INT_CONTROL, tmp);
5963 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5964 	WREG32(DC_HPD3_INT_CONTROL, tmp);
5965 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5966 	WREG32(DC_HPD4_INT_CONTROL, tmp);
5967 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5968 	WREG32(DC_HPD5_INT_CONTROL, tmp);
5969 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5970 	WREG32(DC_HPD6_INT_CONTROL, tmp);
5972 }
5973 
5974 /**
5975  * cik_irq_init - init and enable the interrupt ring
5976  *
5977  * @rdev: radeon_device pointer
5978  *
5979  * Allocate a ring buffer for the interrupt controller,
5980  * enable the RLC, disable interrupts, enable the IH
5981  * ring buffer and enable it (CIK).
5982  * Called at device load and resume.
5983  * Returns 0 for success, errors for failure.
5984  */
5985 static int cik_irq_init(struct radeon_device *rdev)
5986 {
5987 	int ret = 0;
5988 	int rb_bufsz;
5989 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5990 
5991 	/* allocate ring */
5992 	ret = r600_ih_ring_alloc(rdev);
5993 	if (ret)
5994 		return ret;
5995 
5996 	/* disable irqs */
5997 	cik_disable_interrupts(rdev);
5998 
5999 	/* init rlc */
6000 	ret = cik_rlc_resume(rdev);
6001 	if (ret) {
6002 		r600_ih_ring_fini(rdev);
6003 		return ret;
6004 	}
6005 
6006 	/* setup interrupt control */
6007 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
6008 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6009 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6010 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6011 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6012 	 */
6013 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6014 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6015 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6016 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6017 
6018 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
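	/* Editor's note: rb_bufsz encodes log2 of the ring size in dwords,
	 * e.g. a 64KB IH ring holds 16384 dwords, so rb_bufsz = 14.
	 */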
6019 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6020 
6021 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6022 		      IH_WPTR_OVERFLOW_CLEAR |
6023 		      (rb_bufsz << 1));
6024 
6025 	if (rdev->wb.enabled)
6026 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6027 
6028 	/* set the writeback address whether it's enabled or not */
6029 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6030 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6031 
6032 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6033 
6034 	/* set rptr, wptr to 0 */
6035 	WREG32(IH_RB_RPTR, 0);
6036 	WREG32(IH_RB_WPTR, 0);
6037 
6038 	/* Default settings for IH_CNTL (disabled at first) */
6039 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6040 	/* RPTR_REARM only works if msi's are enabled */
6041 	if (rdev->msi_enabled)
6042 		ih_cntl |= RPTR_REARM;
6043 	WREG32(IH_CNTL, ih_cntl);
6044 
6045 	/* force the active interrupt state to all disabled */
6046 	cik_disable_interrupt_state(rdev);
6047 
6048 	pci_set_master(rdev->pdev);
6049 
6050 	/* enable irqs */
6051 	cik_enable_interrupts(rdev);
6052 
6053 	return ret;
6054 }
6055 
6056 /**
6057  * cik_irq_set - enable/disable interrupt sources
6058  *
6059  * @rdev: radeon_device pointer
6060  *
6061  * Enable interrupt sources on the GPU (vblanks, hpd,
6062  * etc.) (CIK).
6063  * Returns 0 for success, errors for failure.
6064  */
6065 int cik_irq_set(struct radeon_device *rdev)
6066 {
6067 	u32 cp_int_cntl;
6068 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6069 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6070 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6071 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6072 	u32 grbm_int_cntl = 0;
6073 	u32 dma_cntl, dma_cntl1;
6074 	u32 thermal_int;
6075 
6076 	if (!rdev->irq.installed) {
6077 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6078 		return -EINVAL;
6079 	}
6080 	/* don't enable anything if the ih is disabled */
6081 	if (!rdev->ih.enabled) {
6082 		cik_disable_interrupts(rdev);
6083 		/* force the active interrupt state to all disabled */
6084 		cik_disable_interrupt_state(rdev);
6085 		return 0;
6086 	}
6087 
6088 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6089 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6090 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6091 
6092 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6093 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6094 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6095 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6096 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6097 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6098 
6099 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6100 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6101 
6102 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6103 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6104 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6105 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6106 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6107 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6108 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6109 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6110 
6111 	if (rdev->flags & RADEON_IS_IGP)
6112 		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6113 			~(THERM_INTH_MASK | THERM_INTL_MASK);
6114 	else
6115 		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6116 			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6117 
6118 	/* enable CP interrupts on all rings */
6119 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6120 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
6121 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6122 	}
6123 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6124 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6125 		DRM_DEBUG("si_irq_set: sw int cp1\n");
6126 		if (ring->me == 1) {
6127 			switch (ring->pipe) {
6128 			case 0:
6129 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6130 				break;
6131 			case 1:
6132 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6133 				break;
6134 			case 2:
6135 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6136 				break;
6137 			case 3:
6138 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6139 				break;
6140 			default:
6141 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6142 				break;
6143 			}
6144 		} else if (ring->me == 2) {
6145 			switch (ring->pipe) {
6146 			case 0:
6147 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6148 				break;
6149 			case 1:
6150 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6151 				break;
6152 			case 2:
6153 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6154 				break;
6155 			case 3:
6156 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6157 				break;
6158 			default:
6159 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6160 				break;
6161 			}
6162 		} else {
6163 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6164 		}
6165 	}
6166 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6167 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6168 		DRM_DEBUG("si_irq_set: sw int cp2\n");
6169 		if (ring->me == 1) {
6170 			switch (ring->pipe) {
6171 			case 0:
6172 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6173 				break;
6174 			case 1:
6175 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6176 				break;
6177 			case 2:
6178 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6179 				break;
6180 			case 3:
6181 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6182 				break;
6183 			default:
6184 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6185 				break;
6186 			}
6187 		} else if (ring->me == 2) {
6188 			switch (ring->pipe) {
6189 			case 0:
6190 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6191 				break;
6192 			case 1:
6193 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6194 				break;
6195 			case 2:
6196 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6197 				break;
6198 			case 3:
6199 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6200 				break;
6201 			default:
6202 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6203 				break;
6204 			}
6205 		} else {
6206 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6207 		}
6208 	}
6209 
6210 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6211 		DRM_DEBUG("cik_irq_set: sw int dma\n");
6212 		dma_cntl |= TRAP_ENABLE;
6213 	}
6214 
6215 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6216 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
6217 		dma_cntl1 |= TRAP_ENABLE;
6218 	}
6219 
6220 	if (rdev->irq.crtc_vblank_int[0] ||
6221 	    atomic_read(&rdev->irq.pflip[0])) {
6222 		DRM_DEBUG("cik_irq_set: vblank 0\n");
6223 		crtc1 |= VBLANK_INTERRUPT_MASK;
6224 	}
6225 	if (rdev->irq.crtc_vblank_int[1] ||
6226 	    atomic_read(&rdev->irq.pflip[1])) {
6227 		DRM_DEBUG("cik_irq_set: vblank 1\n");
6228 		crtc2 |= VBLANK_INTERRUPT_MASK;
6229 	}
6230 	if (rdev->irq.crtc_vblank_int[2] ||
6231 	    atomic_read(&rdev->irq.pflip[2])) {
6232 		DRM_DEBUG("cik_irq_set: vblank 2\n");
6233 		crtc3 |= VBLANK_INTERRUPT_MASK;
6234 	}
6235 	if (rdev->irq.crtc_vblank_int[3] ||
6236 	    atomic_read(&rdev->irq.pflip[3])) {
6237 		DRM_DEBUG("cik_irq_set: vblank 3\n");
6238 		crtc4 |= VBLANK_INTERRUPT_MASK;
6239 	}
6240 	if (rdev->irq.crtc_vblank_int[4] ||
6241 	    atomic_read(&rdev->irq.pflip[4])) {
6242 		DRM_DEBUG("cik_irq_set: vblank 4\n");
6243 		crtc5 |= VBLANK_INTERRUPT_MASK;
6244 	}
6245 	if (rdev->irq.crtc_vblank_int[5] ||
6246 	    atomic_read(&rdev->irq.pflip[5])) {
6247 		DRM_DEBUG("cik_irq_set: vblank 5\n");
6248 		crtc6 |= VBLANK_INTERRUPT_MASK;
6249 	}
6250 	if (rdev->irq.hpd[0]) {
6251 		DRM_DEBUG("cik_irq_set: hpd 1\n");
6252 		hpd1 |= DC_HPDx_INT_EN;
6253 	}
6254 	if (rdev->irq.hpd[1]) {
6255 		DRM_DEBUG("cik_irq_set: hpd 2\n");
6256 		hpd2 |= DC_HPDx_INT_EN;
6257 	}
6258 	if (rdev->irq.hpd[2]) {
6259 		DRM_DEBUG("cik_irq_set: hpd 3\n");
6260 		hpd3 |= DC_HPDx_INT_EN;
6261 	}
6262 	if (rdev->irq.hpd[3]) {
6263 		DRM_DEBUG("cik_irq_set: hpd 4\n");
6264 		hpd4 |= DC_HPDx_INT_EN;
6265 	}
6266 	if (rdev->irq.hpd[4]) {
6267 		DRM_DEBUG("cik_irq_set: hpd 5\n");
6268 		hpd5 |= DC_HPDx_INT_EN;
6269 	}
6270 	if (rdev->irq.hpd[5]) {
6271 		DRM_DEBUG("cik_irq_set: hpd 6\n");
6272 		hpd6 |= DC_HPDx_INT_EN;
6273 	}
6274 
6275 	if (rdev->irq.dpm_thermal) {
6276 		DRM_DEBUG("dpm thermal\n");
6277 		if (rdev->flags & RADEON_IS_IGP)
6278 			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6279 		else
6280 			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6281 	}
6282 
6283 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6284 
6285 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6286 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6287 
6288 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6289 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6290 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6291 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6292 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6293 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6294 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6295 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6296 
6297 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6298 
6299 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6300 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6301 	if (rdev->num_crtc >= 4) {
6302 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6303 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6304 	}
6305 	if (rdev->num_crtc >= 6) {
6306 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6307 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6308 	}
6309 
6310 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
6311 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
6312 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
6313 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
6314 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
6315 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
6316 
6317 	if (rdev->flags & RADEON_IS_IGP)
6318 		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6319 	else
6320 		WREG32_SMC(CG_THERMAL_INT, thermal_int);
6321 
6322 	return 0;
6323 }
6324 
6325 /**
6326  * cik_irq_ack - ack interrupt sources
6327  *
6328  * @rdev: radeon_device pointer
6329  *
6330  * Ack interrupt sources on the GPU (vblanks, hpd,
6331  * etc.) (CIK).  Certain interrupt sources are sw
6332  * generated and do not require an explicit ack.
6333  */
6334 static inline void cik_irq_ack(struct radeon_device *rdev)
6335 {
6336 	u32 tmp;
6337 
6338 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6339 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6340 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6341 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6342 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6343 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6344 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6345 
6346 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6347 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6348 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6349 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6350 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6351 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6352 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6353 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6354 
6355 	if (rdev->num_crtc >= 4) {
6356 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6357 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6358 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6359 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6360 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6361 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6362 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6363 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6364 	}
6365 
6366 	if (rdev->num_crtc >= 6) {
6367 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6368 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6369 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6370 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6371 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6372 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6373 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6374 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6375 	}
6376 
6377 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6378 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6379 		tmp |= DC_HPDx_INT_ACK;
6380 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6381 	}
6382 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6383 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6384 		tmp |= DC_HPDx_INT_ACK;
6385 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6386 	}
6387 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6388 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6389 		tmp |= DC_HPDx_INT_ACK;
6390 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6391 	}
6392 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6393 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6394 		tmp |= DC_HPDx_INT_ACK;
6395 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6396 	}
6397 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6398 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6399 		tmp |= DC_HPDx_INT_ACK;
6400 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6401 	}
6402 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6403 		tmp = RREG32(DC_HPD6_INT_CONTROL);
6404 		tmp |= DC_HPDx_INT_ACK;
6405 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6406 	}
6407 }
6408 
6409 /**
6410  * cik_irq_disable - disable interrupts
6411  *
6412  * @rdev: radeon_device pointer
6413  *
6414  * Disable interrupts on the hw (CIK).
6415  */
6416 static void cik_irq_disable(struct radeon_device *rdev)
6417 {
6418 	cik_disable_interrupts(rdev);
6419 	/* Wait and acknowledge irq */
6420 	mdelay(1);
6421 	cik_irq_ack(rdev);
6422 	cik_disable_interrupt_state(rdev);
6423 }
6424 
6425 /**
6426  * cik_irq_suspend - disable interrupts for suspend
6427  *
6428  * @rdev: radeon_device pointer
6429  *
6430  * Disable interrupts and stop the RLC (CIK).
6431  * Used for suspend.
6432  */
6433 static void cik_irq_suspend(struct radeon_device *rdev)
6434 {
6435 	cik_irq_disable(rdev);
6436 	cik_rlc_stop(rdev);
6437 }
6438 
6439 /**
6440  * cik_irq_fini - tear down interrupt support
6441  *
6442  * @rdev: radeon_device pointer
6443  *
6444  * Disable interrupts on the hw and free the IH ring
6445  * buffer (CIK).
6446  * Used for driver unload.
6447  */
6448 static void cik_irq_fini(struct radeon_device *rdev)
6449 {
6450 	cik_irq_suspend(rdev);
6451 	r600_ih_ring_fini(rdev);
6452 }
6453 
6454 /**
6455  * cik_get_ih_wptr - get the IH ring buffer wptr
6456  *
6457  * @rdev: radeon_device pointer
6458  *
6459  * Get the IH ring buffer wptr from either the register
6460  * or the writeback memory buffer (CIK).  Also check for
6461  * ring buffer overflow and deal with it.
6462  * Used by cik_irq_process().
6463  * Returns the value of the wptr.
6464  */
6465 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
6466 {
6467 	u32 wptr, tmp;
6468 
6469 	if (rdev->wb.enabled)
6470 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6471 	else
6472 		wptr = RREG32(IH_RB_WPTR);
6473 
6474 	if (wptr & RB_OVERFLOW) {
6475 		/* When a ring buffer overflow happens, start parsing interrupts
6476 		 * from the last vector that was not overwritten (wptr + 16);
6477 		 * this should allow us to catch up.
6478 		 */
6479 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6480 			wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6481 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6482 		tmp = RREG32(IH_RB_CNTL);
6483 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6484 		WREG32(IH_RB_CNTL, tmp);
6485 	}
6486 	return (wptr & rdev->ih.ptr_mask);
6487 }
6488 
6489 /*        CIK IV Ring
6490  * Each IV ring entry is 128 bits:
6491  * [7:0]    - interrupt source id
6492  * [31:8]   - reserved
6493  * [59:32]  - interrupt source data
6494  * [63:60]  - reserved
6495  * [71:64]  - RINGID
6496  *            CP:
6497  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
6498  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
6499  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
6500  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
6501  *            PIPE_ID - ME0 0=3D
6502  *                    - ME1&2 compute dispatcher (4 pipes each)
6503  *            SDMA:
6504  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
6505  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
6506  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
6507  * [79:72]  - VMID
6508  * [95:80]  - PASID
6509  * [127:96] - reserved
6510  */
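/* Editor's note: a minimal, non-compiled sketch of decoding one 128-bit IV
 * entry per the layout above; the struct and helper are hypothetical and
 * for illustration only (cik_irq_process() below decodes entries inline).
 */
#if 0
struct cik_iv_entry {
	u8 src_id;	/* [7:0] */
	u32 src_data;	/* [59:32] */
	u8 ring_id;	/* [71:64]: ME_ID[6:5] PIPE_ID[4:3] QUEUE_ID[2:0] */
	u8 vmid;	/* [79:72] */
	u16 pasid;	/* [95:80] */
};

static void cik_decode_iv_sketch(const volatile u32 *dw,
				 struct cik_iv_entry *e)
{
	e->src_id   = le32_to_cpu(dw[0]) & 0xff;
	e->src_data = le32_to_cpu(dw[1]) & 0xfffffff;
	e->ring_id  = le32_to_cpu(dw[2]) & 0xff;
	e->vmid     = (le32_to_cpu(dw[2]) >> 8) & 0xff;
	e->pasid    = (le32_to_cpu(dw[2]) >> 16) & 0xffff;
}
#endif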
6511 /**
6512  * cik_irq_process - interrupt handler
6513  *
6514  * @rdev: radeon_device pointer
6515  *
6516  * Interrupt handler (CIK).  Walk the IH ring,
6517  * ack interrupts and schedule work to handle
6518  * interrupt events.
6519  * Returns irq process return code.
6520  */
6521 int cik_irq_process(struct radeon_device *rdev)
6522 {
6523 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6524 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6525 	u32 wptr;
6526 	u32 rptr;
6527 	u32 src_id, src_data, ring_id;
6528 	u8 me_id, pipe_id, queue_id;
6529 	u32 ring_index;
6530 	bool queue_hotplug = false;
6531 	bool queue_reset = false;
6532 	u32 addr, status, mc_client;
6533 	bool queue_thermal = false;
6534 
6535 	if (!rdev->ih.enabled || rdev->shutdown)
6536 		return IRQ_NONE;
6537 
6538 	wptr = cik_get_ih_wptr(rdev);
6539 
6540 restart_ih:
6541 	/* is somebody else already processing irqs? */
6542 	if (atomic_xchg(&rdev->ih.lock, 1))
6543 		return IRQ_NONE;
6544 
6545 	rptr = rdev->ih.rptr;
6546 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6547 
6548 	/* Order reading of wptr vs. reading of IH ring data */
6549 	rmb();
6550 
6551 	/* display interrupts */
6552 	cik_irq_ack(rdev);
6553 
6554 	while (rptr != wptr) {
6555 		/* wptr/rptr are in bytes! */
6556 		ring_index = rptr / 4;
6557 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6558 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6559 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6560 
6561 		switch (src_id) {
6562 		case 1: /* D1 vblank/vline */
6563 			switch (src_data) {
6564 			case 0: /* D1 vblank */
6565 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
6566 					if (rdev->irq.crtc_vblank_int[0]) {
6567 						drm_handle_vblank(rdev->ddev, 0);
6568 						rdev->pm.vblank_sync = true;
6569 						wake_up(&rdev->irq.vblank_queue);
6570 					}
6571 					if (atomic_read(&rdev->irq.pflip[0]))
6572 						radeon_crtc_handle_flip(rdev, 0);
6573 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6574 					DRM_DEBUG("IH: D1 vblank\n");
6575 				}
6576 				break;
6577 			case 1: /* D1 vline */
6578 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
6579 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6580 					DRM_DEBUG("IH: D1 vline\n");
6581 				}
6582 				break;
6583 			default:
6584 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6585 				break;
6586 			}
6587 			break;
6588 		case 2: /* D2 vblank/vline */
6589 			switch (src_data) {
6590 			case 0: /* D2 vblank */
6591 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6592 					if (rdev->irq.crtc_vblank_int[1]) {
6593 						drm_handle_vblank(rdev->ddev, 1);
6594 						rdev->pm.vblank_sync = true;
6595 						wake_up(&rdev->irq.vblank_queue);
6596 					}
6597 					if (atomic_read(&rdev->irq.pflip[1]))
6598 						radeon_crtc_handle_flip(rdev, 1);
6599 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6600 					DRM_DEBUG("IH: D2 vblank\n");
6601 				}
6602 				break;
6603 			case 1: /* D2 vline */
6604 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6605 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6606 					DRM_DEBUG("IH: D2 vline\n");
6607 				}
6608 				break;
6609 			default:
6610 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6611 				break;
6612 			}
6613 			break;
6614 		case 3: /* D3 vblank/vline */
6615 			switch (src_data) {
6616 			case 0: /* D3 vblank */
6617 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6618 					if (rdev->irq.crtc_vblank_int[2]) {
6619 						drm_handle_vblank(rdev->ddev, 2);
6620 						rdev->pm.vblank_sync = true;
6621 						wake_up(&rdev->irq.vblank_queue);
6622 					}
6623 					if (atomic_read(&rdev->irq.pflip[2]))
6624 						radeon_crtc_handle_flip(rdev, 2);
6625 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6626 					DRM_DEBUG("IH: D3 vblank\n");
6627 				}
6628 				break;
6629 			case 1: /* D3 vline */
6630 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6631 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6632 					DRM_DEBUG("IH: D3 vline\n");
6633 				}
6634 				break;
6635 			default:
6636 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6637 				break;
6638 			}
6639 			break;
6640 		case 4: /* D4 vblank/vline */
6641 			switch (src_data) {
6642 			case 0: /* D4 vblank */
6643 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6644 					if (rdev->irq.crtc_vblank_int[3]) {
6645 						drm_handle_vblank(rdev->ddev, 3);
6646 						rdev->pm.vblank_sync = true;
6647 						wake_up(&rdev->irq.vblank_queue);
6648 					}
6649 					if (atomic_read(&rdev->irq.pflip[3]))
6650 						radeon_crtc_handle_flip(rdev, 3);
6651 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6652 					DRM_DEBUG("IH: D4 vblank\n");
6653 				}
6654 				break;
6655 			case 1: /* D4 vline */
6656 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6657 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6658 					DRM_DEBUG("IH: D4 vline\n");
6659 				}
6660 				break;
6661 			default:
6662 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6663 				break;
6664 			}
6665 			break;
6666 		case 5: /* D5 vblank/vline */
6667 			switch (src_data) {
6668 			case 0: /* D5 vblank */
6669 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6670 					if (rdev->irq.crtc_vblank_int[4]) {
6671 						drm_handle_vblank(rdev->ddev, 4);
6672 						rdev->pm.vblank_sync = true;
6673 						wake_up(&rdev->irq.vblank_queue);
6674 					}
6675 					if (atomic_read(&rdev->irq.pflip[4]))
6676 						radeon_crtc_handle_flip(rdev, 4);
6677 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6678 					DRM_DEBUG("IH: D5 vblank\n");
6679 				}
6680 				break;
6681 			case 1: /* D5 vline */
6682 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6683 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6684 					DRM_DEBUG("IH: D5 vline\n");
6685 				}
6686 				break;
6687 			default:
6688 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6689 				break;
6690 			}
6691 			break;
6692 		case 6: /* D6 vblank/vline */
6693 			switch (src_data) {
6694 			case 0: /* D6 vblank */
6695 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6696 					if (rdev->irq.crtc_vblank_int[5]) {
6697 						drm_handle_vblank(rdev->ddev, 5);
6698 						rdev->pm.vblank_sync = true;
6699 						wake_up(&rdev->irq.vblank_queue);
6700 					}
6701 					if (atomic_read(&rdev->irq.pflip[5]))
6702 						radeon_crtc_handle_flip(rdev, 5);
6703 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6704 					DRM_DEBUG("IH: D6 vblank\n");
6705 				}
6706 				break;
6707 			case 1: /* D6 vline */
6708 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6709 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6710 					DRM_DEBUG("IH: D6 vline\n");
6711 				}
6712 				break;
6713 			default:
6714 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6715 				break;
6716 			}
6717 			break;
6718 		case 42: /* HPD hotplug */
6719 			switch (src_data) {
6720 			case 0:
6721 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6722 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
6723 					queue_hotplug = true;
6724 					DRM_DEBUG("IH: HPD1\n");
6725 				}
6726 				break;
6727 			case 1:
6728 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6729 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6730 					queue_hotplug = true;
6731 					DRM_DEBUG("IH: HPD2\n");
6732 				}
6733 				break;
6734 			case 2:
6735 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6736 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6737 					queue_hotplug = true;
6738 					DRM_DEBUG("IH: HPD3\n");
6739 				}
6740 				break;
6741 			case 3:
6742 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6743 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6744 					queue_hotplug = true;
6745 					DRM_DEBUG("IH: HPD4\n");
6746 				}
6747 				break;
6748 			case 4:
6749 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6750 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6751 					queue_hotplug = true;
6752 					DRM_DEBUG("IH: HPD5\n");
6753 				}
6754 				break;
6755 			case 5:
6756 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6757 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6758 					queue_hotplug = true;
6759 					DRM_DEBUG("IH: HPD6\n");
6760 				}
6761 				break;
6762 			default:
6763 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6764 				break;
6765 			}
6766 			break;
6767 		case 124: /* UVD */
6768 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6769 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6770 			break;
6771 		case 146:
6772 		case 147:
6773 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6774 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6775 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
6776 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6777 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6778 				addr);
6779 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6780 				status);
6781 			cik_vm_decode_fault(rdev, status, addr, mc_client);
6782 			/* reset addr and status */
6783 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6784 			break;
6785 		case 176: /* GFX RB CP_INT */
6786 		case 177: /* GFX IB CP_INT */
6787 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6788 			break;
6789 		case 181: /* CP EOP event */
6790 			DRM_DEBUG("IH: CP EOP\n");
6791 			/* XXX check the bitfield order! */
6792 			me_id = (ring_id & 0x60) >> 5;
6793 			pipe_id = (ring_id & 0x18) >> 3;
6794 			queue_id = (ring_id & 0x7) >> 0;
6795 			switch (me_id) {
6796 			case 0:
6797 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6798 				break;
6799 			case 1:
6800 			case 2:
6801 				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
6802 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6803 				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
6804 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6805 				break;
6806 			}
6807 			break;
6808 		case 184: /* CP Privileged reg access */
6809 			DRM_ERROR("Illegal register access in command stream\n");
6810 			/* XXX check the bitfield order! */
6811 			me_id = (ring_id & 0x60) >> 5;
6812 			pipe_id = (ring_id & 0x18) >> 3;
6813 			queue_id = (ring_id & 0x7) >> 0;
6814 			switch (me_id) {
6815 			case 0:
6816 				/* This results in a full GPU reset, but all we need to do is soft
6817 				 * reset the CP for gfx
6818 				 */
6819 				queue_reset = true;
6820 				break;
6821 			case 1:
6822 				/* XXX compute */
6823 				queue_reset = true;
6824 				break;
6825 			case 2:
6826 				/* XXX compute */
6827 				queue_reset = true;
6828 				break;
6829 			}
6830 			break;
6831 		case 185: /* CP Privileged inst */
6832 			DRM_ERROR("Illegal instruction in command stream\n");
6833 			/* XXX check the bitfield order! */
6834 			me_id = (ring_id & 0x60) >> 5;
6835 			pipe_id = (ring_id & 0x18) >> 3;
6836 			queue_id = (ring_id & 0x7) >> 0;
6837 			switch (me_id) {
6838 			case 0:
6839 				/* This results in a full GPU reset, but all we need to do is soft
6840 				 * reset the CP for gfx
6841 				 */
6842 				queue_reset = true;
6843 				break;
6844 			case 1:
6845 				/* XXX compute */
6846 				queue_reset = true;
6847 				break;
6848 			case 2:
6849 				/* XXX compute */
6850 				queue_reset = true;
6851 				break;
6852 			}
6853 			break;
6854 		case 224: /* SDMA trap event */
6855 			/* XXX check the bitfield order! */
6856 			me_id = (ring_id & 0x3) >> 0;
6857 			queue_id = (ring_id & 0xc) >> 2;
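			/* for SDMA sources the encoding differs from the CP rings:
			 * going by the masks used here, bits [1:0] select the SDMA
			 * engine and bits [3:2] the queue.
			 */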
6858 			DRM_DEBUG("IH: SDMA trap\n");
6859 			switch (me_id) {
6860 			case 0:
6861 				switch (queue_id) {
6862 				case 0:
6863 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6864 					break;
6865 				case 1:
6866 					/* XXX compute */
6867 					break;
6868 				case 2:
6869 					/* XXX compute */
6870 					break;
6871 				}
6872 				break;
6873 			case 1:
6874 				switch (queue_id) {
6875 				case 0:
6876 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6877 					break;
6878 				case 1:
6879 					/* XXX compute */
6880 					break;
6881 				case 2:
6882 					/* XXX compute */
6883 					break;
6884 				}
6885 				break;
6886 			}
6887 			break;
6888 		case 230: /* thermal low to high */
6889 			DRM_DEBUG("IH: thermal low to high\n");
6890 			rdev->pm.dpm.thermal.high_to_low = false;
6891 			queue_thermal = true;
6892 			break;
6893 		case 231: /* thermal high to low */
6894 			DRM_DEBUG("IH: thermal high to low\n");
6895 			rdev->pm.dpm.thermal.high_to_low = true;
6896 			queue_thermal = true;
6897 			break;
6898 		case 233: /* GUI IDLE */
6899 			DRM_DEBUG("IH: GUI idle\n");
6900 			break;
6901 		case 241: /* SDMA Privileged inst */
6902 		case 247: /* SDMA Privileged inst */
6903 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
6904 			/* XXX check the bitfield order! */
6905 			me_id = (ring_id & 0x3) >> 0;
6906 			queue_id = (ring_id & 0xc) >> 2;
6907 			switch (me_id) {
6908 			case 0:
6909 				switch (queue_id) {
6910 				case 0:
6911 					queue_reset = true;
6912 					break;
6913 				case 1:
6914 					/* XXX compute */
6915 					queue_reset = true;
6916 					break;
6917 				case 2:
6918 					/* XXX compute */
6919 					queue_reset = true;
6920 					break;
6921 				}
6922 				break;
6923 			case 1:
6924 				switch (queue_id) {
6925 				case 0:
6926 					queue_reset = true;
6927 					break;
6928 				case 1:
6929 					/* XXX compute */
6930 					queue_reset = true;
6931 					break;
6932 				case 2:
6933 					/* XXX compute */
6934 					queue_reset = true;
6935 					break;
6936 				}
6937 				break;
6938 			}
6939 			break;
6940 		default:
6941 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6942 			break;
6943 		}
6944 
6945 		/* wptr/rptr are in bytes! */
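		/* each IH ring entry is 16 bytes (four dwords), hence the fixed
		 * advance; ptr_mask wraps rptr within the power-of-two ring
		 * (64KB as allocated in cik_init()).
		 */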
6946 		rptr += 16;
6947 		rptr &= rdev->ih.ptr_mask;
6948 	}
6949 	if (queue_hotplug)
6950 		schedule_work(&rdev->hotplug_work);
6951 	if (queue_reset)
6952 		schedule_work(&rdev->reset_work);
6953 	if (queue_thermal)
6954 		schedule_work(&rdev->pm.dpm.thermal.work);
6955 	rdev->ih.rptr = rptr;
6956 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
6957 	atomic_set(&rdev->ih.lock, 0);
6958 
6959 	/* make sure wptr hasn't changed while processing */
6960 	wptr = cik_get_ih_wptr(rdev);
6961 	if (wptr != rptr)
6962 		goto restart_ih;
6963 
6964 	return IRQ_HANDLED;
6965 }
6966 
6967 /*
6968  * startup/shutdown callbacks
6969  */
6970 /**
6971  * cik_startup - program the asic to a functional state
6972  *
6973  * @rdev: radeon_device pointer
6974  *
6975  * Programs the asic to a functional state (CIK).
6976  * Called by cik_init() and cik_resume().
6977  * Returns 0 for success, error for failure.
6978  */
6979 static int cik_startup(struct radeon_device *rdev)
6980 {
6981 	struct radeon_ring *ring;
6982 	int r;
6983 
6984 	/* enable pcie gen2/3 link */
6985 	cik_pcie_gen3_enable(rdev);
6986 	/* enable aspm */
6987 	cik_program_aspm(rdev);
6988 
6989 	/* scratch needs to be initialized before MC */
6990 	r = r600_vram_scratch_init(rdev);
6991 	if (r)
6992 		return r;
6993 
6994 	cik_mc_program(rdev);
6995 
6996 	if (rdev->flags & RADEON_IS_IGP) {
6997 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6998 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
6999 			r = cik_init_microcode(rdev);
7000 			if (r) {
7001 				DRM_ERROR("Failed to load firmware!\n");
7002 				return r;
7003 			}
7004 		}
7005 	} else {
7006 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7007 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
7008 		    !rdev->mc_fw) {
7009 			r = cik_init_microcode(rdev);
7010 			if (r) {
7011 				DRM_ERROR("Failed to load firmware!\n");
7012 				return r;
7013 			}
7014 		}
7015 
7016 		r = ci_mc_load_microcode(rdev);
7017 		if (r) {
7018 			DRM_ERROR("Failed to load MC firmware!\n");
7019 			return r;
7020 		}
7021 	}
7022 
7023 	r = cik_pcie_gart_enable(rdev);
7024 	if (r)
7025 		return r;
7026 	cik_gpu_init(rdev);
7027 
7028 	/* allocate rlc buffers */
7029 	if (rdev->flags & RADEON_IS_IGP) {
7030 		if (rdev->family == CHIP_KAVERI) {
7031 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7032 			rdev->rlc.reg_list_size =
7033 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7034 		} else {
7035 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7036 			rdev->rlc.reg_list_size =
7037 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7038 		}
7039 	}
7040 	rdev->rlc.cs_data = ci_cs_data;
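	/* one save/restore table per CP block, presumably PFP, ME, CE,
	 * MEC1 and MEC2 (hence the * 5), each CP_ME_TABLE_SIZE dwords,
	 * 4 bytes per dword */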
7041 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7042 	r = sumo_rlc_init(rdev);
7043 	if (r) {
7044 		DRM_ERROR("Failed to init rlc BOs!\n");
7045 		return r;
7046 	}
7047 
7048 	/* allocate wb buffer */
7049 	r = radeon_wb_init(rdev);
7050 	if (r)
7051 		return r;
7052 
7053 	/* allocate mec buffers */
7054 	r = cik_mec_init(rdev);
7055 	if (r) {
7056 		DRM_ERROR("Failed to init MEC BOs!\n");
7057 		return r;
7058 	}
7059 
7060 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7061 	if (r) {
7062 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7063 		return r;
7064 	}
7065 
7066 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7067 	if (r) {
7068 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7069 		return r;
7070 	}
7071 
7072 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7073 	if (r) {
7074 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7075 		return r;
7076 	}
7077 
7078 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7079 	if (r) {
7080 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7081 		return r;
7082 	}
7083 
7084 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7085 	if (r) {
7086 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7087 		return r;
7088 	}
7089 
7090 	r = radeon_uvd_resume(rdev);
7091 	if (!r) {
7092 		r = uvd_v4_2_resume(rdev);
7093 		if (!r) {
7094 			r = radeon_fence_driver_start_ring(rdev,
7095 							   R600_RING_TYPE_UVD_INDEX);
7096 			if (r)
7097 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7098 		}
7099 	}
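	/* any failure above simply disables the UVD ring (size 0) instead
	 * of failing the whole startup */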
7100 	if (r)
7101 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7102 
7103 	/* Enable IRQ */
7104 	if (!rdev->irq.installed) {
7105 		r = radeon_irq_kms_init(rdev);
7106 		if (r)
7107 			return r;
7108 	}
7109 
7110 	r = cik_irq_init(rdev);
7111 	if (r) {
7112 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
7113 		radeon_irq_kms_fini(rdev);
7114 		return r;
7115 	}
7116 	cik_irq_set(rdev);
7117 
7118 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7119 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7120 			     CP_RB0_RPTR, CP_RB0_WPTR,
7121 			     RADEON_CP_PACKET2);
7122 	if (r)
7123 		return r;
7124 
7125 	/* set up the compute queues */
7126 	/* type-2 packets are deprecated on MEC, use type-3 instead */
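	/* assuming the usual radeon PACKET3() encoding,
	 * ((3 << 30) | (op << 8) | (count << 16)), the nop below,
	 * PACKET3(PACKET3_NOP, 0x3FFF), works out to 0xFFFF1000 */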
7127 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7128 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7129 			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7130 			     PACKET3(PACKET3_NOP, 0x3FFF));
7131 	if (r)
7132 		return r;
7133 	ring->me = 1; /* first MEC */
7134 	ring->pipe = 0; /* first pipe */
7135 	ring->queue = 0; /* first queue */
7136 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7137 
7138 	/* type-2 packets are deprecated on MEC, use type-3 instead */
7139 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7140 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7141 			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7142 			     PACKET3(PACKET3_NOP, 0x3FFF));
7143 	if (r)
7144 		return r;
7145 	/* dGPUs only have 1 MEC */
7146 	ring->me = 1; /* first MEC */
7147 	ring->pipe = 0; /* first pipe */
7148 	ring->queue = 1; /* second queue */
7149 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
7150 
7151 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7152 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7153 			     SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
7154 			     SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
7155 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7156 	if (r)
7157 		return r;
7158 
7159 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7160 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7161 			     SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
7162 			     SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
7163 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7164 	if (r)
7165 		return r;
7166 
7167 	r = cik_cp_resume(rdev);
7168 	if (r)
7169 		return r;
7170 
7171 	r = cik_sdma_resume(rdev);
7172 	if (r)
7173 		return r;
7174 
7175 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7176 	if (ring->ring_size) {
7177 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7178 				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
7179 				     RADEON_CP_PACKET2);
7180 		if (!r)
7181 			r = uvd_v1_0_init(rdev);
7182 		if (r)
7183 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7184 	}
7185 
7186 	r = radeon_ib_pool_init(rdev);
7187 	if (r) {
7188 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7189 		return r;
7190 	}
7191 
7192 	r = radeon_vm_manager_init(rdev);
7193 	if (r) {
7194 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7195 		return r;
7196 	}
7197 
7198 	r = dce6_audio_init(rdev);
7199 	if (r)
7200 		return r;
7201 
7202 	return 0;
7203 }
7204 
7205 /**
7206  * cik_resume - resume the asic to a functional state
7207  *
7208  * @rdev: radeon_device pointer
7209  *
7210  * Programs the asic to a functional state (CIK).
7211  * Called at resume.
7212  * Returns 0 for success, error for failure.
7213  */
7214 int cik_resume(struct radeon_device *rdev)
7215 {
7216 	int r;
7217 
7218 	/* post card */
7219 	atom_asic_init(rdev->mode_info.atom_context);
7220 
7221 	/* init golden registers */
7222 	cik_init_golden_registers(rdev);
7223 
7224 	rdev->accel_working = true;
7225 	r = cik_startup(rdev);
7226 	if (r) {
7227 		DRM_ERROR("cik startup failed on resume\n");
7228 		rdev->accel_working = false;
7229 		return r;
7230 	}
7231 
7232 	return r;
7233 
7234 }
7235 
7236 /**
7237  * cik_suspend - suspend the asic
7238  *
7239  * @rdev: radeon_device pointer
7240  *
7241  * Bring the chip into a state suitable for suspend (CIK).
7242  * Called at suspend.
7243  * Returns 0 for success.
7244  */
7245 int cik_suspend(struct radeon_device *rdev)
7246 {
7247 	dce6_audio_fini(rdev);
7248 	radeon_vm_manager_fini(rdev);
7249 	cik_cp_enable(rdev, false);
7250 	cik_sdma_enable(rdev, false);
7251 	uvd_v1_0_fini(rdev);
7252 	radeon_uvd_suspend(rdev);
7253 	cik_fini_pg(rdev);
7254 	cik_fini_cg(rdev);
7255 	cik_irq_suspend(rdev);
7256 	radeon_wb_disable(rdev);
7257 	cik_pcie_gart_disable(rdev);
7258 	return 0;
7259 }
7260 
7261 /* The plan is to move more initialization into this function and use
7262  * helper functions so that radeon_device_init does little more than
7263  * call asic specific functions.
7264  * This should also allow us to remove a bunch of callbacks like
7265  * vram_info.
7266  */
7267 /**
7268  * cik_init - asic specific driver and hw init
7269  *
7270  * @rdev: radeon_device pointer
7271  *
7272  * Setup asic specific driver variables and program the hw
7273  * to a functional state (CIK).
7274  * Called at driver startup.
7275  * Returns 0 for success, errors for failure.
7276  */
7277 int cik_init(struct radeon_device *rdev)
7278 {
7279 	struct radeon_ring *ring;
7280 	int r;
7281 
7282 	/* Read BIOS */
7283 	if (!radeon_get_bios(rdev)) {
7284 		if (ASIC_IS_AVIVO(rdev))
7285 			return -EINVAL;
7286 	}
7287 	/* Must be an ATOMBIOS */
7288 	if (!rdev->is_atom_bios) {
7289 		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
7290 		return -EINVAL;
7291 	}
7292 	r = radeon_atombios_init(rdev);
7293 	if (r)
7294 		return r;
7295 
7296 	/* Post card if necessary */
7297 	if (!radeon_card_posted(rdev)) {
7298 		if (!rdev->bios) {
7299 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7300 			return -EINVAL;
7301 		}
7302 		DRM_INFO("GPU not posted. posting now...\n");
7303 		atom_asic_init(rdev->mode_info.atom_context);
7304 	}
7305 	/* init golden registers */
7306 	cik_init_golden_registers(rdev);
7307 	/* Initialize scratch registers */
7308 	cik_scratch_init(rdev);
7309 	/* Initialize surface registers */
7310 	radeon_surface_init(rdev);
7311 	/* Initialize clocks */
7312 	radeon_get_clock_info(rdev->ddev);
7313 
7314 	/* Fence driver */
7315 	r = radeon_fence_driver_init(rdev);
7316 	if (r)
7317 		return r;
7318 
7319 	/* initialize memory controller */
7320 	r = cik_mc_init(rdev);
7321 	if (r)
7322 		return r;
7323 	/* Memory manager */
7324 	r = radeon_bo_init(rdev);
7325 	if (r)
7326 		return r;
7327 
7328 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7329 	ring->ring_obj = NULL;
7330 	r600_ring_init(rdev, ring, 1024 * 1024);
7331 
7332 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7333 	ring->ring_obj = NULL;
7334 	r600_ring_init(rdev, ring, 1024 * 1024);
7335 	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7336 	if (r)
7337 		return r;
7338 
7339 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7340 	ring->ring_obj = NULL;
7341 	r600_ring_init(rdev, ring, 1024 * 1024);
7342 	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7343 	if (r)
7344 		return r;
7345 
7346 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7347 	ring->ring_obj = NULL;
7348 	r600_ring_init(rdev, ring, 256 * 1024);
7349 
7350 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7351 	ring->ring_obj = NULL;
7352 	r600_ring_init(rdev, ring, 256 * 1024);
7353 
7354 	r = radeon_uvd_init(rdev);
7355 	if (!r) {
7356 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7357 		ring->ring_obj = NULL;
7358 		r600_ring_init(rdev, ring, 4096);
7359 	}
7360 
7361 	rdev->ih.ring_obj = NULL;
7362 	r600_ih_ring_init(rdev, 64 * 1024);
7363 
7364 	r = r600_pcie_gart_init(rdev);
7365 	if (r)
7366 		return r;
7367 
7368 	rdev->accel_working = true;
7369 	r = cik_startup(rdev);
7370 	if (r) {
7371 		dev_err(rdev->dev, "disabling GPU acceleration\n");
7372 		cik_cp_fini(rdev);
7373 		cik_sdma_fini(rdev);
7374 		cik_irq_fini(rdev);
7375 		sumo_rlc_fini(rdev);
7376 		cik_mec_fini(rdev);
7377 		radeon_wb_fini(rdev);
7378 		radeon_ib_pool_fini(rdev);
7379 		radeon_vm_manager_fini(rdev);
7380 		radeon_irq_kms_fini(rdev);
7381 		cik_pcie_gart_fini(rdev);
7382 		rdev->accel_working = false;
7383 	}
7384 
7385 	/* Don't start up if the MC ucode is missing.
7386 	 * The default clocks and voltages before the MC ucode
7387 	 * is loaded are not sufficient for advanced operations.
7388 	 */
7389 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7390 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
7391 		return -EINVAL;
7392 	}
7393 
7394 	return 0;
7395 }
7396 
7397 /**
7398  * cik_fini - asic specific driver and hw fini
7399  *
7400  * @rdev: radeon_device pointer
7401  *
7402  * Tear down the asic specific driver variables and program the hw
7403  * to an idle state (CIK).
7404  * Called at driver unload.
7405  */
7406 void cik_fini(struct radeon_device *rdev)
7407 {
7408 	cik_cp_fini(rdev);
7409 	cik_sdma_fini(rdev);
7410 	cik_fini_pg(rdev);
7411 	cik_fini_cg(rdev);
7412 	cik_irq_fini(rdev);
7413 	sumo_rlc_fini(rdev);
7414 	cik_mec_fini(rdev);
7415 	radeon_wb_fini(rdev);
7416 	radeon_vm_manager_fini(rdev);
7417 	radeon_ib_pool_fini(rdev);
7418 	radeon_irq_kms_fini(rdev);
7419 	uvd_v1_0_fini(rdev);
7420 	radeon_uvd_fini(rdev);
7421 	cik_pcie_gart_fini(rdev);
7422 	r600_vram_scratch_fini(rdev);
7423 	radeon_gem_fini(rdev);
7424 	radeon_fence_driver_fini(rdev);
7425 	radeon_bo_fini(rdev);
7426 	radeon_atombios_fini(rdev);
7427 	kfree(rdev->bios);
7428 	rdev->bios = NULL;
7429 }
7430 
7431 /* display watermark setup */
7432 /**
7433  * dce8_line_buffer_adjust - Set up the line buffer
7434  *
7435  * @rdev: radeon_device pointer
7436  * @radeon_crtc: the selected display controller
7437  * @mode: the current display mode on the selected display
7438  * controller
7439  *
7440  * Set up the line buffer allocation for
7441  * the selected display controller (CIK).
7442  * Returns the line buffer size in pixels.
7443  */
7444 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
7445 				   struct radeon_crtc *radeon_crtc,
7446 				   struct drm_display_mode *mode)
7447 {
7448 	u32 tmp, buffer_alloc, i;
7449 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
7450 	/*
7451 	 * Line Buffer Setup
7452 	 * There are 6 line buffers, one for each display controller.
7453 	 * There are 3 partitions per LB. Select the number of partitions
7454 	 * to enable based on the display width.  For display widths larger
7455 	 * than 4096, you need to use 2 display controllers and combine
7456 	 * them using the stereo blender.
7457 	 */
7458 	if (radeon_crtc->base.enabled && mode) {
7459 		if (mode->crtc_hdisplay < 1920) {
7460 			tmp = 1;
7461 			buffer_alloc = 2;
7462 		} else if (mode->crtc_hdisplay < 2560) {
7463 			tmp = 2;
7464 			buffer_alloc = 2;
7465 		} else if (mode->crtc_hdisplay < 4096) {
7466 			tmp = 0;
7467 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
7468 		} else {
7469 			DRM_DEBUG_KMS("Mode too big for LB!\n");
7470 			tmp = 0;
7471 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
7472 		}
7473 	} else {
7474 		tmp = 1;
7475 		buffer_alloc = 0;
7476 	}
7477 
7478 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
7479 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
7480 
7481 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
7482 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
7483 	for (i = 0; i < rdev->usec_timeout; i++) {
7484 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
7485 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
7486 			break;
7487 		udelay(1);
7488 	}
7489 
7490 	if (radeon_crtc->base.enabled && mode) {
7491 		switch (tmp) {
7492 		case 0:
7493 		default:
7494 			return 4096 * 2;
7495 		case 1:
7496 			return 1920 * 2;
7497 		case 2:
7498 			return 2560 * 2;
7499 		}
7500 	}
7501 
7502 	/* controller not enabled, so no lb used */
7503 	return 0;
7504 }
7505 
7506 /**
7507  * cik_get_number_of_dram_channels - get the number of dram channels
7508  *
7509  * @rdev: radeon_device pointer
7510  *
7511  * Look up the number of video ram channels (CIK).
7512  * Used for display watermark bandwidth calculations
7513  * Returns the number of dram channels
7514  */
7515 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
7516 {
7517 	u32 tmp = RREG32(MC_SHARED_CHMAP);
7518 
7519 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
7520 	case 0:
7521 	default:
7522 		return 1;
7523 	case 1:
7524 		return 2;
7525 	case 2:
7526 		return 4;
7527 	case 3:
7528 		return 8;
7529 	case 4:
7530 		return 3;
7531 	case 5:
7532 		return 6;
7533 	case 6:
7534 		return 10;
7535 	case 7:
7536 		return 12;
7537 	case 8:
7538 		return 16;
7539 	}
7540 }
7541 
7542 struct dce8_wm_params {
7543 	u32 dram_channels; /* number of dram channels */
7544 	u32 yclk;          /* bandwidth per dram data pin in kHz */
7545 	u32 sclk;          /* engine clock in kHz */
7546 	u32 disp_clk;      /* display clock in kHz */
7547 	u32 src_width;     /* viewport width */
7548 	u32 active_time;   /* active display time in ns */
7549 	u32 blank_time;    /* blank time in ns */
7550 	bool interlaced;   /* mode is interlaced */
7551 	fixed20_12 vsc;    /* vertical scale ratio */
7552 	u32 num_heads;     /* number of active crtcs */
7553 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
7554 	u32 lb_size;       /* line buffer allocated to pipe */
7555 	u32 vtaps;         /* vertical scaler taps */
7556 };
7557 
7558 /**
7559  * dce8_dram_bandwidth - get the dram bandwidth
7560  *
7561  * @wm: watermark calculation data
7562  *
7563  * Calculate the raw dram bandwidth (CIK).
7564  * Used for display watermark bandwidth calculations
7565  * Returns the dram bandwidth in MBytes/s
7566  */
7567 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
7568 {
7569 	/* Calculate raw DRAM Bandwidth */
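	/* in plain form the fixed-point math below computes, with yclk in kHz:
	 *   bandwidth (MB/s) = (yclk / 1000) * (dram_channels * 4 bytes) * 0.7
	 * e.g. 8 channels at 1 GHz effective: 1000 * 32 * 0.7 = 22400 MB/s
	 */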
7570 	fixed20_12 dram_efficiency; /* 0.7 */
7571 	fixed20_12 yclk, dram_channels, bandwidth;
7572 	fixed20_12 a;
7573 
7574 	a.full = dfixed_const(1000);
7575 	yclk.full = dfixed_const(wm->yclk);
7576 	yclk.full = dfixed_div(yclk, a);
7577 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
7578 	a.full = dfixed_const(10);
7579 	dram_efficiency.full = dfixed_const(7);
7580 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
7581 	bandwidth.full = dfixed_mul(dram_channels, yclk);
7582 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
7583 
7584 	return dfixed_trunc(bandwidth);
7585 }
7586 
7587 /**
7588  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
7589  *
7590  * @wm: watermark calculation data
7591  *
7592  * Calculate the dram bandwidth used for display (CIK).
7593  * Used for display watermark bandwidth calculations
7594  * Returns the dram bandwidth for display in MBytes/s
7595  */
7596 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7597 {
7598 	/* Calculate DRAM Bandwidth and the part allocated to display. */
7599 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
7600 	fixed20_12 yclk, dram_channels, bandwidth;
7601 	fixed20_12 a;
7602 
7603 	a.full = dfixed_const(1000);
7604 	yclk.full = dfixed_const(wm->yclk);
7605 	yclk.full = dfixed_div(yclk, a);
7606 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
7607 	a.full = dfixed_const(10);
7608 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
7609 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
7610 	bandwidth.full = dfixed_mul(dram_channels, yclk);
7611 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
7612 
7613 	return dfixed_trunc(bandwidth);
7614 }
7615 
7616 /**
7617  * dce8_data_return_bandwidth - get the data return bandwidth
7618  *
7619  * @wm: watermark calculation data
7620  *
7621  * Calculate the data return bandwidth used for display (CIK).
7622  * Used for display watermark bandwidth calculations
7623  * Returns the data return bandwidth in MBytes/s
7624  */
7625 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
7626 {
7627 	/* Calculate the display Data return Bandwidth */
7628 	fixed20_12 return_efficiency; /* 0.8 */
7629 	fixed20_12 sclk, bandwidth;
7630 	fixed20_12 a;
7631 
7632 	a.full = dfixed_const(1000);
7633 	sclk.full = dfixed_const(wm->sclk);
7634 	sclk.full = dfixed_div(sclk, a);
7635 	a.full = dfixed_const(10);
7636 	return_efficiency.full = dfixed_const(8);
7637 	return_efficiency.full = dfixed_div(return_efficiency, a);
7638 	a.full = dfixed_const(32);
7639 	bandwidth.full = dfixed_mul(a, sclk);
7640 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
7641 
7642 	return dfixed_trunc(bandwidth);
7643 }
7644 
7645 /**
7646  * dce8_dmif_request_bandwidth - get the dmif bandwidth
7647  *
7648  * @wm: watermark calculation data
7649  *
7650  * Calculate the dmif bandwidth used for display (CIK).
7651  * Used for display watermark bandwidth calculations
7652  * Returns the dmif bandwidth in MBytes/s
7653  */
7654 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
7655 {
7656 	/* Calculate the DMIF Request Bandwidth */
7657 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
7658 	fixed20_12 disp_clk, bandwidth;
7659 	fixed20_12 a, b;
7660 
7661 	a.full = dfixed_const(1000);
7662 	disp_clk.full = dfixed_const(wm->disp_clk);
7663 	disp_clk.full = dfixed_div(disp_clk, a);
7664 	a.full = dfixed_const(32);
7665 	b.full = dfixed_mul(a, disp_clk);
7666 
7667 	a.full = dfixed_const(10);
7668 	disp_clk_request_efficiency.full = dfixed_const(8);
7669 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
7670 
7671 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
7672 
7673 	return dfixed_trunc(bandwidth);
7674 }
7675 
7676 /**
7677  * dce8_available_bandwidth - get the min available bandwidth
7678  *
7679  * @wm: watermark calculation data
7680  *
7681  * Calculate the min available bandwidth used for display (CIK).
7682  * Used for display watermark bandwidth calculations
7683  * Returns the min available bandwidth in MBytes/s
7684  */
7685 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
7686 {
7687 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
7688 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
7689 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
7690 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
7691 
7692 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
7693 }
7694 
7695 /**
7696  * dce8_average_bandwidth - get the average available bandwidth
7697  *
7698  * @wm: watermark calculation data
7699  *
7700  * Calculate the average available bandwidth used for display (CIK).
7701  * Used for display watermark bandwidth calculations
7702  * Returns the average available bandwidth in MBytes/s
7703  */
7704 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
7705 {
7706 	/* Calculate the display mode Average Bandwidth
7707 	 * DisplayMode should contain the source and destination dimensions,
7708 	 * timing, etc.
7709 	 */
7710 	fixed20_12 bpp;
7711 	fixed20_12 line_time;
7712 	fixed20_12 src_width;
7713 	fixed20_12 bandwidth;
7714 	fixed20_12 a;
7715 
7716 	a.full = dfixed_const(1000);
7717 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
7718 	line_time.full = dfixed_div(line_time, a);
7719 	bpp.full = dfixed_const(wm->bytes_per_pixel);
7720 	src_width.full = dfixed_const(wm->src_width);
7721 	bandwidth.full = dfixed_mul(src_width, bpp);
7722 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
7723 	bandwidth.full = dfixed_div(bandwidth, line_time);
7724 
7725 	return dfixed_trunc(bandwidth);
7726 }
7727 
7728 /**
7729  * dce8_latency_watermark - get the latency watermark
7730  *
7731  * @wm: watermark calculation data
7732  *
7733  * Calculate the latency watermark (CIK).
7734  * Used for display watermark bandwidth calculations
7735  * Returns the latency watermark in ns
7736  */
7737 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
7738 {
7739 	/* First calculate the latency in ns */
7740 	u32 mc_latency = 2000; /* 2000 ns. */
7741 	u32 available_bandwidth = dce8_available_bandwidth(wm);
7742 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
7743 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
7744 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
7745 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
7746 		(wm->num_heads * cursor_line_pair_return_time);
7747 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
7748 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
7749 	u32 tmp, dmif_size = 12288;
7750 	fixed20_12 a, b, c;
7751 
7752 	if (wm->num_heads == 0)
7753 		return 0;
7754 
7755 	a.full = dfixed_const(2);
7756 	b.full = dfixed_const(1);
7757 	if ((wm->vsc.full > a.full) ||
7758 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
7759 	    (wm->vtaps >= 5) ||
7760 	    ((wm->vsc.full >= a.full) && wm->interlaced))
7761 		max_src_lines_per_dst_line = 4;
7762 	else
7763 		max_src_lines_per_dst_line = 2;
7764 
7765 	a.full = dfixed_const(available_bandwidth);
7766 	b.full = dfixed_const(wm->num_heads);
7767 	a.full = dfixed_div(a, b);
7768 
7769 	b.full = dfixed_const(mc_latency + 512);
7770 	c.full = dfixed_const(wm->disp_clk);
7771 	b.full = dfixed_div(b, c);
7772 
7773 	c.full = dfixed_const(dmif_size);
7774 	b.full = dfixed_div(c, b);
7775 
7776 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
7777 
7778 	b.full = dfixed_const(1000);
7779 	c.full = dfixed_const(wm->disp_clk);
7780 	b.full = dfixed_div(c, b);
7781 	c.full = dfixed_const(wm->bytes_per_pixel);
7782 	b.full = dfixed_mul(b, c);
7783 
7784 	lb_fill_bw = min(tmp, dfixed_trunc(b));
7785 
7786 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
7787 	b.full = dfixed_const(1000);
7788 	c.full = dfixed_const(lb_fill_bw);
7789 	b.full = dfixed_div(c, b);
7790 	a.full = dfixed_div(a, b);
7791 	line_fill_time = dfixed_trunc(a);
7792 
7793 	if (line_fill_time < wm->active_time)
7794 		return latency;
7795 	else
7796 		return latency + (line_fill_time - wm->active_time);
7797 
7798 }
7799 
7800 /**
7801  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
7802  * average and available dram bandwidth
7803  *
7804  * @wm: watermark calculation data
7805  *
7806  * Check if the display average bandwidth fits in the display
7807  * dram bandwidth (CIK).
7808  * Used for display watermark bandwidth calculations
7809  * Returns true if the display fits, false if not.
7810  */
7811 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7812 {
7813 	if (dce8_average_bandwidth(wm) <=
7814 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
7815 		return true;
7816 	else
7817 		return false;
7818 }
7819 
7820 /**
7821  * dce8_average_bandwidth_vs_available_bandwidth - check
7822  * average and available bandwidth
7823  *
7824  * @wm: watermark calculation data
7825  *
7826  * Check if the display average bandwidth fits in the display
7827  * available bandwidth (CIK).
7828  * Used for display watermark bandwidth calculations
7829  * Returns true if the display fits, false if not.
7830  */
7831 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
7832 {
7833 	if (dce8_average_bandwidth(wm) <=
7834 	    (dce8_available_bandwidth(wm) / wm->num_heads))
7835 		return true;
7836 	else
7837 		return false;
7838 }
7839 
7840 /**
7841  * dce8_check_latency_hiding - check latency hiding
7842  *
7843  * @wm: watermark calculation data
7844  *
7845  * Check latency hiding (CIK).
7846  * Used for display watermark bandwidth calculations
7847  * Returns true if the display fits, false if not.
7848  */
7849 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
7850 {
7851 	u32 lb_partitions = wm->lb_size / wm->src_width;
7852 	u32 line_time = wm->active_time + wm->blank_time;
7853 	u32 latency_tolerant_lines;
7854 	u32 latency_hiding;
7855 	fixed20_12 a;
7856 
7857 	a.full = dfixed_const(1);
7858 	if (wm->vsc.full > a.full)
7859 		latency_tolerant_lines = 1;
7860 	else {
7861 		if (lb_partitions <= (wm->vtaps + 1))
7862 			latency_tolerant_lines = 1;
7863 		else
7864 			latency_tolerant_lines = 2;
7865 	}
7866 
7867 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
7868 
7869 	if (dce8_latency_watermark(wm) <= latency_hiding)
7870 		return true;
7871 	else
7872 		return false;
7873 }
7874 
7875 /**
7876  * dce8_program_watermarks - program display watermarks
7877  *
7878  * @rdev: radeon_device pointer
7879  * @radeon_crtc: the selected display controller
7880  * @lb_size: line buffer size
7881  * @num_heads: number of display controllers in use
7882  *
7883  * Calculate and program the display watermarks for the
7884  * selected display controller (CIK).
7885  */
7886 static void dce8_program_watermarks(struct radeon_device *rdev,
7887 				    struct radeon_crtc *radeon_crtc,
7888 				    u32 lb_size, u32 num_heads)
7889 {
7890 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
7891 	struct dce8_wm_params wm_low, wm_high;
7892 	u32 pixel_period;
7893 	u32 line_time = 0;
7894 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
7895 	u32 tmp, wm_mask;
7896 
7897 	if (radeon_crtc->base.enabled && num_heads && mode) {
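		/* mode->clock is in kHz, so 10^6 / clock gives the pixel
		 * period in ns; line_time is clamped to 65535, presumably
		 * the watermark register field maximum */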
7898 		pixel_period = 1000000 / (u32)mode->clock;
7899 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
7900 
7901 		/* watermark for high clocks */
7902 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
7903 		    rdev->pm.dpm_enabled) {
7904 			wm_high.yclk =
7905 				radeon_dpm_get_mclk(rdev, false) * 10;
7906 			wm_high.sclk =
7907 				radeon_dpm_get_sclk(rdev, false) * 10;
7908 		} else {
7909 			wm_high.yclk = rdev->pm.current_mclk * 10;
7910 			wm_high.sclk = rdev->pm.current_sclk * 10;
7911 		}
7912 
7913 		wm_high.disp_clk = mode->clock;
7914 		wm_high.src_width = mode->crtc_hdisplay;
7915 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
7916 		wm_high.blank_time = line_time - wm_high.active_time;
7917 		wm_high.interlaced = false;
7918 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
7919 			wm_high.interlaced = true;
7920 		wm_high.vsc = radeon_crtc->vsc;
7921 		wm_high.vtaps = 1;
7922 		if (radeon_crtc->rmx_type != RMX_OFF)
7923 			wm_high.vtaps = 2;
7924 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
7925 		wm_high.lb_size = lb_size;
7926 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
7927 		wm_high.num_heads = num_heads;
7928 
7929 		/* set for high clocks */
7930 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
7931 
7932 		/* possibly force display priority to high */
7933 		/* should really do this at mode validation time... */
7934 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
7935 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
7936 		    !dce8_check_latency_hiding(&wm_high) ||
7937 		    (rdev->disp_priority == 2)) {
7938 			DRM_DEBUG_KMS("force priority to high\n");
7939 		}
7940 
7941 		/* watermark for low clocks */
7942 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
7943 		    rdev->pm.dpm_enabled) {
7944 			wm_low.yclk =
7945 				radeon_dpm_get_mclk(rdev, true) * 10;
7946 			wm_low.sclk =
7947 				radeon_dpm_get_sclk(rdev, true) * 10;
7948 		} else {
7949 			wm_low.yclk = rdev->pm.current_mclk * 10;
7950 			wm_low.sclk = rdev->pm.current_sclk * 10;
7951 		}
7952 
7953 		wm_low.disp_clk = mode->clock;
7954 		wm_low.src_width = mode->crtc_hdisplay;
7955 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
7956 		wm_low.blank_time = line_time - wm_low.active_time;
7957 		wm_low.interlaced = false;
7958 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
7959 			wm_low.interlaced = true;
7960 		wm_low.vsc = radeon_crtc->vsc;
7961 		wm_low.vtaps = 1;
7962 		if (radeon_crtc->rmx_type != RMX_OFF)
7963 			wm_low.vtaps = 2;
7964 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
7965 		wm_low.lb_size = lb_size;
7966 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
7967 		wm_low.num_heads = num_heads;
7968 
7969 		/* set for low clocks */
7970 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
7971 
7972 		/* possibly force display priority to high */
7973 		/* should really do this at mode validation time... */
7974 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
7975 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
7976 		    !dce8_check_latency_hiding(&wm_low) ||
7977 		    (rdev->disp_priority == 2)) {
7978 			DRM_DEBUG_KMS("force priority to high\n");
7979 		}
7980 	}
7981 
7982 	/* select wm A */
7983 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
7984 	tmp = wm_mask;
7985 	tmp &= ~LATENCY_WATERMARK_MASK(3);
7986 	tmp |= LATENCY_WATERMARK_MASK(1);
7987 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
7988 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
7989 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
7990 		LATENCY_HIGH_WATERMARK(line_time)));
7991 	/* select wm B */
7992 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
7993 	tmp &= ~LATENCY_WATERMARK_MASK(3);
7994 	tmp |= LATENCY_WATERMARK_MASK(2);
7995 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
7996 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
7997 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
7998 		LATENCY_HIGH_WATERMARK(line_time)));
7999 	/* restore original selection */
8000 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
8001 
8002 	/* save values for DPM */
8003 	radeon_crtc->line_time = line_time;
8004 	radeon_crtc->wm_high = latency_watermark_a;
8005 	radeon_crtc->wm_low = latency_watermark_b;
8006 }
8007 
8008 /**
8009  * dce8_bandwidth_update - program display watermarks
8010  *
8011  * @rdev: radeon_device pointer
8012  *
8013  * Calculate and program the display watermarks and line
8014  * buffer allocation (CIK).
8015  */
8016 void dce8_bandwidth_update(struct radeon_device *rdev)
8017 {
8018 	struct drm_display_mode *mode = NULL;
8019 	u32 num_heads = 0, lb_size;
8020 	int i;
8021 
8022 	radeon_update_display_priority(rdev);
8023 
8024 	for (i = 0; i < rdev->num_crtc; i++) {
8025 		if (rdev->mode_info.crtcs[i]->base.enabled)
8026 			num_heads++;
8027 	}
8028 	for (i = 0; i < rdev->num_crtc; i++) {
8029 		mode = &rdev->mode_info.crtcs[i]->base.mode;
8030 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8031 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8032 	}
8033 }
8034 
8035 /**
8036  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8037  *
8038  * @rdev: radeon_device pointer
8039  *
8040  * Fetches a GPU clock counter snapshot (CIK).
8041  * Returns the 64 bit clock counter snapshot.
8042  */
8043 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8044 {
8045 	uint64_t clock;
8046 
8047 	mutex_lock(&rdev->gpu_clock_mutex);
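	/* the write below presumably latches the full 64-bit counter so
	 * that the two 32-bit reads are coherent; the mutex keeps
	 * concurrent callers from clobbering the latched value */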
8048 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8049 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8050 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8051 	mutex_unlock(&rdev->gpu_clock_mutex);
8052 	return clock;
8053 }
8054 
8055 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8056                               u32 cntl_reg, u32 status_reg)
8057 {
8058 	int r, i;
8059 	struct atom_clock_dividers dividers;
8060 	uint32_t tmp;
8061 
8062 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8063 					   clock, false, &dividers);
8064 	if (r)
8065 		return r;
8066 
8067 	tmp = RREG32_SMC(cntl_reg);
8068 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8069 	tmp |= dividers.post_divider;
8070 	WREG32_SMC(cntl_reg, tmp);
8071 
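	/* poll for the new divider to take effect, up to 100 * 10 ms = ~1 s */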
8072 	for (i = 0; i < 100; i++) {
8073 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
8074 			break;
8075 		mdelay(10);
8076 	}
8077 	if (i == 100)
8078 		return -ETIMEDOUT;
8079 
8080 	return 0;
8081 }
8082 
8083 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8084 {
8085 	int r = 0;
8086 
8087 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8088 	if (r)
8089 		return r;
8090 
8091 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8092 	return r;
8093 }
8094 
8095 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
8096 {
8097 	struct pci_dev *root = rdev->pdev->bus->self;
8098 	int bridge_pos, gpu_pos;
8099 	u32 speed_cntl, mask, current_data_rate;
8100 	int ret, i;
8101 	u16 tmp16;
8102 
8103 	if (radeon_pcie_gen2 == 0)
8104 		return;
8105 
8106 	if (rdev->flags & RADEON_IS_IGP)
8107 		return;
8108 
8109 	if (!(rdev->flags & RADEON_IS_PCIE))
8110 		return;
8111 
8112 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
8113 	if (ret != 0)
8114 		return;
8115 
8116 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
8117 		return;
8118 
8119 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8120 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
8121 		LC_CURRENT_DATA_RATE_SHIFT;
8122 	if (mask & DRM_PCIE_SPEED_80) {
8123 		if (current_data_rate == 2) {
8124 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
8125 			return;
8126 		}
8127 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
8128 	} else if (mask & DRM_PCIE_SPEED_50) {
8129 		if (current_data_rate == 1) {
8130 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
8131 			return;
8132 		}
8133 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
8134 	}
8135 
8136 	bridge_pos = pci_pcie_cap(root);
8137 	if (!bridge_pos)
8138 		return;
8139 
8140 	gpu_pos = pci_pcie_cap(rdev->pdev);
8141 	if (!gpu_pos)
8142 		return;
8143 
8144 	if (mask & DRM_PCIE_SPEED_80) {
8145 		/* re-try equalization if gen3 is not already enabled */
8146 		if (current_data_rate != 2) {
8147 			u16 bridge_cfg, gpu_cfg;
8148 			u16 bridge_cfg2, gpu_cfg2;
8149 			u32 max_lw, current_lw, tmp;
8150 
8151 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8152 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8153 
8154 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
8155 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8156 
8157 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
8158 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8159 
8160 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8161 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
8162 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
8163 
8164 			if (current_lw < max_lw) {
8165 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8166 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
8167 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
8168 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
8169 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
8170 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
8171 				}
8172 			}
8173 
8174 			for (i = 0; i < 10; i++) {
8175 				/* check status */
8176 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
8177 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
8178 					break;
8179 
8180 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8181 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8182 
8183 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
8184 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
8185 
8186 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8187 				tmp |= LC_SET_QUIESCE;
8188 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8189 
8190 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8191 				tmp |= LC_REDO_EQ;
8192 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8193 
8194 				mdelay(100);
8195 
8196 				/* linkctl */
8197 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
8198 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8199 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
8200 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8201 
8202 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
8203 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8204 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
8205 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8206 
8207 				/* linkctl2 */
8208 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
8209 				tmp16 &= ~((1 << 4) | (7 << 9));
8210 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
8211 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
8212 
8213 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8214 				tmp16 &= ~((1 << 4) | (7 << 9));
8215 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
8216 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8217 
8218 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8219 				tmp &= ~LC_SET_QUIESCE;
8220 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8221 			}
8222 		}
8223 	}
8224 
8225 	/* set the link speed */
8226 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
8227 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
8228 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8229 
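	/* LNKCTL2[3:0] is the target link speed: 1 = 2.5GT/s, 2 = 5.0GT/s,
	 * 3 = 8.0GT/s, per the standard PCIe encoding */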
8230 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8231 	tmp16 &= ~0xf;
8232 	if (mask & DRM_PCIE_SPEED_80)
8233 		tmp16 |= 3; /* gen3 */
8234 	else if (mask & DRM_PCIE_SPEED_50)
8235 		tmp16 |= 2; /* gen2 */
8236 	else
8237 		tmp16 |= 1; /* gen1 */
8238 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8239 
8240 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8241 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
8242 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8243 
8244 	for (i = 0; i < rdev->usec_timeout; i++) {
8245 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8246 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
8247 			break;
8248 		udelay(1);
8249 	}
8250 }
8251 
8252 static void cik_program_aspm(struct radeon_device *rdev)
8253 {
8254 	u32 data, orig;
8255 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
8256 	bool disable_clkreq = false;
8257 
8258 	if (radeon_aspm == 0)
8259 		return;
8260 
8261 	/* XXX double check IGPs */
8262 	if (rdev->flags & RADEON_IS_IGP)
8263 		return;
8264 
8265 	if (!(rdev->flags & RADEON_IS_PCIE))
8266 		return;
8267 
8268 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8269 	data &= ~LC_XMIT_N_FTS_MASK;
8270 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
8271 	if (orig != data)
8272 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
8273 
8274 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
8275 	data |= LC_GO_TO_RECOVERY;
8276 	if (orig != data)
8277 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
8278 
8279 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
8280 	data |= P_IGNORE_EDB_ERR;
8281 	if (orig != data)
8282 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
8283 
8284 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8285 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
8286 	data |= LC_PMI_TO_L1_DIS;
8287 	if (!disable_l0s)
8288 		data |= LC_L0S_INACTIVITY(7);
8289 
8290 	if (!disable_l1) {
8291 		data |= LC_L1_INACTIVITY(7);
8292 		data &= ~LC_PMI_TO_L1_DIS;
8293 		if (orig != data)
8294 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8295 
8296 		if (!disable_plloff_in_l1) {
8297 			bool clk_req_support;
8298 
8299 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
8300 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8301 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8302 			if (orig != data)
8303 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
8304 
8305 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
8306 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8307 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8308 			if (orig != data)
8309 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
8310 
8311 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
8312 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8313 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8314 			if (orig != data)
8315 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
8316 
8317 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
8318 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8319 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8320 			if (orig != data)
8321 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
8322 
8323 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8324 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
8325 			data |= LC_DYN_LANES_PWR_STATE(3);
8326 			if (orig != data)
8327 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
8328 
8329 			if (!disable_clkreq) {
8330 				struct pci_dev *root = rdev->pdev->bus->self;
8331 				u32 lnkcap;
8332 
8333 				clk_req_support = false;
8334 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
8335 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
8336 					clk_req_support = true;
8337 			} else {
8338 				clk_req_support = false;
8339 			}
8340 
8341 			if (clk_req_support) {
8342 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
8343 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
8344 				if (orig != data)
8345 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
8346 
8347 				orig = data = RREG32_SMC(THM_CLK_CNTL);
8348 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
8349 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
8350 				if (orig != data)
8351 					WREG32_SMC(THM_CLK_CNTL, data);
8352 
8353 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
8354 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
8355 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
8356 				if (orig != data)
8357 					WREG32_SMC(MISC_CLK_CTRL, data);
8358 
8359 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
8360 				data &= ~BCLK_AS_XCLK;
8361 				if (orig != data)
8362 					WREG32_SMC(CG_CLKPIN_CNTL, data);
8363 
8364 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
8365 				data &= ~FORCE_BIF_REFCLK_EN;
8366 				if (orig != data)
8367 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
8368 
8369 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
8370 				data &= ~MPLL_CLKOUT_SEL_MASK;
8371 				data |= MPLL_CLKOUT_SEL(4);
8372 				if (orig != data)
8373 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
8374 			}
8375 		}
8376 	} else {
8377 		if (orig != data)
8378 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8379 	}
8380 
8381 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
8382 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
8383 	if (orig != data)
8384 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
8385 
8386 	if (!disable_l0s) {
8387 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8388 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
8389 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8390 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
8391 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8392 				data &= ~LC_L0S_INACTIVITY_MASK;
8393 				if (orig != data)
8394 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8395 			}
8396 		}
8397 	}
8398 }
8399