xref: /openbmc/linux/drivers/gpu/drm/radeon/si.c (revision fb574682)
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <linux/pci.h>
28 #include <linux/slab.h>
29 
30 #include <drm/drm_vblank.h>
31 #include <drm/radeon_drm.h>
32 
33 #include "atom.h"
34 #include "clearstate_si.h"
35 #include "radeon.h"
36 #include "radeon_asic.h"
37 #include "radeon_audio.h"
38 #include "radeon_ucode.h"
39 #include "si_blit_shaders.h"
40 #include "sid.h"
41 
42 
/*
 * Firmware images declared by this module via MODULE_FIRMWARE(); all of
 * them live under the "radeon/" directory.
 *
 * For each chip name (TAHITI, PITCAIRN, VERDE, OLAND, HAINAN) two sets
 * are listed:
 *   - an upper-case set: pfp, me, ce, mc, mc2, rlc, smc
 *   - a lower-case set:  pfp, me, ce, mc, rlc, smc (no mc2)
 * The lower-case pitcairn/verde/oland/hainan sets add a "_k_smc" image,
 * hainan additionally lists "banks_k_2_smc", and "si58_mc" is listed on
 * its own at the end.
 *
 * NOTE(review): which set/variant is actually requested for a given board
 * is decided by code outside this view - confirm there before removing or
 * renaming an entry.
 */
43 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
44 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
45 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
46 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
47 MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
48 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
49 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
50 
51 MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
52 MODULE_FIRMWARE("radeon/tahiti_me.bin");
53 MODULE_FIRMWARE("radeon/tahiti_ce.bin");
54 MODULE_FIRMWARE("radeon/tahiti_mc.bin");
55 MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
56 MODULE_FIRMWARE("radeon/tahiti_smc.bin");
57 
58 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
59 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
60 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
61 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
62 MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
63 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
64 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
65 
66 MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
67 MODULE_FIRMWARE("radeon/pitcairn_me.bin");
68 MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
69 MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
70 MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
71 MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
72 MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");
73 
74 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
75 MODULE_FIRMWARE("radeon/VERDE_me.bin");
76 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
77 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
78 MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
79 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
80 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
81 
82 MODULE_FIRMWARE("radeon/verde_pfp.bin");
83 MODULE_FIRMWARE("radeon/verde_me.bin");
84 MODULE_FIRMWARE("radeon/verde_ce.bin");
85 MODULE_FIRMWARE("radeon/verde_mc.bin");
86 MODULE_FIRMWARE("radeon/verde_rlc.bin");
87 MODULE_FIRMWARE("radeon/verde_smc.bin");
88 MODULE_FIRMWARE("radeon/verde_k_smc.bin");
89 
90 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
91 MODULE_FIRMWARE("radeon/OLAND_me.bin");
92 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
93 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
94 MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
95 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
96 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
97 
98 MODULE_FIRMWARE("radeon/oland_pfp.bin");
99 MODULE_FIRMWARE("radeon/oland_me.bin");
100 MODULE_FIRMWARE("radeon/oland_ce.bin");
101 MODULE_FIRMWARE("radeon/oland_mc.bin");
102 MODULE_FIRMWARE("radeon/oland_rlc.bin");
103 MODULE_FIRMWARE("radeon/oland_smc.bin");
104 MODULE_FIRMWARE("radeon/oland_k_smc.bin");
105 
106 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
107 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
108 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
109 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
110 MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
111 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
112 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
113 
114 MODULE_FIRMWARE("radeon/hainan_pfp.bin");
115 MODULE_FIRMWARE("radeon/hainan_me.bin");
116 MODULE_FIRMWARE("radeon/hainan_ce.bin");
117 MODULE_FIRMWARE("radeon/hainan_mc.bin");
118 MODULE_FIRMWARE("radeon/hainan_rlc.bin");
119 MODULE_FIRMWARE("radeon/hainan_smc.bin");
120 MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
121 MODULE_FIRMWARE("radeon/banks_k_2_smc.bin");
122 
123 MODULE_FIRMWARE("radeon/si58_mc.bin");
124 
/*
 * Prototypes needed before first use in this file.
 *
 * The "static" entries are helpers private to this translation unit; their
 * definitions are not part of this view.
 *
 * The "extern" entries are declared here directly instead of through a
 * header.  NOTE(review): judging by the sumo_/r600_/evergreen_ prefixes
 * they are implemented by the code for those ASIC families - confirm the
 * prototypes still match the definitions, since the compiler cannot check
 * hand-written extern declarations against them.
 */
125 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
126 static void si_pcie_gen3_enable(struct radeon_device *rdev);
127 static void si_program_aspm(struct radeon_device *rdev);
128 extern void sumo_rlc_fini(struct radeon_device *rdev);
129 extern int sumo_rlc_init(struct radeon_device *rdev);
130 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
131 extern void r600_ih_ring_fini(struct radeon_device *rdev);
132 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
133 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
134 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
135 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
136 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
137 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
138 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
139 					 bool enable);
140 static void si_init_pg(struct radeon_device *rdev);
141 static void si_init_cg(struct radeon_device *rdev);
142 static void si_fini_pg(struct radeon_device *rdev);
143 static void si_fini_cg(struct radeon_device *rdev);
144 static void si_rlc_stop(struct radeon_device *rdev);
145 
/*
 * Six entries, EVERGREEN_CRTC0_REGISTER_OFFSET through
 * EVERGREEN_CRTC5_REGISTER_OFFSET, in ascending CRTC order so that index i
 * yields the CRTCi constant.  The constants themselves are defined outside
 * this file view.
 */
146 static const u32 crtc_offsets[] =
147 {
148 	EVERGREEN_CRTC0_REGISTER_OFFSET,
149 	EVERGREEN_CRTC1_REGISTER_OFFSET,
150 	EVERGREEN_CRTC2_REGISTER_OFFSET,
151 	EVERGREEN_CRTC3_REGISTER_OFFSET,
152 	EVERGREEN_CRTC4_REGISTER_OFFSET,
153 	EVERGREEN_CRTC5_REGISTER_OFFSET
154 };
155 
/*
 * Six entries: DISP_INTERRUPT_STATUS followed by its CONTINUE,
 * CONTINUE2 ... CONTINUE5 variants, in that order.
 *
 * NOTE(review): presumably indexed by the same 0..5 display index as
 * crtc_offsets[] - the users are outside this view, confirm there.
 */
156 static const u32 si_disp_int_status[] =
157 {
158 	DISP_INTERRUPT_STATUS,
159 	DISP_INTERRUPT_STATUS_CONTINUE,
160 	DISP_INTERRUPT_STATUS_CONTINUE2,
161 	DISP_INTERRUPT_STATUS_CONTINUE3,
162 	DISP_INTERRUPT_STATUS_CONTINUE4,
163 	DISP_INTERRUPT_STATUS_CONTINUE5
164 };
165 
/*
 * Per-pin HPD register addresses.
 *
 * Each macro takes a zero-based index x and returns the matching
 * DC_HPD1_* register plus x * 0xc, so x == 0 yields the DC_HPD1_*
 * register itself and consecutive indices are 0xc apart.
 *
 * The argument is parenthesized so that an expression argument such as
 * DC_HPDx_CONTROL(i + 1) scales the whole expression by 0xc instead of
 * only its last operand.
 */
#define DC_HPDx_CONTROL(x)        (DC_HPD1_CONTROL     + ((x) * 0xc))
#define DC_HPDx_INT_CONTROL(x)    (DC_HPD1_INT_CONTROL + ((x) * 0xc))
#define DC_HPDx_INT_STATUS_REG(x) (DC_HPD1_INT_STATUS  + ((x) * 0xc))
169 
/*
 * Save/restore register list (name says: for the RLC on Verde).
 *
 * Almost every entry is a pair of words:
 *   (sel << 16) | (reg >> 2)   followed by   0x00000000
 * where sel is one of 0x8000, 0x8001, 0x8040, 0x8041 or 0x9c00.
 * The exceptions are a lone 0x3 word part-way through and a lone
 * 0x00000000 as the very last word.
 *
 * NOTE(review): the ">> 2" looks like a byte-offset to dword-offset
 * conversion, the upper 16 bits look like an instance/broadcast selector
 * and the zero word looks like a slot for the saved value - none of this
 * is established by the code in this view; confirm against the consumer
 * of this table before editing.  Order and word count are significant to
 * whatever parses the list, so do not reorder or deduplicate.
 */
170 static const u32 verde_rlc_save_restore_register_list[] =
171 {
172 	(0x8000 << 16) | (0x98f4 >> 2),
173 	0x00000000,
174 	(0x8040 << 16) | (0x98f4 >> 2),
175 	0x00000000,
176 	(0x8000 << 16) | (0xe80 >> 2),
177 	0x00000000,
178 	(0x8040 << 16) | (0xe80 >> 2),
179 	0x00000000,
180 	(0x8000 << 16) | (0x89bc >> 2),
181 	0x00000000,
182 	(0x8040 << 16) | (0x89bc >> 2),
183 	0x00000000,
184 	(0x8000 << 16) | (0x8c1c >> 2),
185 	0x00000000,
186 	(0x8040 << 16) | (0x8c1c >> 2),
187 	0x00000000,
188 	(0x9c00 << 16) | (0x98f0 >> 2),
189 	0x00000000,
190 	(0x9c00 << 16) | (0xe7c >> 2),
191 	0x00000000,
192 	(0x8000 << 16) | (0x9148 >> 2),
193 	0x00000000,
194 	(0x8040 << 16) | (0x9148 >> 2),
195 	0x00000000,
196 	(0x9c00 << 16) | (0x9150 >> 2),
197 	0x00000000,
198 	(0x9c00 << 16) | (0x897c >> 2),
199 	0x00000000,
200 	(0x9c00 << 16) | (0x8d8c >> 2),
201 	0x00000000,
202 	(0x9c00 << 16) | (0xac54 >> 2),
203 	0X00000000,
	/*
	 * Lone word that is not part of a pair.
	 * NOTE(review): presumably a marker/count understood by the list
	 * consumer - meaning not visible here, keep as-is.
	 */
204 	0x3,
205 	(0x9c00 << 16) | (0x98f8 >> 2),
206 	0x00000000,
207 	(0x9c00 << 16) | (0x9910 >> 2),
208 	0x00000000,
209 	(0x9c00 << 16) | (0x9914 >> 2),
210 	0x00000000,
211 	(0x9c00 << 16) | (0x9918 >> 2),
212 	0x00000000,
213 	(0x9c00 << 16) | (0x991c >> 2),
214 	0x00000000,
215 	(0x9c00 << 16) | (0x9920 >> 2),
216 	0x00000000,
217 	(0x9c00 << 16) | (0x9924 >> 2),
218 	0x00000000,
219 	(0x9c00 << 16) | (0x9928 >> 2),
220 	0x00000000,
221 	(0x9c00 << 16) | (0x992c >> 2),
222 	0x00000000,
223 	(0x9c00 << 16) | (0x9930 >> 2),
224 	0x00000000,
225 	(0x9c00 << 16) | (0x9934 >> 2),
226 	0x00000000,
227 	(0x9c00 << 16) | (0x9938 >> 2),
228 	0x00000000,
229 	(0x9c00 << 16) | (0x993c >> 2),
230 	0x00000000,
231 	(0x9c00 << 16) | (0x9940 >> 2),
232 	0x00000000,
233 	(0x9c00 << 16) | (0x9944 >> 2),
234 	0x00000000,
235 	(0x9c00 << 16) | (0x9948 >> 2),
236 	0x00000000,
237 	(0x9c00 << 16) | (0x994c >> 2),
238 	0x00000000,
239 	(0x9c00 << 16) | (0x9950 >> 2),
240 	0x00000000,
241 	(0x9c00 << 16) | (0x9954 >> 2),
242 	0x00000000,
243 	(0x9c00 << 16) | (0x9958 >> 2),
244 	0x00000000,
245 	(0x9c00 << 16) | (0x995c >> 2),
246 	0x00000000,
247 	(0x9c00 << 16) | (0x9960 >> 2),
248 	0x00000000,
249 	(0x9c00 << 16) | (0x9964 >> 2),
250 	0x00000000,
251 	(0x9c00 << 16) | (0x9968 >> 2),
252 	0x00000000,
253 	(0x9c00 << 16) | (0x996c >> 2),
254 	0x00000000,
255 	(0x9c00 << 16) | (0x9970 >> 2),
256 	0x00000000,
257 	(0x9c00 << 16) | (0x9974 >> 2),
258 	0x00000000,
259 	(0x9c00 << 16) | (0x9978 >> 2),
260 	0x00000000,
261 	(0x9c00 << 16) | (0x997c >> 2),
262 	0x00000000,
263 	(0x9c00 << 16) | (0x9980 >> 2),
264 	0x00000000,
265 	(0x9c00 << 16) | (0x9984 >> 2),
266 	0x00000000,
267 	(0x9c00 << 16) | (0x9988 >> 2),
268 	0x00000000,
269 	(0x9c00 << 16) | (0x998c >> 2),
270 	0x00000000,
271 	(0x9c00 << 16) | (0x8c00 >> 2),
272 	0x00000000,
273 	(0x9c00 << 16) | (0x8c14 >> 2),
274 	0x00000000,
275 	(0x9c00 << 16) | (0x8c04 >> 2),
276 	0x00000000,
277 	(0x9c00 << 16) | (0x8c08 >> 2),
278 	0x00000000,
279 	(0x8000 << 16) | (0x9b7c >> 2),
280 	0x00000000,
281 	(0x8040 << 16) | (0x9b7c >> 2),
282 	0x00000000,
283 	(0x8000 << 16) | (0xe84 >> 2),
284 	0x00000000,
285 	(0x8040 << 16) | (0xe84 >> 2),
286 	0x00000000,
287 	(0x8000 << 16) | (0x89c0 >> 2),
288 	0x00000000,
289 	(0x8040 << 16) | (0x89c0 >> 2),
290 	0x00000000,
291 	(0x8000 << 16) | (0x914c >> 2),
292 	0x00000000,
293 	(0x8040 << 16) | (0x914c >> 2),
294 	0x00000000,
295 	(0x8000 << 16) | (0x8c20 >> 2),
296 	0x00000000,
297 	(0x8040 << 16) | (0x8c20 >> 2),
298 	0x00000000,
299 	(0x8000 << 16) | (0x9354 >> 2),
300 	0x00000000,
301 	(0x8040 << 16) | (0x9354 >> 2),
302 	0x00000000,
303 	(0x9c00 << 16) | (0x9060 >> 2),
304 	0x00000000,
305 	(0x9c00 << 16) | (0x9364 >> 2),
306 	0x00000000,
307 	(0x9c00 << 16) | (0x9100 >> 2),
308 	0x00000000,
309 	(0x9c00 << 16) | (0x913c >> 2),
310 	0x00000000,
311 	(0x8000 << 16) | (0x90e0 >> 2),
312 	0x00000000,
313 	(0x8000 << 16) | (0x90e4 >> 2),
314 	0x00000000,
315 	(0x8000 << 16) | (0x90e8 >> 2),
316 	0x00000000,
317 	(0x8040 << 16) | (0x90e0 >> 2),
318 	0x00000000,
319 	(0x8040 << 16) | (0x90e4 >> 2),
320 	0x00000000,
321 	(0x8040 << 16) | (0x90e8 >> 2),
322 	0x00000000,
323 	(0x9c00 << 16) | (0x8bcc >> 2),
324 	0x00000000,
325 	(0x9c00 << 16) | (0x8b24 >> 2),
326 	0x00000000,
327 	(0x9c00 << 16) | (0x88c4 >> 2),
328 	0x00000000,
329 	(0x9c00 << 16) | (0x8e50 >> 2),
330 	0x00000000,
331 	(0x9c00 << 16) | (0x8c0c >> 2),
332 	0x00000000,
333 	(0x9c00 << 16) | (0x8e58 >> 2),
334 	0x00000000,
335 	(0x9c00 << 16) | (0x8e5c >> 2),
336 	0x00000000,
337 	(0x9c00 << 16) | (0x9508 >> 2),
338 	0x00000000,
339 	(0x9c00 << 16) | (0x950c >> 2),
340 	0x00000000,
341 	(0x9c00 << 16) | (0x9494 >> 2),
342 	0x00000000,
343 	(0x9c00 << 16) | (0xac0c >> 2),
344 	0x00000000,
345 	(0x9c00 << 16) | (0xac10 >> 2),
346 	0x00000000,
347 	(0x9c00 << 16) | (0xac14 >> 2),
348 	0x00000000,
349 	(0x9c00 << 16) | (0xae00 >> 2),
350 	0x00000000,
351 	(0x9c00 << 16) | (0xac08 >> 2),
352 	0x00000000,
353 	(0x9c00 << 16) | (0x88d4 >> 2),
354 	0x00000000,
355 	(0x9c00 << 16) | (0x88c8 >> 2),
356 	0x00000000,
357 	(0x9c00 << 16) | (0x88cc >> 2),
358 	0x00000000,
359 	(0x9c00 << 16) | (0x89b0 >> 2),
360 	0x00000000,
361 	(0x9c00 << 16) | (0x8b10 >> 2),
362 	0x00000000,
363 	(0x9c00 << 16) | (0x8a14 >> 2),
364 	0x00000000,
365 	(0x9c00 << 16) | (0x9830 >> 2),
366 	0x00000000,
367 	(0x9c00 << 16) | (0x9834 >> 2),
368 	0x00000000,
369 	(0x9c00 << 16) | (0x9838 >> 2),
370 	0x00000000,
371 	(0x9c00 << 16) | (0x9a10 >> 2),
372 	0x00000000,
373 	(0x8000 << 16) | (0x9870 >> 2),
374 	0x00000000,
375 	(0x8000 << 16) | (0x9874 >> 2),
376 	0x00000000,
377 	(0x8001 << 16) | (0x9870 >> 2),
378 	0x00000000,
379 	(0x8001 << 16) | (0x9874 >> 2),
380 	0x00000000,
381 	(0x8040 << 16) | (0x9870 >> 2),
382 	0x00000000,
383 	(0x8040 << 16) | (0x9874 >> 2),
384 	0x00000000,
385 	(0x8041 << 16) | (0x9870 >> 2),
386 	0x00000000,
387 	(0x8041 << 16) | (0x9874 >> 2),
388 	0x00000000,
	/* Final lone zero word; presumably the list terminator - confirm. */
389 	0x00000000
390 };
391 
/*
 * Tahiti "golden" RLC register settings, three u32 words per row.
 *
 * NOTE(review): the rows look like { register offset, mask, value }
 * triples; every row here uses an all-ones second word.  The code that
 * consumes the table is outside this view - confirm the layout there.
 */
392 static const u32 tahiti_golden_rlc_registers[] =
393 {
394 	0xc424, 0xffffffff, 0x00601005,
395 	0xc47c, 0xffffffff, 0x10104040,
396 	0xc488, 0xffffffff, 0x0100000a,
397 	0xc314, 0xffffffff, 0x00000800,
398 	0xc30c, 0xffffffff, 0x800000f4,
399 	0xf4a8, 0xffffffff, 0x00000000
400 };
401 
/*
 * Tahiti "golden" register settings, three u32 words per row.
 *
 * NOTE(review): the rows look like { register offset, mask, value }
 * triples.  Some third words have bits set outside the second word (e.g.
 * the 0x9a10 and 0x1410 rows); how the consumer combines the two is not
 * visible here, so keep the values exactly as they are.
 */
402 static const u32 tahiti_golden_registers[] =
403 {
404 	0x9a10, 0x00010000, 0x00018208,
405 	0x9830, 0xffffffff, 0x00000000,
406 	0x9834, 0xf00fffff, 0x00000400,
407 	0x9838, 0x0002021c, 0x00020200,
408 	0xc78, 0x00000080, 0x00000000,
409 	0xd030, 0x000300c0, 0x00800040,
410 	0xd830, 0x000300c0, 0x00800040,
411 	0x5bb0, 0x000000f0, 0x00000070,
412 	0x5bc0, 0x00200000, 0x50100000,
413 	0x7030, 0x31000311, 0x00000011,
414 	0x277c, 0x00000003, 0x000007ff,
415 	0x240c, 0x000007ff, 0x00000000,
416 	0x8a14, 0xf000001f, 0x00000007,
417 	0x8b24, 0xffffffff, 0x00ffffff,
418 	0x8b10, 0x0000ff0f, 0x00000000,
419 	0x28a4c, 0x07ffffff, 0x4e000000,
420 	0x28350, 0x3f3f3fff, 0x2a00126a,
421 	0x30, 0x000000ff, 0x0040,
422 	0x34, 0x00000040, 0x00004040,
423 	0x9100, 0x07ffffff, 0x03000000,
424 	0x8e88, 0x01ff1f3f, 0x00000000,
425 	0x8e84, 0x01ff1f3f, 0x00000000,
426 	0x9060, 0x0000007f, 0x00000020,
427 	0x9508, 0x00010000, 0x00010000,
428 	0xac14, 0x00000200, 0x000002fb,
429 	0xac10, 0xffffffff, 0x0000543b,
430 	0xac0c, 0xffffffff, 0xa9210876,
431 	0x88d0, 0xffffffff, 0x000fff40,
432 	0x88d4, 0x0000001f, 0x00000010,
433 	0x1410, 0x20000000, 0x20fffed8,
434 	0x15c0, 0x000c0fc0, 0x000c0400
435 };
436 
/*
 * Second Tahiti golden-register table, kept separate from
 * tahiti_golden_registers[]; a single three-word row.
 *
 * NOTE(review): presumably the same { offset, mask, value } layout; why
 * this row lives in its own table is not visible here - check the caller.
 */
437 static const u32 tahiti_golden_registers2[] =
438 {
439 	0xc64, 0x00000001, 0x00000001
440 };
441 
/*
 * Pitcairn "golden" RLC register settings, three u32 words per row, every
 * row with an all-ones second word.
 *
 * NOTE(review): rows look like { register offset, mask, value }; the
 * consumer is outside this view - confirm the layout there.
 */
442 static const u32 pitcairn_golden_rlc_registers[] =
443 {
444 	0xc424, 0xffffffff, 0x00601004,
445 	0xc47c, 0xffffffff, 0x10102020,
446 	0xc488, 0xffffffff, 0x01000020,
447 	0xc314, 0xffffffff, 0x00000800,
448 	0xc30c, 0xffffffff, 0x800000a4
449 };
450 
/*
 * Pitcairn "golden" register settings, three u32 words per row.
 *
 * NOTE(review): rows look like { register offset, mask, value }; the
 * consumer is outside this view - confirm before changing any word.
 */
451 static const u32 pitcairn_golden_registers[] =
452 {
453 	0x9a10, 0x00010000, 0x00018208,
454 	0x9830, 0xffffffff, 0x00000000,
455 	0x9834, 0xf00fffff, 0x00000400,
456 	0x9838, 0x0002021c, 0x00020200,
457 	0xc78, 0x00000080, 0x00000000,
458 	0xd030, 0x000300c0, 0x00800040,
459 	0xd830, 0x000300c0, 0x00800040,
460 	0x5bb0, 0x000000f0, 0x00000070,
461 	0x5bc0, 0x00200000, 0x50100000,
462 	0x7030, 0x31000311, 0x00000011,
463 	0x2ae4, 0x00073ffe, 0x000022a2,
464 	0x240c, 0x000007ff, 0x00000000,
465 	0x8a14, 0xf000001f, 0x00000007,
466 	0x8b24, 0xffffffff, 0x00ffffff,
467 	0x8b10, 0x0000ff0f, 0x00000000,
468 	0x28a4c, 0x07ffffff, 0x4e000000,
469 	0x28350, 0x3f3f3fff, 0x2a00126a,
470 	0x30, 0x000000ff, 0x0040,
471 	0x34, 0x00000040, 0x00004040,
472 	0x9100, 0x07ffffff, 0x03000000,
473 	0x9060, 0x0000007f, 0x00000020,
474 	0x9508, 0x00010000, 0x00010000,
475 	0xac14, 0x000003ff, 0x000000f7,
476 	0xac10, 0xffffffff, 0x00000000,
477 	0xac0c, 0xffffffff, 0x32761054,
478 	0x88d4, 0x0000001f, 0x00000010,
479 	0x15c0, 0x000c0fc0, 0x000c0400
480 };
481 
/*
 * Verde "golden" RLC register settings, three u32 words per row, every
 * row with an all-ones second word.
 *
 * NOTE(review): rows look like { register offset, mask, value }; the
 * consumer is outside this view - confirm the layout there.
 */
482 static const u32 verde_golden_rlc_registers[] =
483 {
484 	0xc424, 0xffffffff, 0x033f1005,
485 	0xc47c, 0xffffffff, 0x10808020,
486 	0xc488, 0xffffffff, 0x00800008,
487 	0xc314, 0xffffffff, 0x00001000,
488 	0xc30c, 0xffffffff, 0x80010014
489 };
490 
/*
 * Verde "golden" register settings, three u32 words per row.
 *
 * Unlike the other chips' golden tables in this file, many rows here are
 * repeated verbatim two or three times in a row (e.g. 0xd030 twice,
 * 0x2ae4 three times).
 *
 * NOTE(review): rows look like { register offset, mask, value }.  Whether
 * the repetition is intentional cannot be told from this view - do not
 * deduplicate without checking the consumer and hardware documentation.
 */
491 static const u32 verde_golden_registers[] =
492 {
493 	0x9a10, 0x00010000, 0x00018208,
494 	0x9830, 0xffffffff, 0x00000000,
495 	0x9834, 0xf00fffff, 0x00000400,
496 	0x9838, 0x0002021c, 0x00020200,
497 	0xc78, 0x00000080, 0x00000000,
498 	0xd030, 0x000300c0, 0x00800040,
499 	0xd030, 0x000300c0, 0x00800040,
500 	0xd830, 0x000300c0, 0x00800040,
501 	0xd830, 0x000300c0, 0x00800040,
502 	0x5bb0, 0x000000f0, 0x00000070,
503 	0x5bc0, 0x00200000, 0x50100000,
504 	0x7030, 0x31000311, 0x00000011,
505 	0x2ae4, 0x00073ffe, 0x000022a2,
506 	0x2ae4, 0x00073ffe, 0x000022a2,
507 	0x2ae4, 0x00073ffe, 0x000022a2,
508 	0x240c, 0x000007ff, 0x00000000,
509 	0x240c, 0x000007ff, 0x00000000,
510 	0x240c, 0x000007ff, 0x00000000,
511 	0x8a14, 0xf000001f, 0x00000007,
512 	0x8a14, 0xf000001f, 0x00000007,
513 	0x8a14, 0xf000001f, 0x00000007,
514 	0x8b24, 0xffffffff, 0x00ffffff,
515 	0x8b10, 0x0000ff0f, 0x00000000,
516 	0x28a4c, 0x07ffffff, 0x4e000000,
517 	0x28350, 0x3f3f3fff, 0x0000124a,
518 	0x28350, 0x3f3f3fff, 0x0000124a,
519 	0x28350, 0x3f3f3fff, 0x0000124a,
520 	0x30, 0x000000ff, 0x0040,
521 	0x34, 0x00000040, 0x00004040,
522 	0x9100, 0x07ffffff, 0x03000000,
523 	0x9100, 0x07ffffff, 0x03000000,
524 	0x8e88, 0x01ff1f3f, 0x00000000,
525 	0x8e88, 0x01ff1f3f, 0x00000000,
526 	0x8e88, 0x01ff1f3f, 0x00000000,
527 	0x8e84, 0x01ff1f3f, 0x00000000,
528 	0x8e84, 0x01ff1f3f, 0x00000000,
529 	0x8e84, 0x01ff1f3f, 0x00000000,
530 	0x9060, 0x0000007f, 0x00000020,
531 	0x9508, 0x00010000, 0x00010000,
532 	0xac14, 0x000003ff, 0x00000003,
533 	0xac14, 0x000003ff, 0x00000003,
534 	0xac14, 0x000003ff, 0x00000003,
535 	0xac10, 0xffffffff, 0x00000000,
536 	0xac10, 0xffffffff, 0x00000000,
537 	0xac10, 0xffffffff, 0x00000000,
538 	0xac0c, 0xffffffff, 0x00001032,
539 	0xac0c, 0xffffffff, 0x00001032,
540 	0xac0c, 0xffffffff, 0x00001032,
541 	0x88d4, 0x0000001f, 0x00000010,
542 	0x88d4, 0x0000001f, 0x00000010,
543 	0x88d4, 0x0000001f, 0x00000010,
544 	0x15c0, 0x000c0fc0, 0x000c0400
545 };
546 
/*
 * Oland "golden" RLC register settings, three u32 words per row, every
 * row with an all-ones second word.  The five rows carry the same words
 * as the first five rows of tahiti_golden_rlc_registers[].
 *
 * NOTE(review): rows look like { register offset, mask, value }; the
 * consumer is outside this view - confirm the layout there.
 */
547 static const u32 oland_golden_rlc_registers[] =
548 {
549 	0xc424, 0xffffffff, 0x00601005,
550 	0xc47c, 0xffffffff, 0x10104040,
551 	0xc488, 0xffffffff, 0x0100000a,
552 	0xc314, 0xffffffff, 0x00000800,
553 	0xc30c, 0xffffffff, 0x800000f4
554 };
555 
/*
 * Oland "golden" register settings, three u32 words per row.
 *
 * NOTE(review): rows look like { register offset, mask, value }; the
 * consumer is outside this view - confirm before changing any word.
 */
556 static const u32 oland_golden_registers[] =
557 {
558 	0x9a10, 0x00010000, 0x00018208,
559 	0x9830, 0xffffffff, 0x00000000,
560 	0x9834, 0xf00fffff, 0x00000400,
561 	0x9838, 0x0002021c, 0x00020200,
562 	0xc78, 0x00000080, 0x00000000,
563 	0xd030, 0x000300c0, 0x00800040,
564 	0xd830, 0x000300c0, 0x00800040,
565 	0x5bb0, 0x000000f0, 0x00000070,
566 	0x5bc0, 0x00200000, 0x50100000,
567 	0x7030, 0x31000311, 0x00000011,
568 	0x2ae4, 0x00073ffe, 0x000022a2,
569 	0x240c, 0x000007ff, 0x00000000,
570 	0x8a14, 0xf000001f, 0x00000007,
571 	0x8b24, 0xffffffff, 0x00ffffff,
572 	0x8b10, 0x0000ff0f, 0x00000000,
573 	0x28a4c, 0x07ffffff, 0x4e000000,
574 	0x28350, 0x3f3f3fff, 0x00000082,
575 	0x30, 0x000000ff, 0x0040,
576 	0x34, 0x00000040, 0x00004040,
577 	0x9100, 0x07ffffff, 0x03000000,
578 	0x9060, 0x0000007f, 0x00000020,
579 	0x9508, 0x00010000, 0x00010000,
580 	0xac14, 0x000003ff, 0x000000f3,
581 	0xac10, 0xffffffff, 0x00000000,
582 	0xac0c, 0xffffffff, 0x00003210,
583 	0x88d4, 0x0000001f, 0x00000010,
584 	0x15c0, 0x000c0fc0, 0x000c0400
585 };
586 
/*
 * Hainan "golden" register settings, three u32 words per row.
 *
 * Compared with the other chips' golden tables in this file, this one
 * has no 0xc78, 0x5bb0, 0x5bc0 or 0x7030 rows and instead carries 0xd0c0
 * and 0xd8c0 rows.
 *
 * NOTE(review): rows look like { register offset, mask, value }; the
 * consumer is outside this view - confirm before changing any word.
 */
587 static const u32 hainan_golden_registers[] =
588 {
589 	0x9a10, 0x00010000, 0x00018208,
590 	0x9830, 0xffffffff, 0x00000000,
591 	0x9834, 0xf00fffff, 0x00000400,
592 	0x9838, 0x0002021c, 0x00020200,
593 	0xd0c0, 0xff000fff, 0x00000100,
594 	0xd030, 0x000300c0, 0x00800040,
595 	0xd8c0, 0xff000fff, 0x00000100,
596 	0xd830, 0x000300c0, 0x00800040,
597 	0x2ae4, 0x00073ffe, 0x000022a2,
598 	0x240c, 0x000007ff, 0x00000000,
599 	0x8a14, 0xf000001f, 0x00000007,
600 	0x8b24, 0xffffffff, 0x00ffffff,
601 	0x8b10, 0x0000ff0f, 0x00000000,
602 	0x28a4c, 0x07ffffff, 0x4e000000,
603 	0x28350, 0x3f3f3fff, 0x00000000,
604 	0x30, 0x000000ff, 0x0040,
605 	0x34, 0x00000040, 0x00004040,
606 	0x9100, 0x03e00000, 0x03600000,
607 	0x9060, 0x0000007f, 0x00000020,
608 	0x9508, 0x00010000, 0x00010000,
609 	0xac14, 0x000003ff, 0x000000f1,
610 	0xac10, 0xffffffff, 0x00000000,
611 	0xac0c, 0xffffffff, 0x00003210,
612 	0x88d4, 0x0000001f, 0x00000010,
613 	0x15c0, 0x000c0fc0, 0x000c0400
614 };
615 
/*
 * Second Hainan golden-register table, kept separate from
 * hainan_golden_registers[]; a single three-word row.
 *
 * NOTE(review): presumably the same { offset, mask, value } layout; why
 * this row lives in its own table is not visible here - check the caller.
 */
616 static const u32 hainan_golden_registers2[] =
617 {
618 	0x98f8, 0xffffffff, 0x02010001
619 };
620 
/*
 * Tahiti MGCG/CGCG init sequence, three u32 words per row.
 *
 * The 0x802c row appears twice with the same words, once before and once
 * after the first run of 0x00000100 rows, and the 0x9160..0x929c range is
 * covered contiguously in 4-byte steps.
 *
 * NOTE(review): rows look like { register offset, mask, value } and the
 * name suggests clock-gating setup; the consumer is outside this view.
 * Row order may matter to the hardware - do not sort or deduplicate.
 */
621 static const u32 tahiti_mgcg_cgcg_init[] =
622 {
623 	0xc400, 0xffffffff, 0xfffffffc,
624 	0x802c, 0xffffffff, 0xe0000000,
625 	0x9a60, 0xffffffff, 0x00000100,
626 	0x92a4, 0xffffffff, 0x00000100,
627 	0xc164, 0xffffffff, 0x00000100,
628 	0x9774, 0xffffffff, 0x00000100,
629 	0x8984, 0xffffffff, 0x06000100,
630 	0x8a18, 0xffffffff, 0x00000100,
631 	0x92a0, 0xffffffff, 0x00000100,
632 	0xc380, 0xffffffff, 0x00000100,
633 	0x8b28, 0xffffffff, 0x00000100,
634 	0x9144, 0xffffffff, 0x00000100,
635 	0x8d88, 0xffffffff, 0x00000100,
636 	0x8d8c, 0xffffffff, 0x00000100,
637 	0x9030, 0xffffffff, 0x00000100,
638 	0x9034, 0xffffffff, 0x00000100,
639 	0x9038, 0xffffffff, 0x00000100,
640 	0x903c, 0xffffffff, 0x00000100,
641 	0xad80, 0xffffffff, 0x00000100,
642 	0xac54, 0xffffffff, 0x00000100,
643 	0x897c, 0xffffffff, 0x06000100,
644 	0x9868, 0xffffffff, 0x00000100,
645 	0x9510, 0xffffffff, 0x00000100,
646 	0xaf04, 0xffffffff, 0x00000100,
647 	0xae04, 0xffffffff, 0x00000100,
648 	0x949c, 0xffffffff, 0x00000100,
649 	0x802c, 0xffffffff, 0xe0000000,
650 	0x9160, 0xffffffff, 0x00010000,
651 	0x9164, 0xffffffff, 0x00030002,
652 	0x9168, 0xffffffff, 0x00040007,
653 	0x916c, 0xffffffff, 0x00060005,
654 	0x9170, 0xffffffff, 0x00090008,
655 	0x9174, 0xffffffff, 0x00020001,
656 	0x9178, 0xffffffff, 0x00040003,
657 	0x917c, 0xffffffff, 0x00000007,
658 	0x9180, 0xffffffff, 0x00060005,
659 	0x9184, 0xffffffff, 0x00090008,
660 	0x9188, 0xffffffff, 0x00030002,
661 	0x918c, 0xffffffff, 0x00050004,
662 	0x9190, 0xffffffff, 0x00000008,
663 	0x9194, 0xffffffff, 0x00070006,
664 	0x9198, 0xffffffff, 0x000a0009,
665 	0x919c, 0xffffffff, 0x00040003,
666 	0x91a0, 0xffffffff, 0x00060005,
667 	0x91a4, 0xffffffff, 0x00000009,
668 	0x91a8, 0xffffffff, 0x00080007,
669 	0x91ac, 0xffffffff, 0x000b000a,
670 	0x91b0, 0xffffffff, 0x00050004,
671 	0x91b4, 0xffffffff, 0x00070006,
672 	0x91b8, 0xffffffff, 0x0008000b,
673 	0x91bc, 0xffffffff, 0x000a0009,
674 	0x91c0, 0xffffffff, 0x000d000c,
675 	0x91c4, 0xffffffff, 0x00060005,
676 	0x91c8, 0xffffffff, 0x00080007,
677 	0x91cc, 0xffffffff, 0x0000000b,
678 	0x91d0, 0xffffffff, 0x000a0009,
679 	0x91d4, 0xffffffff, 0x000d000c,
680 	0x91d8, 0xffffffff, 0x00070006,
681 	0x91dc, 0xffffffff, 0x00090008,
682 	0x91e0, 0xffffffff, 0x0000000c,
683 	0x91e4, 0xffffffff, 0x000b000a,
684 	0x91e8, 0xffffffff, 0x000e000d,
685 	0x91ec, 0xffffffff, 0x00080007,
686 	0x91f0, 0xffffffff, 0x000a0009,
687 	0x91f4, 0xffffffff, 0x0000000d,
688 	0x91f8, 0xffffffff, 0x000c000b,
689 	0x91fc, 0xffffffff, 0x000f000e,
690 	0x9200, 0xffffffff, 0x00090008,
691 	0x9204, 0xffffffff, 0x000b000a,
692 	0x9208, 0xffffffff, 0x000c000f,
693 	0x920c, 0xffffffff, 0x000e000d,
694 	0x9210, 0xffffffff, 0x00110010,
695 	0x9214, 0xffffffff, 0x000a0009,
696 	0x9218, 0xffffffff, 0x000c000b,
697 	0x921c, 0xffffffff, 0x0000000f,
698 	0x9220, 0xffffffff, 0x000e000d,
699 	0x9224, 0xffffffff, 0x00110010,
700 	0x9228, 0xffffffff, 0x000b000a,
701 	0x922c, 0xffffffff, 0x000d000c,
702 	0x9230, 0xffffffff, 0x00000010,
703 	0x9234, 0xffffffff, 0x000f000e,
704 	0x9238, 0xffffffff, 0x00120011,
705 	0x923c, 0xffffffff, 0x000c000b,
706 	0x9240, 0xffffffff, 0x000e000d,
707 	0x9244, 0xffffffff, 0x00000011,
708 	0x9248, 0xffffffff, 0x0010000f,
709 	0x924c, 0xffffffff, 0x00130012,
710 	0x9250, 0xffffffff, 0x000d000c,
711 	0x9254, 0xffffffff, 0x000f000e,
712 	0x9258, 0xffffffff, 0x00100013,
713 	0x925c, 0xffffffff, 0x00120011,
714 	0x9260, 0xffffffff, 0x00150014,
715 	0x9264, 0xffffffff, 0x000e000d,
716 	0x9268, 0xffffffff, 0x0010000f,
717 	0x926c, 0xffffffff, 0x00000013,
718 	0x9270, 0xffffffff, 0x00120011,
719 	0x9274, 0xffffffff, 0x00150014,
720 	0x9278, 0xffffffff, 0x000f000e,
721 	0x927c, 0xffffffff, 0x00110010,
722 	0x9280, 0xffffffff, 0x00000014,
723 	0x9284, 0xffffffff, 0x00130012,
724 	0x9288, 0xffffffff, 0x00160015,
725 	0x928c, 0xffffffff, 0x0010000f,
726 	0x9290, 0xffffffff, 0x00120011,
727 	0x9294, 0xffffffff, 0x00000015,
728 	0x9298, 0xffffffff, 0x00140013,
729 	0x929c, 0xffffffff, 0x00170016,
730 	0x9150, 0xffffffff, 0x96940200,
731 	0x8708, 0xffffffff, 0x00900100,
732 	0xc478, 0xffffffff, 0x00000080,
733 	0xc404, 0xffffffff, 0x0020003f,
734 	0x30, 0xffffffff, 0x0000001c,
735 	0x34, 0x000f0000, 0x000f0000,
736 	0x160c, 0xffffffff, 0x00000100,
737 	0x1024, 0xffffffff, 0x00000100,
738 	0x102c, 0x00000101, 0x00000000,
739 	0x20a8, 0xffffffff, 0x00000104,
740 	0x264c, 0x000c0000, 0x000c0000,
741 	0x2648, 0x000c0000, 0x000c0000,
742 	0x55e4, 0xff000fff, 0x00000100,
743 	0x55e8, 0x00000001, 0x00000001,
744 	0x2f50, 0x00000001, 0x00000001,
745 	0x30cc, 0xc0000fff, 0x00000104,
746 	0xc1e4, 0x00000001, 0x00000001,
747 	0xd0c0, 0xfffffff0, 0x00000100,
748 	0xd8c0, 0xfffffff0, 0x00000100
749 };
750 
/*
 * Pitcairn MGCG/CGCG init sequence, three u32 words per row.
 *
 * Same shape as tahiti_mgcg_cgcg_init[], but without the 0x91c4..0x91fc
 * and 0x9264..0x929c rows and without the 0x264c/0x2648 rows.  The 0x802c
 * row appears twice with the same words.
 *
 * NOTE(review): rows look like { register offset, mask, value } and the
 * name suggests clock-gating setup; the consumer is outside this view.
 * Row order may matter to the hardware - do not sort or deduplicate.
 */
751 static const u32 pitcairn_mgcg_cgcg_init[] =
752 {
753 	0xc400, 0xffffffff, 0xfffffffc,
754 	0x802c, 0xffffffff, 0xe0000000,
755 	0x9a60, 0xffffffff, 0x00000100,
756 	0x92a4, 0xffffffff, 0x00000100,
757 	0xc164, 0xffffffff, 0x00000100,
758 	0x9774, 0xffffffff, 0x00000100,
759 	0x8984, 0xffffffff, 0x06000100,
760 	0x8a18, 0xffffffff, 0x00000100,
761 	0x92a0, 0xffffffff, 0x00000100,
762 	0xc380, 0xffffffff, 0x00000100,
763 	0x8b28, 0xffffffff, 0x00000100,
764 	0x9144, 0xffffffff, 0x00000100,
765 	0x8d88, 0xffffffff, 0x00000100,
766 	0x8d8c, 0xffffffff, 0x00000100,
767 	0x9030, 0xffffffff, 0x00000100,
768 	0x9034, 0xffffffff, 0x00000100,
769 	0x9038, 0xffffffff, 0x00000100,
770 	0x903c, 0xffffffff, 0x00000100,
771 	0xad80, 0xffffffff, 0x00000100,
772 	0xac54, 0xffffffff, 0x00000100,
773 	0x897c, 0xffffffff, 0x06000100,
774 	0x9868, 0xffffffff, 0x00000100,
775 	0x9510, 0xffffffff, 0x00000100,
776 	0xaf04, 0xffffffff, 0x00000100,
777 	0xae04, 0xffffffff, 0x00000100,
778 	0x949c, 0xffffffff, 0x00000100,
779 	0x802c, 0xffffffff, 0xe0000000,
780 	0x9160, 0xffffffff, 0x00010000,
781 	0x9164, 0xffffffff, 0x00030002,
782 	0x9168, 0xffffffff, 0x00040007,
783 	0x916c, 0xffffffff, 0x00060005,
784 	0x9170, 0xffffffff, 0x00090008,
785 	0x9174, 0xffffffff, 0x00020001,
786 	0x9178, 0xffffffff, 0x00040003,
787 	0x917c, 0xffffffff, 0x00000007,
788 	0x9180, 0xffffffff, 0x00060005,
789 	0x9184, 0xffffffff, 0x00090008,
790 	0x9188, 0xffffffff, 0x00030002,
791 	0x918c, 0xffffffff, 0x00050004,
792 	0x9190, 0xffffffff, 0x00000008,
793 	0x9194, 0xffffffff, 0x00070006,
794 	0x9198, 0xffffffff, 0x000a0009,
795 	0x919c, 0xffffffff, 0x00040003,
796 	0x91a0, 0xffffffff, 0x00060005,
797 	0x91a4, 0xffffffff, 0x00000009,
798 	0x91a8, 0xffffffff, 0x00080007,
799 	0x91ac, 0xffffffff, 0x000b000a,
800 	0x91b0, 0xffffffff, 0x00050004,
801 	0x91b4, 0xffffffff, 0x00070006,
802 	0x91b8, 0xffffffff, 0x0008000b,
803 	0x91bc, 0xffffffff, 0x000a0009,
804 	0x91c0, 0xffffffff, 0x000d000c,
805 	0x9200, 0xffffffff, 0x00090008,
806 	0x9204, 0xffffffff, 0x000b000a,
807 	0x9208, 0xffffffff, 0x000c000f,
808 	0x920c, 0xffffffff, 0x000e000d,
809 	0x9210, 0xffffffff, 0x00110010,
810 	0x9214, 0xffffffff, 0x000a0009,
811 	0x9218, 0xffffffff, 0x000c000b,
812 	0x921c, 0xffffffff, 0x0000000f,
813 	0x9220, 0xffffffff, 0x000e000d,
814 	0x9224, 0xffffffff, 0x00110010,
815 	0x9228, 0xffffffff, 0x000b000a,
816 	0x922c, 0xffffffff, 0x000d000c,
817 	0x9230, 0xffffffff, 0x00000010,
818 	0x9234, 0xffffffff, 0x000f000e,
819 	0x9238, 0xffffffff, 0x00120011,
820 	0x923c, 0xffffffff, 0x000c000b,
821 	0x9240, 0xffffffff, 0x000e000d,
822 	0x9244, 0xffffffff, 0x00000011,
823 	0x9248, 0xffffffff, 0x0010000f,
824 	0x924c, 0xffffffff, 0x00130012,
825 	0x9250, 0xffffffff, 0x000d000c,
826 	0x9254, 0xffffffff, 0x000f000e,
827 	0x9258, 0xffffffff, 0x00100013,
828 	0x925c, 0xffffffff, 0x00120011,
829 	0x9260, 0xffffffff, 0x00150014,
830 	0x9150, 0xffffffff, 0x96940200,
831 	0x8708, 0xffffffff, 0x00900100,
832 	0xc478, 0xffffffff, 0x00000080,
833 	0xc404, 0xffffffff, 0x0020003f,
834 	0x30, 0xffffffff, 0x0000001c,
835 	0x34, 0x000f0000, 0x000f0000,
836 	0x160c, 0xffffffff, 0x00000100,
837 	0x1024, 0xffffffff, 0x00000100,
838 	0x102c, 0x00000101, 0x00000000,
839 	0x20a8, 0xffffffff, 0x00000104,
840 	0x55e4, 0xff000fff, 0x00000100,
841 	0x55e8, 0x00000001, 0x00000001,
842 	0x2f50, 0x00000001, 0x00000001,
843 	0x30cc, 0xc0000fff, 0x00000104,
844 	0xc1e4, 0x00000001, 0x00000001,
845 	0xd0c0, 0xfffffff0, 0x00000100,
846 	0xd8c0, 0xfffffff0, 0x00000100
847 };
848 
/*
 * Verde MGCG/CGCG init sequence, three u32 words per row.
 *
 * Same shape as tahiti_mgcg_cgcg_init[], but without the 0x91c4..0x91fc
 * and 0x9264..0x929c rows; the 0x264c/0x2648 rows are present.  The
 * 0x802c row appears twice with the same words.
 *
 * NOTE(review): rows look like { register offset, mask, value } and the
 * name suggests clock-gating setup; the consumer is outside this view.
 * Row order may matter to the hardware - do not sort or deduplicate.
 */
849 static const u32 verde_mgcg_cgcg_init[] =
850 {
851 	0xc400, 0xffffffff, 0xfffffffc,
852 	0x802c, 0xffffffff, 0xe0000000,
853 	0x9a60, 0xffffffff, 0x00000100,
854 	0x92a4, 0xffffffff, 0x00000100,
855 	0xc164, 0xffffffff, 0x00000100,
856 	0x9774, 0xffffffff, 0x00000100,
857 	0x8984, 0xffffffff, 0x06000100,
858 	0x8a18, 0xffffffff, 0x00000100,
859 	0x92a0, 0xffffffff, 0x00000100,
860 	0xc380, 0xffffffff, 0x00000100,
861 	0x8b28, 0xffffffff, 0x00000100,
862 	0x9144, 0xffffffff, 0x00000100,
863 	0x8d88, 0xffffffff, 0x00000100,
864 	0x8d8c, 0xffffffff, 0x00000100,
865 	0x9030, 0xffffffff, 0x00000100,
866 	0x9034, 0xffffffff, 0x00000100,
867 	0x9038, 0xffffffff, 0x00000100,
868 	0x903c, 0xffffffff, 0x00000100,
869 	0xad80, 0xffffffff, 0x00000100,
870 	0xac54, 0xffffffff, 0x00000100,
871 	0x897c, 0xffffffff, 0x06000100,
872 	0x9868, 0xffffffff, 0x00000100,
873 	0x9510, 0xffffffff, 0x00000100,
874 	0xaf04, 0xffffffff, 0x00000100,
875 	0xae04, 0xffffffff, 0x00000100,
876 	0x949c, 0xffffffff, 0x00000100,
877 	0x802c, 0xffffffff, 0xe0000000,
878 	0x9160, 0xffffffff, 0x00010000,
879 	0x9164, 0xffffffff, 0x00030002,
880 	0x9168, 0xffffffff, 0x00040007,
881 	0x916c, 0xffffffff, 0x00060005,
882 	0x9170, 0xffffffff, 0x00090008,
883 	0x9174, 0xffffffff, 0x00020001,
884 	0x9178, 0xffffffff, 0x00040003,
885 	0x917c, 0xffffffff, 0x00000007,
886 	0x9180, 0xffffffff, 0x00060005,
887 	0x9184, 0xffffffff, 0x00090008,
888 	0x9188, 0xffffffff, 0x00030002,
889 	0x918c, 0xffffffff, 0x00050004,
890 	0x9190, 0xffffffff, 0x00000008,
891 	0x9194, 0xffffffff, 0x00070006,
892 	0x9198, 0xffffffff, 0x000a0009,
893 	0x919c, 0xffffffff, 0x00040003,
894 	0x91a0, 0xffffffff, 0x00060005,
895 	0x91a4, 0xffffffff, 0x00000009,
896 	0x91a8, 0xffffffff, 0x00080007,
897 	0x91ac, 0xffffffff, 0x000b000a,
898 	0x91b0, 0xffffffff, 0x00050004,
899 	0x91b4, 0xffffffff, 0x00070006,
900 	0x91b8, 0xffffffff, 0x0008000b,
901 	0x91bc, 0xffffffff, 0x000a0009,
902 	0x91c0, 0xffffffff, 0x000d000c,
903 	0x9200, 0xffffffff, 0x00090008,
904 	0x9204, 0xffffffff, 0x000b000a,
905 	0x9208, 0xffffffff, 0x000c000f,
906 	0x920c, 0xffffffff, 0x000e000d,
907 	0x9210, 0xffffffff, 0x00110010,
908 	0x9214, 0xffffffff, 0x000a0009,
909 	0x9218, 0xffffffff, 0x000c000b,
910 	0x921c, 0xffffffff, 0x0000000f,
911 	0x9220, 0xffffffff, 0x000e000d,
912 	0x9224, 0xffffffff, 0x00110010,
913 	0x9228, 0xffffffff, 0x000b000a,
914 	0x922c, 0xffffffff, 0x000d000c,
915 	0x9230, 0xffffffff, 0x00000010,
916 	0x9234, 0xffffffff, 0x000f000e,
917 	0x9238, 0xffffffff, 0x00120011,
918 	0x923c, 0xffffffff, 0x000c000b,
919 	0x9240, 0xffffffff, 0x000e000d,
920 	0x9244, 0xffffffff, 0x00000011,
921 	0x9248, 0xffffffff, 0x0010000f,
922 	0x924c, 0xffffffff, 0x00130012,
923 	0x9250, 0xffffffff, 0x000d000c,
924 	0x9254, 0xffffffff, 0x000f000e,
925 	0x9258, 0xffffffff, 0x00100013,
926 	0x925c, 0xffffffff, 0x00120011,
927 	0x9260, 0xffffffff, 0x00150014,
928 	0x9150, 0xffffffff, 0x96940200,
929 	0x8708, 0xffffffff, 0x00900100,
930 	0xc478, 0xffffffff, 0x00000080,
931 	0xc404, 0xffffffff, 0x0020003f,
932 	0x30, 0xffffffff, 0x0000001c,
933 	0x34, 0x000f0000, 0x000f0000,
934 	0x160c, 0xffffffff, 0x00000100,
935 	0x1024, 0xffffffff, 0x00000100,
936 	0x102c, 0x00000101, 0x00000000,
937 	0x20a8, 0xffffffff, 0x00000104,
938 	0x264c, 0x000c0000, 0x000c0000,
939 	0x2648, 0x000c0000, 0x000c0000,
940 	0x55e4, 0xff000fff, 0x00000100,
941 	0x55e8, 0x00000001, 0x00000001,
942 	0x2f50, 0x00000001, 0x00000001,
943 	0x30cc, 0xc0000fff, 0x00000104,
944 	0xc1e4, 0x00000001, 0x00000001,
945 	0xd0c0, 0xfffffff0, 0x00000100,
946 	0xd8c0, 0xfffffff0, 0x00000100
947 };
948 
949 static const u32 oland_mgcg_cgcg_init[] =
950 {
951 	0xc400, 0xffffffff, 0xfffffffc,
952 	0x802c, 0xffffffff, 0xe0000000,
953 	0x9a60, 0xffffffff, 0x00000100,
954 	0x92a4, 0xffffffff, 0x00000100,
955 	0xc164, 0xffffffff, 0x00000100,
956 	0x9774, 0xffffffff, 0x00000100,
957 	0x8984, 0xffffffff, 0x06000100,
958 	0x8a18, 0xffffffff, 0x00000100,
959 	0x92a0, 0xffffffff, 0x00000100,
960 	0xc380, 0xffffffff, 0x00000100,
961 	0x8b28, 0xffffffff, 0x00000100,
962 	0x9144, 0xffffffff, 0x00000100,
963 	0x8d88, 0xffffffff, 0x00000100,
964 	0x8d8c, 0xffffffff, 0x00000100,
965 	0x9030, 0xffffffff, 0x00000100,
966 	0x9034, 0xffffffff, 0x00000100,
967 	0x9038, 0xffffffff, 0x00000100,
968 	0x903c, 0xffffffff, 0x00000100,
969 	0xad80, 0xffffffff, 0x00000100,
970 	0xac54, 0xffffffff, 0x00000100,
971 	0x897c, 0xffffffff, 0x06000100,
972 	0x9868, 0xffffffff, 0x00000100,
973 	0x9510, 0xffffffff, 0x00000100,
974 	0xaf04, 0xffffffff, 0x00000100,
975 	0xae04, 0xffffffff, 0x00000100,
976 	0x949c, 0xffffffff, 0x00000100,
977 	0x802c, 0xffffffff, 0xe0000000,
978 	0x9160, 0xffffffff, 0x00010000,
979 	0x9164, 0xffffffff, 0x00030002,
980 	0x9168, 0xffffffff, 0x00040007,
981 	0x916c, 0xffffffff, 0x00060005,
982 	0x9170, 0xffffffff, 0x00090008,
983 	0x9174, 0xffffffff, 0x00020001,
984 	0x9178, 0xffffffff, 0x00040003,
985 	0x917c, 0xffffffff, 0x00000007,
986 	0x9180, 0xffffffff, 0x00060005,
987 	0x9184, 0xffffffff, 0x00090008,
988 	0x9188, 0xffffffff, 0x00030002,
989 	0x918c, 0xffffffff, 0x00050004,
990 	0x9190, 0xffffffff, 0x00000008,
991 	0x9194, 0xffffffff, 0x00070006,
992 	0x9198, 0xffffffff, 0x000a0009,
993 	0x919c, 0xffffffff, 0x00040003,
994 	0x91a0, 0xffffffff, 0x00060005,
995 	0x91a4, 0xffffffff, 0x00000009,
996 	0x91a8, 0xffffffff, 0x00080007,
997 	0x91ac, 0xffffffff, 0x000b000a,
998 	0x91b0, 0xffffffff, 0x00050004,
999 	0x91b4, 0xffffffff, 0x00070006,
1000 	0x91b8, 0xffffffff, 0x0008000b,
1001 	0x91bc, 0xffffffff, 0x000a0009,
1002 	0x91c0, 0xffffffff, 0x000d000c,
1003 	0x91c4, 0xffffffff, 0x00060005,
1004 	0x91c8, 0xffffffff, 0x00080007,
1005 	0x91cc, 0xffffffff, 0x0000000b,
1006 	0x91d0, 0xffffffff, 0x000a0009,
1007 	0x91d4, 0xffffffff, 0x000d000c,
1008 	0x9150, 0xffffffff, 0x96940200,
1009 	0x8708, 0xffffffff, 0x00900100,
1010 	0xc478, 0xffffffff, 0x00000080,
1011 	0xc404, 0xffffffff, 0x0020003f,
1012 	0x30, 0xffffffff, 0x0000001c,
1013 	0x34, 0x000f0000, 0x000f0000,
1014 	0x160c, 0xffffffff, 0x00000100,
1015 	0x1024, 0xffffffff, 0x00000100,
1016 	0x102c, 0x00000101, 0x00000000,
1017 	0x20a8, 0xffffffff, 0x00000104,
1018 	0x264c, 0x000c0000, 0x000c0000,
1019 	0x2648, 0x000c0000, 0x000c0000,
1020 	0x55e4, 0xff000fff, 0x00000100,
1021 	0x55e8, 0x00000001, 0x00000001,
1022 	0x2f50, 0x00000001, 0x00000001,
1023 	0x30cc, 0xc0000fff, 0x00000104,
1024 	0xc1e4, 0x00000001, 0x00000001,
1025 	0xd0c0, 0xfffffff0, 0x00000100,
1026 	0xd8c0, 0xfffffff0, 0x00000100
1027 };
1028 
1029 static const u32 hainan_mgcg_cgcg_init[] =
1030 {
1031 	0xc400, 0xffffffff, 0xfffffffc,
1032 	0x802c, 0xffffffff, 0xe0000000,
1033 	0x9a60, 0xffffffff, 0x00000100,
1034 	0x92a4, 0xffffffff, 0x00000100,
1035 	0xc164, 0xffffffff, 0x00000100,
1036 	0x9774, 0xffffffff, 0x00000100,
1037 	0x8984, 0xffffffff, 0x06000100,
1038 	0x8a18, 0xffffffff, 0x00000100,
1039 	0x92a0, 0xffffffff, 0x00000100,
1040 	0xc380, 0xffffffff, 0x00000100,
1041 	0x8b28, 0xffffffff, 0x00000100,
1042 	0x9144, 0xffffffff, 0x00000100,
1043 	0x8d88, 0xffffffff, 0x00000100,
1044 	0x8d8c, 0xffffffff, 0x00000100,
1045 	0x9030, 0xffffffff, 0x00000100,
1046 	0x9034, 0xffffffff, 0x00000100,
1047 	0x9038, 0xffffffff, 0x00000100,
1048 	0x903c, 0xffffffff, 0x00000100,
1049 	0xad80, 0xffffffff, 0x00000100,
1050 	0xac54, 0xffffffff, 0x00000100,
1051 	0x897c, 0xffffffff, 0x06000100,
1052 	0x9868, 0xffffffff, 0x00000100,
1053 	0x9510, 0xffffffff, 0x00000100,
1054 	0xaf04, 0xffffffff, 0x00000100,
1055 	0xae04, 0xffffffff, 0x00000100,
1056 	0x949c, 0xffffffff, 0x00000100,
1057 	0x802c, 0xffffffff, 0xe0000000,
1058 	0x9160, 0xffffffff, 0x00010000,
1059 	0x9164, 0xffffffff, 0x00030002,
1060 	0x9168, 0xffffffff, 0x00040007,
1061 	0x916c, 0xffffffff, 0x00060005,
1062 	0x9170, 0xffffffff, 0x00090008,
1063 	0x9174, 0xffffffff, 0x00020001,
1064 	0x9178, 0xffffffff, 0x00040003,
1065 	0x917c, 0xffffffff, 0x00000007,
1066 	0x9180, 0xffffffff, 0x00060005,
1067 	0x9184, 0xffffffff, 0x00090008,
1068 	0x9188, 0xffffffff, 0x00030002,
1069 	0x918c, 0xffffffff, 0x00050004,
1070 	0x9190, 0xffffffff, 0x00000008,
1071 	0x9194, 0xffffffff, 0x00070006,
1072 	0x9198, 0xffffffff, 0x000a0009,
1073 	0x919c, 0xffffffff, 0x00040003,
1074 	0x91a0, 0xffffffff, 0x00060005,
1075 	0x91a4, 0xffffffff, 0x00000009,
1076 	0x91a8, 0xffffffff, 0x00080007,
1077 	0x91ac, 0xffffffff, 0x000b000a,
1078 	0x91b0, 0xffffffff, 0x00050004,
1079 	0x91b4, 0xffffffff, 0x00070006,
1080 	0x91b8, 0xffffffff, 0x0008000b,
1081 	0x91bc, 0xffffffff, 0x000a0009,
1082 	0x91c0, 0xffffffff, 0x000d000c,
1083 	0x91c4, 0xffffffff, 0x00060005,
1084 	0x91c8, 0xffffffff, 0x00080007,
1085 	0x91cc, 0xffffffff, 0x0000000b,
1086 	0x91d0, 0xffffffff, 0x000a0009,
1087 	0x91d4, 0xffffffff, 0x000d000c,
1088 	0x9150, 0xffffffff, 0x96940200,
1089 	0x8708, 0xffffffff, 0x00900100,
1090 	0xc478, 0xffffffff, 0x00000080,
1091 	0xc404, 0xffffffff, 0x0020003f,
1092 	0x30, 0xffffffff, 0x0000001c,
1093 	0x34, 0x000f0000, 0x000f0000,
1094 	0x160c, 0xffffffff, 0x00000100,
1095 	0x1024, 0xffffffff, 0x00000100,
1096 	0x20a8, 0xffffffff, 0x00000104,
1097 	0x264c, 0x000c0000, 0x000c0000,
1098 	0x2648, 0x000c0000, 0x000c0000,
1099 	0x2f50, 0x00000001, 0x00000001,
1100 	0x30cc, 0xc0000fff, 0x00000104,
1101 	0xc1e4, 0x00000001, 0x00000001,
1102 	0xd0c0, 0xfffffff0, 0x00000100,
1103 	0xd8c0, 0xfffffff0, 0x00000100
1104 };
1105 
1106 static u32 verde_pg_init[] =
1107 {
1108 	0x353c, 0xffffffff, 0x40000,
1109 	0x3538, 0xffffffff, 0x200010ff,
1110 	0x353c, 0xffffffff, 0x0,
1111 	0x353c, 0xffffffff, 0x0,
1112 	0x353c, 0xffffffff, 0x0,
1113 	0x353c, 0xffffffff, 0x0,
1114 	0x353c, 0xffffffff, 0x0,
1115 	0x353c, 0xffffffff, 0x7007,
1116 	0x3538, 0xffffffff, 0x300010ff,
1117 	0x353c, 0xffffffff, 0x0,
1118 	0x353c, 0xffffffff, 0x0,
1119 	0x353c, 0xffffffff, 0x0,
1120 	0x353c, 0xffffffff, 0x0,
1121 	0x353c, 0xffffffff, 0x0,
1122 	0x353c, 0xffffffff, 0x400000,
1123 	0x3538, 0xffffffff, 0x100010ff,
1124 	0x353c, 0xffffffff, 0x0,
1125 	0x353c, 0xffffffff, 0x0,
1126 	0x353c, 0xffffffff, 0x0,
1127 	0x353c, 0xffffffff, 0x0,
1128 	0x353c, 0xffffffff, 0x0,
1129 	0x353c, 0xffffffff, 0x120200,
1130 	0x3538, 0xffffffff, 0x500010ff,
1131 	0x353c, 0xffffffff, 0x0,
1132 	0x353c, 0xffffffff, 0x0,
1133 	0x353c, 0xffffffff, 0x0,
1134 	0x353c, 0xffffffff, 0x0,
1135 	0x353c, 0xffffffff, 0x0,
1136 	0x353c, 0xffffffff, 0x1e1e16,
1137 	0x3538, 0xffffffff, 0x600010ff,
1138 	0x353c, 0xffffffff, 0x0,
1139 	0x353c, 0xffffffff, 0x0,
1140 	0x353c, 0xffffffff, 0x0,
1141 	0x353c, 0xffffffff, 0x0,
1142 	0x353c, 0xffffffff, 0x0,
1143 	0x353c, 0xffffffff, 0x171f1e,
1144 	0x3538, 0xffffffff, 0x700010ff,
1145 	0x353c, 0xffffffff, 0x0,
1146 	0x353c, 0xffffffff, 0x0,
1147 	0x353c, 0xffffffff, 0x0,
1148 	0x353c, 0xffffffff, 0x0,
1149 	0x353c, 0xffffffff, 0x0,
1150 	0x353c, 0xffffffff, 0x0,
1151 	0x3538, 0xffffffff, 0x9ff,
1152 	0x3500, 0xffffffff, 0x0,
1153 	0x3504, 0xffffffff, 0x10000800,
1154 	0x3504, 0xffffffff, 0xf,
1155 	0x3504, 0xffffffff, 0xf,
1156 	0x3500, 0xffffffff, 0x4,
1157 	0x3504, 0xffffffff, 0x1000051e,
1158 	0x3504, 0xffffffff, 0xffff,
1159 	0x3504, 0xffffffff, 0xffff,
1160 	0x3500, 0xffffffff, 0x8,
1161 	0x3504, 0xffffffff, 0x80500,
1162 	0x3500, 0xffffffff, 0x12,
1163 	0x3504, 0xffffffff, 0x9050c,
1164 	0x3500, 0xffffffff, 0x1d,
1165 	0x3504, 0xffffffff, 0xb052c,
1166 	0x3500, 0xffffffff, 0x2a,
1167 	0x3504, 0xffffffff, 0x1053e,
1168 	0x3500, 0xffffffff, 0x2d,
1169 	0x3504, 0xffffffff, 0x10546,
1170 	0x3500, 0xffffffff, 0x30,
1171 	0x3504, 0xffffffff, 0xa054e,
1172 	0x3500, 0xffffffff, 0x3c,
1173 	0x3504, 0xffffffff, 0x1055f,
1174 	0x3500, 0xffffffff, 0x3f,
1175 	0x3504, 0xffffffff, 0x10567,
1176 	0x3500, 0xffffffff, 0x42,
1177 	0x3504, 0xffffffff, 0x1056f,
1178 	0x3500, 0xffffffff, 0x45,
1179 	0x3504, 0xffffffff, 0x10572,
1180 	0x3500, 0xffffffff, 0x48,
1181 	0x3504, 0xffffffff, 0x20575,
1182 	0x3500, 0xffffffff, 0x4c,
1183 	0x3504, 0xffffffff, 0x190801,
1184 	0x3500, 0xffffffff, 0x67,
1185 	0x3504, 0xffffffff, 0x1082a,
1186 	0x3500, 0xffffffff, 0x6a,
1187 	0x3504, 0xffffffff, 0x1b082d,
1188 	0x3500, 0xffffffff, 0x87,
1189 	0x3504, 0xffffffff, 0x310851,
1190 	0x3500, 0xffffffff, 0xba,
1191 	0x3504, 0xffffffff, 0x891,
1192 	0x3500, 0xffffffff, 0xbc,
1193 	0x3504, 0xffffffff, 0x893,
1194 	0x3500, 0xffffffff, 0xbe,
1195 	0x3504, 0xffffffff, 0x20895,
1196 	0x3500, 0xffffffff, 0xc2,
1197 	0x3504, 0xffffffff, 0x20899,
1198 	0x3500, 0xffffffff, 0xc6,
1199 	0x3504, 0xffffffff, 0x2089d,
1200 	0x3500, 0xffffffff, 0xca,
1201 	0x3504, 0xffffffff, 0x8a1,
1202 	0x3500, 0xffffffff, 0xcc,
1203 	0x3504, 0xffffffff, 0x8a3,
1204 	0x3500, 0xffffffff, 0xce,
1205 	0x3504, 0xffffffff, 0x308a5,
1206 	0x3500, 0xffffffff, 0xd3,
1207 	0x3504, 0xffffffff, 0x6d08cd,
1208 	0x3500, 0xffffffff, 0x142,
1209 	0x3504, 0xffffffff, 0x2000095a,
1210 	0x3504, 0xffffffff, 0x1,
1211 	0x3500, 0xffffffff, 0x144,
1212 	0x3504, 0xffffffff, 0x301f095b,
1213 	0x3500, 0xffffffff, 0x165,
1214 	0x3504, 0xffffffff, 0xc094d,
1215 	0x3500, 0xffffffff, 0x173,
1216 	0x3504, 0xffffffff, 0xf096d,
1217 	0x3500, 0xffffffff, 0x184,
1218 	0x3504, 0xffffffff, 0x15097f,
1219 	0x3500, 0xffffffff, 0x19b,
1220 	0x3504, 0xffffffff, 0xc0998,
1221 	0x3500, 0xffffffff, 0x1a9,
1222 	0x3504, 0xffffffff, 0x409a7,
1223 	0x3500, 0xffffffff, 0x1af,
1224 	0x3504, 0xffffffff, 0xcdc,
1225 	0x3500, 0xffffffff, 0x1b1,
1226 	0x3504, 0xffffffff, 0x800,
1227 	0x3508, 0xffffffff, 0x6c9b2000,
1228 	0x3510, 0xfc00, 0x2000,
1229 	0x3544, 0xffffffff, 0xfc0,
1230 	0x28d4, 0x00000100, 0x100
1231 };
1232 
1233 static void si_init_golden_registers(struct radeon_device *rdev)
1234 {
1235 	switch (rdev->family) {
1236 	case CHIP_TAHITI:
1237 		radeon_program_register_sequence(rdev,
1238 						 tahiti_golden_registers,
1239 						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
1240 		radeon_program_register_sequence(rdev,
1241 						 tahiti_golden_rlc_registers,
1242 						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1243 		radeon_program_register_sequence(rdev,
1244 						 tahiti_mgcg_cgcg_init,
1245 						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1246 		radeon_program_register_sequence(rdev,
1247 						 tahiti_golden_registers2,
1248 						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1249 		break;
1250 	case CHIP_PITCAIRN:
1251 		radeon_program_register_sequence(rdev,
1252 						 pitcairn_golden_registers,
1253 						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1254 		radeon_program_register_sequence(rdev,
1255 						 pitcairn_golden_rlc_registers,
1256 						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1257 		radeon_program_register_sequence(rdev,
1258 						 pitcairn_mgcg_cgcg_init,
1259 						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1260 		break;
1261 	case CHIP_VERDE:
1262 		radeon_program_register_sequence(rdev,
1263 						 verde_golden_registers,
1264 						 (const u32)ARRAY_SIZE(verde_golden_registers));
1265 		radeon_program_register_sequence(rdev,
1266 						 verde_golden_rlc_registers,
1267 						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1268 		radeon_program_register_sequence(rdev,
1269 						 verde_mgcg_cgcg_init,
1270 						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1271 		radeon_program_register_sequence(rdev,
1272 						 verde_pg_init,
1273 						 (const u32)ARRAY_SIZE(verde_pg_init));
1274 		break;
1275 	case CHIP_OLAND:
1276 		radeon_program_register_sequence(rdev,
1277 						 oland_golden_registers,
1278 						 (const u32)ARRAY_SIZE(oland_golden_registers));
1279 		radeon_program_register_sequence(rdev,
1280 						 oland_golden_rlc_registers,
1281 						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1282 		radeon_program_register_sequence(rdev,
1283 						 oland_mgcg_cgcg_init,
1284 						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1285 		break;
1286 	case CHIP_HAINAN:
1287 		radeon_program_register_sequence(rdev,
1288 						 hainan_golden_registers,
1289 						 (const u32)ARRAY_SIZE(hainan_golden_registers));
1290 		radeon_program_register_sequence(rdev,
1291 						 hainan_golden_registers2,
1292 						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
1293 		radeon_program_register_sequence(rdev,
1294 						 hainan_mgcg_cgcg_init,
1295 						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1296 		break;
1297 	default:
1298 		break;
1299 	}
1300 }
1301 
1302 /**
1303  * si_get_allowed_info_register - fetch the register for the info ioctl
1304  *
1305  * @rdev: radeon_device pointer
1306  * @reg: register offset in bytes
1307  * @val: register value
1308  *
1309  * Returns 0 for success or -EINVAL for an invalid register
1310  *
1311  */
1312 int si_get_allowed_info_register(struct radeon_device *rdev,
1313 				 u32 reg, u32 *val)
1314 {
1315 	switch (reg) {
1316 	case GRBM_STATUS:
1317 	case GRBM_STATUS2:
1318 	case GRBM_STATUS_SE0:
1319 	case GRBM_STATUS_SE1:
1320 	case SRBM_STATUS:
1321 	case SRBM_STATUS2:
1322 	case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
1323 	case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
1324 	case UVD_STATUS:
1325 		*val = RREG32(reg);
1326 		return 0;
1327 	default:
1328 		return -EINVAL;
1329 	}
1330 }
1331 
1332 #define PCIE_BUS_CLK                10000
1333 #define TCLK                        (PCIE_BUS_CLK / 10)
1334 
1335 /**
1336  * si_get_xclk - get the xclk
1337  *
1338  * @rdev: radeon_device pointer
1339  *
1340  * Returns the reference clock used by the gfx engine
1341  * (SI).
1342  */
1343 u32 si_get_xclk(struct radeon_device *rdev)
1344 {
1345 	u32 reference_clock = rdev->clock.spll.reference_freq;
1346 	u32 tmp;
1347 
1348 	tmp = RREG32(CG_CLKPIN_CNTL_2);
1349 	if (tmp & MUX_TCLK_TO_XCLK)
1350 		return TCLK;
1351 
1352 	tmp = RREG32(CG_CLKPIN_CNTL);
1353 	if (tmp & XTALIN_DIVIDE)
1354 		return reference_clock / 4;
1355 
1356 	return reference_clock;
1357 }
1358 
1359 /* get temperature in millidegrees */
1360 int si_get_temp(struct radeon_device *rdev)
1361 {
1362 	u32 temp;
1363 	int actual_temp = 0;
1364 
1365 	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1366 		CTF_TEMP_SHIFT;
1367 
1368 	if (temp & 0x200)
1369 		actual_temp = 255;
1370 	else
1371 		actual_temp = temp & 0x1ff;
1372 
1373 	actual_temp = (actual_temp * 1000);
1374 
1375 	return actual_temp;
1376 }
1377 
1378 #define TAHITI_IO_MC_REGS_SIZE 36
1379 
1380 static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1381 	{0x0000006f, 0x03044000},
1382 	{0x00000070, 0x0480c018},
1383 	{0x00000071, 0x00000040},
1384 	{0x00000072, 0x01000000},
1385 	{0x00000074, 0x000000ff},
1386 	{0x00000075, 0x00143400},
1387 	{0x00000076, 0x08ec0800},
1388 	{0x00000077, 0x040000cc},
1389 	{0x00000079, 0x00000000},
1390 	{0x0000007a, 0x21000409},
1391 	{0x0000007c, 0x00000000},
1392 	{0x0000007d, 0xe8000000},
1393 	{0x0000007e, 0x044408a8},
1394 	{0x0000007f, 0x00000003},
1395 	{0x00000080, 0x00000000},
1396 	{0x00000081, 0x01000000},
1397 	{0x00000082, 0x02000000},
1398 	{0x00000083, 0x00000000},
1399 	{0x00000084, 0xe3f3e4f4},
1400 	{0x00000085, 0x00052024},
1401 	{0x00000087, 0x00000000},
1402 	{0x00000088, 0x66036603},
1403 	{0x00000089, 0x01000000},
1404 	{0x0000008b, 0x1c0a0000},
1405 	{0x0000008c, 0xff010000},
1406 	{0x0000008e, 0xffffefff},
1407 	{0x0000008f, 0xfff3efff},
1408 	{0x00000090, 0xfff3efbf},
1409 	{0x00000094, 0x00101101},
1410 	{0x00000095, 0x00000fff},
1411 	{0x00000096, 0x00116fff},
1412 	{0x00000097, 0x60010000},
1413 	{0x00000098, 0x10010000},
1414 	{0x00000099, 0x00006000},
1415 	{0x0000009a, 0x00001000},
1416 	{0x0000009f, 0x00a77400}
1417 };
1418 
1419 static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1420 	{0x0000006f, 0x03044000},
1421 	{0x00000070, 0x0480c018},
1422 	{0x00000071, 0x00000040},
1423 	{0x00000072, 0x01000000},
1424 	{0x00000074, 0x000000ff},
1425 	{0x00000075, 0x00143400},
1426 	{0x00000076, 0x08ec0800},
1427 	{0x00000077, 0x040000cc},
1428 	{0x00000079, 0x00000000},
1429 	{0x0000007a, 0x21000409},
1430 	{0x0000007c, 0x00000000},
1431 	{0x0000007d, 0xe8000000},
1432 	{0x0000007e, 0x044408a8},
1433 	{0x0000007f, 0x00000003},
1434 	{0x00000080, 0x00000000},
1435 	{0x00000081, 0x01000000},
1436 	{0x00000082, 0x02000000},
1437 	{0x00000083, 0x00000000},
1438 	{0x00000084, 0xe3f3e4f4},
1439 	{0x00000085, 0x00052024},
1440 	{0x00000087, 0x00000000},
1441 	{0x00000088, 0x66036603},
1442 	{0x00000089, 0x01000000},
1443 	{0x0000008b, 0x1c0a0000},
1444 	{0x0000008c, 0xff010000},
1445 	{0x0000008e, 0xffffefff},
1446 	{0x0000008f, 0xfff3efff},
1447 	{0x00000090, 0xfff3efbf},
1448 	{0x00000094, 0x00101101},
1449 	{0x00000095, 0x00000fff},
1450 	{0x00000096, 0x00116fff},
1451 	{0x00000097, 0x60010000},
1452 	{0x00000098, 0x10010000},
1453 	{0x00000099, 0x00006000},
1454 	{0x0000009a, 0x00001000},
1455 	{0x0000009f, 0x00a47400}
1456 };
1457 
1458 static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1459 	{0x0000006f, 0x03044000},
1460 	{0x00000070, 0x0480c018},
1461 	{0x00000071, 0x00000040},
1462 	{0x00000072, 0x01000000},
1463 	{0x00000074, 0x000000ff},
1464 	{0x00000075, 0x00143400},
1465 	{0x00000076, 0x08ec0800},
1466 	{0x00000077, 0x040000cc},
1467 	{0x00000079, 0x00000000},
1468 	{0x0000007a, 0x21000409},
1469 	{0x0000007c, 0x00000000},
1470 	{0x0000007d, 0xe8000000},
1471 	{0x0000007e, 0x044408a8},
1472 	{0x0000007f, 0x00000003},
1473 	{0x00000080, 0x00000000},
1474 	{0x00000081, 0x01000000},
1475 	{0x00000082, 0x02000000},
1476 	{0x00000083, 0x00000000},
1477 	{0x00000084, 0xe3f3e4f4},
1478 	{0x00000085, 0x00052024},
1479 	{0x00000087, 0x00000000},
1480 	{0x00000088, 0x66036603},
1481 	{0x00000089, 0x01000000},
1482 	{0x0000008b, 0x1c0a0000},
1483 	{0x0000008c, 0xff010000},
1484 	{0x0000008e, 0xffffefff},
1485 	{0x0000008f, 0xfff3efff},
1486 	{0x00000090, 0xfff3efbf},
1487 	{0x00000094, 0x00101101},
1488 	{0x00000095, 0x00000fff},
1489 	{0x00000096, 0x00116fff},
1490 	{0x00000097, 0x60010000},
1491 	{0x00000098, 0x10010000},
1492 	{0x00000099, 0x00006000},
1493 	{0x0000009a, 0x00001000},
1494 	{0x0000009f, 0x00a37400}
1495 };
1496 
1497 static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1498 	{0x0000006f, 0x03044000},
1499 	{0x00000070, 0x0480c018},
1500 	{0x00000071, 0x00000040},
1501 	{0x00000072, 0x01000000},
1502 	{0x00000074, 0x000000ff},
1503 	{0x00000075, 0x00143400},
1504 	{0x00000076, 0x08ec0800},
1505 	{0x00000077, 0x040000cc},
1506 	{0x00000079, 0x00000000},
1507 	{0x0000007a, 0x21000409},
1508 	{0x0000007c, 0x00000000},
1509 	{0x0000007d, 0xe8000000},
1510 	{0x0000007e, 0x044408a8},
1511 	{0x0000007f, 0x00000003},
1512 	{0x00000080, 0x00000000},
1513 	{0x00000081, 0x01000000},
1514 	{0x00000082, 0x02000000},
1515 	{0x00000083, 0x00000000},
1516 	{0x00000084, 0xe3f3e4f4},
1517 	{0x00000085, 0x00052024},
1518 	{0x00000087, 0x00000000},
1519 	{0x00000088, 0x66036603},
1520 	{0x00000089, 0x01000000},
1521 	{0x0000008b, 0x1c0a0000},
1522 	{0x0000008c, 0xff010000},
1523 	{0x0000008e, 0xffffefff},
1524 	{0x0000008f, 0xfff3efff},
1525 	{0x00000090, 0xfff3efbf},
1526 	{0x00000094, 0x00101101},
1527 	{0x00000095, 0x00000fff},
1528 	{0x00000096, 0x00116fff},
1529 	{0x00000097, 0x60010000},
1530 	{0x00000098, 0x10010000},
1531 	{0x00000099, 0x00006000},
1532 	{0x0000009a, 0x00001000},
1533 	{0x0000009f, 0x00a17730}
1534 };
1535 
1536 static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1537 	{0x0000006f, 0x03044000},
1538 	{0x00000070, 0x0480c018},
1539 	{0x00000071, 0x00000040},
1540 	{0x00000072, 0x01000000},
1541 	{0x00000074, 0x000000ff},
1542 	{0x00000075, 0x00143400},
1543 	{0x00000076, 0x08ec0800},
1544 	{0x00000077, 0x040000cc},
1545 	{0x00000079, 0x00000000},
1546 	{0x0000007a, 0x21000409},
1547 	{0x0000007c, 0x00000000},
1548 	{0x0000007d, 0xe8000000},
1549 	{0x0000007e, 0x044408a8},
1550 	{0x0000007f, 0x00000003},
1551 	{0x00000080, 0x00000000},
1552 	{0x00000081, 0x01000000},
1553 	{0x00000082, 0x02000000},
1554 	{0x00000083, 0x00000000},
1555 	{0x00000084, 0xe3f3e4f4},
1556 	{0x00000085, 0x00052024},
1557 	{0x00000087, 0x00000000},
1558 	{0x00000088, 0x66036603},
1559 	{0x00000089, 0x01000000},
1560 	{0x0000008b, 0x1c0a0000},
1561 	{0x0000008c, 0xff010000},
1562 	{0x0000008e, 0xffffefff},
1563 	{0x0000008f, 0xfff3efff},
1564 	{0x00000090, 0xfff3efbf},
1565 	{0x00000094, 0x00101101},
1566 	{0x00000095, 0x00000fff},
1567 	{0x00000096, 0x00116fff},
1568 	{0x00000097, 0x60010000},
1569 	{0x00000098, 0x10010000},
1570 	{0x00000099, 0x00006000},
1571 	{0x0000009a, 0x00001000},
1572 	{0x0000009f, 0x00a07730}
1573 };
1574 
1575 /* ucode loading */
1576 int si_mc_load_microcode(struct radeon_device *rdev)
1577 {
1578 	const __be32 *fw_data = NULL;
1579 	const __le32 *new_fw_data = NULL;
1580 	u32 running;
1581 	u32 *io_mc_regs = NULL;
1582 	const __le32 *new_io_mc_regs = NULL;
1583 	int i, regs_size, ucode_size;
1584 
1585 	if (!rdev->mc_fw)
1586 		return -EINVAL;
1587 
1588 	if (rdev->new_fw) {
1589 		const struct mc_firmware_header_v1_0 *hdr =
1590 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1591 
1592 		radeon_ucode_print_mc_hdr(&hdr->header);
1593 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1594 		new_io_mc_regs = (const __le32 *)
1595 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1596 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1597 		new_fw_data = (const __le32 *)
1598 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1599 	} else {
1600 		ucode_size = rdev->mc_fw->size / 4;
1601 
1602 		switch (rdev->family) {
1603 		case CHIP_TAHITI:
1604 			io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1605 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1606 			break;
1607 		case CHIP_PITCAIRN:
1608 			io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1609 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1610 			break;
1611 		case CHIP_VERDE:
1612 		default:
1613 			io_mc_regs = (u32 *)&verde_io_mc_regs;
1614 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1615 			break;
1616 		case CHIP_OLAND:
1617 			io_mc_regs = (u32 *)&oland_io_mc_regs;
1618 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1619 			break;
1620 		case CHIP_HAINAN:
1621 			io_mc_regs = (u32 *)&hainan_io_mc_regs;
1622 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1623 			break;
1624 		}
1625 		fw_data = (const __be32 *)rdev->mc_fw->data;
1626 	}
1627 
1628 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1629 
1630 	if (running == 0) {
1631 		/* reset the engine and set to writable */
1632 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1633 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1634 
1635 		/* load mc io regs */
1636 		for (i = 0; i < regs_size; i++) {
1637 			if (rdev->new_fw) {
1638 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1639 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1640 			} else {
1641 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1642 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1643 			}
1644 		}
1645 		/* load the MC ucode */
1646 		for (i = 0; i < ucode_size; i++) {
1647 			if (rdev->new_fw)
1648 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1649 			else
1650 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1651 		}
1652 
1653 		/* put the engine back into the active state */
1654 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1655 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1656 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1657 
1658 		/* wait for training to complete */
1659 		for (i = 0; i < rdev->usec_timeout; i++) {
1660 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1661 				break;
1662 			udelay(1);
1663 		}
1664 		for (i = 0; i < rdev->usec_timeout; i++) {
1665 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1666 				break;
1667 			udelay(1);
1668 		}
1669 	}
1670 
1671 	return 0;
1672 }
1673 
1674 static int si_init_microcode(struct radeon_device *rdev)
1675 {
1676 	const char *chip_name;
1677 	const char *new_chip_name;
1678 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1679 	size_t smc_req_size, mc2_req_size;
1680 	char fw_name[30];
1681 	int err;
1682 	int new_fw = 0;
1683 	bool new_smc = false;
1684 	bool si58_fw = false;
1685 	bool banks2_fw = false;
1686 
1687 	DRM_DEBUG("\n");
1688 
1689 	switch (rdev->family) {
1690 	case CHIP_TAHITI:
1691 		chip_name = "TAHITI";
1692 		new_chip_name = "tahiti";
1693 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1694 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1695 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1696 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1697 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1698 		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1699 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1700 		break;
1701 	case CHIP_PITCAIRN:
1702 		chip_name = "PITCAIRN";
1703 		if ((rdev->pdev->revision == 0x81) &&
1704 		    ((rdev->pdev->device == 0x6810) ||
1705 		     (rdev->pdev->device == 0x6811)))
1706 			new_smc = true;
1707 		new_chip_name = "pitcairn";
1708 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1709 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1710 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1711 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1712 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1713 		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1714 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1715 		break;
1716 	case CHIP_VERDE:
1717 		chip_name = "VERDE";
1718 		if (((rdev->pdev->device == 0x6820) &&
1719 		     ((rdev->pdev->revision == 0x81) ||
1720 		      (rdev->pdev->revision == 0x83))) ||
1721 		    ((rdev->pdev->device == 0x6821) &&
1722 		     ((rdev->pdev->revision == 0x83) ||
1723 		      (rdev->pdev->revision == 0x87))) ||
1724 		    ((rdev->pdev->revision == 0x87) &&
1725 		     ((rdev->pdev->device == 0x6823) ||
1726 		      (rdev->pdev->device == 0x682b))))
1727 			new_smc = true;
1728 		new_chip_name = "verde";
1729 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1730 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1731 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1732 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1733 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1734 		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1735 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1736 		break;
1737 	case CHIP_OLAND:
1738 		chip_name = "OLAND";
1739 		if (((rdev->pdev->revision == 0x81) &&
1740 		     ((rdev->pdev->device == 0x6600) ||
1741 		      (rdev->pdev->device == 0x6604) ||
1742 		      (rdev->pdev->device == 0x6605) ||
1743 		      (rdev->pdev->device == 0x6610))) ||
1744 		    ((rdev->pdev->revision == 0x83) &&
1745 		     (rdev->pdev->device == 0x6610)))
1746 			new_smc = true;
1747 		new_chip_name = "oland";
1748 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1749 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1750 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1751 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1752 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1753 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1754 		break;
1755 	case CHIP_HAINAN:
1756 		chip_name = "HAINAN";
1757 		if (((rdev->pdev->revision == 0x81) &&
1758 		     (rdev->pdev->device == 0x6660)) ||
1759 		    ((rdev->pdev->revision == 0x83) &&
1760 		     ((rdev->pdev->device == 0x6660) ||
1761 		      (rdev->pdev->device == 0x6663) ||
1762 		      (rdev->pdev->device == 0x6665) ||
1763 		      (rdev->pdev->device == 0x6667))))
1764 			new_smc = true;
1765 		else if ((rdev->pdev->revision == 0xc3) &&
1766 			 (rdev->pdev->device == 0x6665))
1767 			banks2_fw = true;
1768 		new_chip_name = "hainan";
1769 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1770 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1771 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1772 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1773 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1774 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1775 		break;
1776 	default: BUG();
1777 	}
1778 
1779 	/* this memory configuration requires special firmware */
1780 	if (((RREG32(MC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58)
1781 		si58_fw = true;
1782 
1783 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
1784 
1785 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1786 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1787 	if (err) {
1788 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1789 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1790 		if (err)
1791 			goto out;
1792 		if (rdev->pfp_fw->size != pfp_req_size) {
1793 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1794 			       rdev->pfp_fw->size, fw_name);
1795 			err = -EINVAL;
1796 			goto out;
1797 		}
1798 	} else {
1799 		err = radeon_ucode_validate(rdev->pfp_fw);
1800 		if (err) {
1801 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1802 			       fw_name);
1803 			goto out;
1804 		} else {
1805 			new_fw++;
1806 		}
1807 	}
1808 
1809 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
1810 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1811 	if (err) {
1812 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1813 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1814 		if (err)
1815 			goto out;
1816 		if (rdev->me_fw->size != me_req_size) {
1817 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1818 			       rdev->me_fw->size, fw_name);
1819 			err = -EINVAL;
1820 		}
1821 	} else {
1822 		err = radeon_ucode_validate(rdev->me_fw);
1823 		if (err) {
1824 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1825 			       fw_name);
1826 			goto out;
1827 		} else {
1828 			new_fw++;
1829 		}
1830 	}
1831 
1832 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
1833 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1834 	if (err) {
1835 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1836 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1837 		if (err)
1838 			goto out;
1839 		if (rdev->ce_fw->size != ce_req_size) {
1840 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1841 			       rdev->ce_fw->size, fw_name);
1842 			err = -EINVAL;
1843 		}
1844 	} else {
1845 		err = radeon_ucode_validate(rdev->ce_fw);
1846 		if (err) {
1847 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1848 			       fw_name);
1849 			goto out;
1850 		} else {
1851 			new_fw++;
1852 		}
1853 	}
1854 
1855 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
1856 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1857 	if (err) {
1858 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1859 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1860 		if (err)
1861 			goto out;
1862 		if (rdev->rlc_fw->size != rlc_req_size) {
1863 			pr_err("si_rlc: Bogus length %zu in firmware \"%s\"\n",
1864 			       rdev->rlc_fw->size, fw_name);
1865 			err = -EINVAL;
1866 		}
1867 	} else {
1868 		err = radeon_ucode_validate(rdev->rlc_fw);
1869 		if (err) {
1870 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1871 			       fw_name);
1872 			goto out;
1873 		} else {
1874 			new_fw++;
1875 		}
1876 	}
1877 
1878 	if (si58_fw)
1879 		snprintf(fw_name, sizeof(fw_name), "radeon/si58_mc.bin");
1880 	else
1881 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
1882 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1883 	if (err) {
1884 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1885 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1886 		if (err) {
1887 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1888 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1889 			if (err)
1890 				goto out;
1891 		}
1892 		if ((rdev->mc_fw->size != mc_req_size) &&
1893 		    (rdev->mc_fw->size != mc2_req_size)) {
1894 			pr_err("si_mc: Bogus length %zu in firmware \"%s\"\n",
1895 			       rdev->mc_fw->size, fw_name);
1896 			err = -EINVAL;
1897 		}
1898 		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1899 	} else {
1900 		err = radeon_ucode_validate(rdev->mc_fw);
1901 		if (err) {
1902 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1903 			       fw_name);
1904 			goto out;
1905 		} else {
1906 			new_fw++;
1907 		}
1908 	}
1909 
1910 	if (banks2_fw)
1911 		snprintf(fw_name, sizeof(fw_name), "radeon/banks_k_2_smc.bin");
1912 	else if (new_smc)
1913 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
1914 	else
1915 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
1916 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1917 	if (err) {
1918 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1919 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1920 		if (err) {
1921 			pr_err("smc: error loading firmware \"%s\"\n", fw_name);
1922 			release_firmware(rdev->smc_fw);
1923 			rdev->smc_fw = NULL;
1924 			err = 0;
1925 		} else if (rdev->smc_fw->size != smc_req_size) {
1926 			pr_err("si_smc: Bogus length %zu in firmware \"%s\"\n",
1927 			       rdev->smc_fw->size, fw_name);
1928 			err = -EINVAL;
1929 		}
1930 	} else {
1931 		err = radeon_ucode_validate(rdev->smc_fw);
1932 		if (err) {
1933 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1934 			       fw_name);
1935 			goto out;
1936 		} else {
1937 			new_fw++;
1938 		}
1939 	}
1940 
1941 	if (new_fw == 0) {
1942 		rdev->new_fw = false;
1943 	} else if (new_fw < 6) {
1944 		pr_err("si_fw: mixing new and old firmware!\n");
1945 		err = -EINVAL;
1946 	} else {
1947 		rdev->new_fw = true;
1948 	}
1949 out:
1950 	if (err) {
1951 		if (err != -EINVAL)
1952 			pr_err("si_cp: Failed to load firmware \"%s\"\n",
1953 			       fw_name);
1954 		release_firmware(rdev->pfp_fw);
1955 		rdev->pfp_fw = NULL;
1956 		release_firmware(rdev->me_fw);
1957 		rdev->me_fw = NULL;
1958 		release_firmware(rdev->ce_fw);
1959 		rdev->ce_fw = NULL;
1960 		release_firmware(rdev->rlc_fw);
1961 		rdev->rlc_fw = NULL;
1962 		release_firmware(rdev->mc_fw);
1963 		rdev->mc_fw = NULL;
1964 		release_firmware(rdev->smc_fw);
1965 		rdev->smc_fw = NULL;
1966 	}
1967 	return err;
1968 }
1969 
1970 /* watermark setup */
1971 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1972 				   struct radeon_crtc *radeon_crtc,
1973 				   struct drm_display_mode *mode,
1974 				   struct drm_display_mode *other_mode)
1975 {
1976 	u32 tmp, buffer_alloc, i;
1977 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1978 	/*
1979 	 * Line Buffer Setup
1980 	 * There are 3 line buffers, each one shared by 2 display controllers.
1981 	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1982 	 * the display controllers.  The paritioning is done via one of four
1983 	 * preset allocations specified in bits 21:20:
1984 	 *  0 - half lb
1985 	 *  2 - whole lb, other crtc must be disabled
1986 	 */
1987 	/* this can get tricky if we have two large displays on a paired group
1988 	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1989 	 * non-linked crtcs for maximum line buffer allocation.
1990 	 */
1991 	if (radeon_crtc->base.enabled && mode) {
1992 		if (other_mode) {
1993 			tmp = 0; /* 1/2 */
1994 			buffer_alloc = 1;
1995 		} else {
1996 			tmp = 2; /* whole */
1997 			buffer_alloc = 2;
1998 		}
1999 	} else {
2000 		tmp = 0;
2001 		buffer_alloc = 0;
2002 	}
2003 
2004 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
2005 	       DC_LB_MEMORY_CONFIG(tmp));
2006 
2007 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
2008 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
2009 	for (i = 0; i < rdev->usec_timeout; i++) {
2010 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
2011 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
2012 			break;
2013 		udelay(1);
2014 	}
2015 
2016 	if (radeon_crtc->base.enabled && mode) {
2017 		switch (tmp) {
2018 		case 0:
2019 		default:
2020 			return 4096 * 2;
2021 		case 2:
2022 			return 8192 * 2;
2023 		}
2024 	}
2025 
2026 	/* controller not enabled, so no lb used */
2027 	return 0;
2028 }
2029 
2030 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
2031 {
2032 	u32 tmp = RREG32(MC_SHARED_CHMAP);
2033 
2034 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2035 	case 0:
2036 	default:
2037 		return 1;
2038 	case 1:
2039 		return 2;
2040 	case 2:
2041 		return 4;
2042 	case 3:
2043 		return 8;
2044 	case 4:
2045 		return 3;
2046 	case 5:
2047 		return 6;
2048 	case 6:
2049 		return 10;
2050 	case 7:
2051 		return 12;
2052 	case 8:
2053 		return 16;
2054 	}
2055 }
2056 
/*
 * Inputs to the DCE6 bandwidth and latency watermark calculations for one
 * display controller.  dce6_program_watermarks() fills one instance for the
 * high clocks and one for the low clocks.
 */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
2072 
2073 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
2074 {
2075 	/* Calculate raw DRAM Bandwidth */
2076 	fixed20_12 dram_efficiency; /* 0.7 */
2077 	fixed20_12 yclk, dram_channels, bandwidth;
2078 	fixed20_12 a;
2079 
2080 	a.full = dfixed_const(1000);
2081 	yclk.full = dfixed_const(wm->yclk);
2082 	yclk.full = dfixed_div(yclk, a);
2083 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2084 	a.full = dfixed_const(10);
2085 	dram_efficiency.full = dfixed_const(7);
2086 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
2087 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2088 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2089 
2090 	return dfixed_trunc(bandwidth);
2091 }
2092 
2093 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2094 {
2095 	/* Calculate DRAM Bandwidth and the part allocated to display. */
2096 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2097 	fixed20_12 yclk, dram_channels, bandwidth;
2098 	fixed20_12 a;
2099 
2100 	a.full = dfixed_const(1000);
2101 	yclk.full = dfixed_const(wm->yclk);
2102 	yclk.full = dfixed_div(yclk, a);
2103 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2104 	a.full = dfixed_const(10);
2105 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
2106 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2107 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2108 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2109 
2110 	return dfixed_trunc(bandwidth);
2111 }
2112 
2113 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2114 {
2115 	/* Calculate the display Data return Bandwidth */
2116 	fixed20_12 return_efficiency; /* 0.8 */
2117 	fixed20_12 sclk, bandwidth;
2118 	fixed20_12 a;
2119 
2120 	a.full = dfixed_const(1000);
2121 	sclk.full = dfixed_const(wm->sclk);
2122 	sclk.full = dfixed_div(sclk, a);
2123 	a.full = dfixed_const(10);
2124 	return_efficiency.full = dfixed_const(8);
2125 	return_efficiency.full = dfixed_div(return_efficiency, a);
2126 	a.full = dfixed_const(32);
2127 	bandwidth.full = dfixed_mul(a, sclk);
2128 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2129 
2130 	return dfixed_trunc(bandwidth);
2131 }
2132 
2133 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2134 {
2135 	return 32;
2136 }
2137 
2138 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2139 {
2140 	/* Calculate the DMIF Request Bandwidth */
2141 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2142 	fixed20_12 disp_clk, sclk, bandwidth;
2143 	fixed20_12 a, b1, b2;
2144 	u32 min_bandwidth;
2145 
2146 	a.full = dfixed_const(1000);
2147 	disp_clk.full = dfixed_const(wm->disp_clk);
2148 	disp_clk.full = dfixed_div(disp_clk, a);
2149 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2150 	b1.full = dfixed_mul(a, disp_clk);
2151 
2152 	a.full = dfixed_const(1000);
2153 	sclk.full = dfixed_const(wm->sclk);
2154 	sclk.full = dfixed_div(sclk, a);
2155 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2156 	b2.full = dfixed_mul(a, sclk);
2157 
2158 	a.full = dfixed_const(10);
2159 	disp_clk_request_efficiency.full = dfixed_const(8);
2160 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2161 
2162 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2163 
2164 	a.full = dfixed_const(min_bandwidth);
2165 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2166 
2167 	return dfixed_trunc(bandwidth);
2168 }
2169 
2170 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2171 {
2172 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
2173 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2174 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2175 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2176 
2177 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2178 }
2179 
2180 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2181 {
2182 	/* Calculate the display mode Average Bandwidth
2183 	 * DisplayMode should contain the source and destination dimensions,
2184 	 * timing, etc.
2185 	 */
2186 	fixed20_12 bpp;
2187 	fixed20_12 line_time;
2188 	fixed20_12 src_width;
2189 	fixed20_12 bandwidth;
2190 	fixed20_12 a;
2191 
2192 	a.full = dfixed_const(1000);
2193 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2194 	line_time.full = dfixed_div(line_time, a);
2195 	bpp.full = dfixed_const(wm->bytes_per_pixel);
2196 	src_width.full = dfixed_const(wm->src_width);
2197 	bandwidth.full = dfixed_mul(src_width, bpp);
2198 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2199 	bandwidth.full = dfixed_div(bandwidth, line_time);
2200 
2201 	return dfixed_trunc(bandwidth);
2202 }
2203 
2204 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
2205 {
2206 	/* First calcualte the latency in ns */
2207 	u32 mc_latency = 2000; /* 2000 ns. */
2208 	u32 available_bandwidth = dce6_available_bandwidth(wm);
2209 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
2210 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
2211 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
2212 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
2213 		(wm->num_heads * cursor_line_pair_return_time);
2214 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
2215 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
2216 	u32 tmp, dmif_size = 12288;
2217 	fixed20_12 a, b, c;
2218 
2219 	if (wm->num_heads == 0)
2220 		return 0;
2221 
2222 	a.full = dfixed_const(2);
2223 	b.full = dfixed_const(1);
2224 	if ((wm->vsc.full > a.full) ||
2225 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
2226 	    (wm->vtaps >= 5) ||
2227 	    ((wm->vsc.full >= a.full) && wm->interlaced))
2228 		max_src_lines_per_dst_line = 4;
2229 	else
2230 		max_src_lines_per_dst_line = 2;
2231 
2232 	a.full = dfixed_const(available_bandwidth);
2233 	b.full = dfixed_const(wm->num_heads);
2234 	a.full = dfixed_div(a, b);
2235 	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
2236 	tmp = min(dfixed_trunc(a), tmp);
2237 
2238 	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
2239 
2240 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2241 	b.full = dfixed_const(1000);
2242 	c.full = dfixed_const(lb_fill_bw);
2243 	b.full = dfixed_div(c, b);
2244 	a.full = dfixed_div(a, b);
2245 	line_fill_time = dfixed_trunc(a);
2246 
2247 	if (line_fill_time < wm->active_time)
2248 		return latency;
2249 	else
2250 		return latency + (line_fill_time - wm->active_time);
2251 
2252 }
2253 
2254 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2255 {
2256 	if (dce6_average_bandwidth(wm) <=
2257 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2258 		return true;
2259 	else
2260 		return false;
2261 };
2262 
2263 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2264 {
2265 	if (dce6_average_bandwidth(wm) <=
2266 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2267 		return true;
2268 	else
2269 		return false;
2270 };
2271 
2272 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2273 {
2274 	u32 lb_partitions = wm->lb_size / wm->src_width;
2275 	u32 line_time = wm->active_time + wm->blank_time;
2276 	u32 latency_tolerant_lines;
2277 	u32 latency_hiding;
2278 	fixed20_12 a;
2279 
2280 	a.full = dfixed_const(1);
2281 	if (wm->vsc.full > a.full)
2282 		latency_tolerant_lines = 1;
2283 	else {
2284 		if (lb_partitions <= (wm->vtaps + 1))
2285 			latency_tolerant_lines = 1;
2286 		else
2287 			latency_tolerant_lines = 2;
2288 	}
2289 
2290 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2291 
2292 	if (dce6_latency_watermark(wm) <= latency_hiding)
2293 		return true;
2294 	else
2295 		return false;
2296 }
2297 
2298 static void dce6_program_watermarks(struct radeon_device *rdev,
2299 					 struct radeon_crtc *radeon_crtc,
2300 					 u32 lb_size, u32 num_heads)
2301 {
2302 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
2303 	struct dce6_wm_params wm_low, wm_high;
2304 	u32 dram_channels;
2305 	u32 active_time;
2306 	u32 line_time = 0;
2307 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
2308 	u32 priority_a_mark = 0, priority_b_mark = 0;
2309 	u32 priority_a_cnt = PRIORITY_OFF;
2310 	u32 priority_b_cnt = PRIORITY_OFF;
2311 	u32 tmp, arb_control3;
2312 	fixed20_12 a, b, c;
2313 
2314 	if (radeon_crtc->base.enabled && num_heads && mode) {
2315 		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
2316 					    (u32)mode->clock);
2317 		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
2318 					  (u32)mode->clock);
2319 		line_time = min(line_time, (u32)65535);
2320 		priority_a_cnt = 0;
2321 		priority_b_cnt = 0;
2322 
2323 		if (rdev->family == CHIP_ARUBA)
2324 			dram_channels = evergreen_get_number_of_dram_channels(rdev);
2325 		else
2326 			dram_channels = si_get_number_of_dram_channels(rdev);
2327 
2328 		/* watermark for high clocks */
2329 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2330 			wm_high.yclk =
2331 				radeon_dpm_get_mclk(rdev, false) * 10;
2332 			wm_high.sclk =
2333 				radeon_dpm_get_sclk(rdev, false) * 10;
2334 		} else {
2335 			wm_high.yclk = rdev->pm.current_mclk * 10;
2336 			wm_high.sclk = rdev->pm.current_sclk * 10;
2337 		}
2338 
2339 		wm_high.disp_clk = mode->clock;
2340 		wm_high.src_width = mode->crtc_hdisplay;
2341 		wm_high.active_time = active_time;
2342 		wm_high.blank_time = line_time - wm_high.active_time;
2343 		wm_high.interlaced = false;
2344 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2345 			wm_high.interlaced = true;
2346 		wm_high.vsc = radeon_crtc->vsc;
2347 		wm_high.vtaps = 1;
2348 		if (radeon_crtc->rmx_type != RMX_OFF)
2349 			wm_high.vtaps = 2;
2350 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2351 		wm_high.lb_size = lb_size;
2352 		wm_high.dram_channels = dram_channels;
2353 		wm_high.num_heads = num_heads;
2354 
2355 		/* watermark for low clocks */
2356 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2357 			wm_low.yclk =
2358 				radeon_dpm_get_mclk(rdev, true) * 10;
2359 			wm_low.sclk =
2360 				radeon_dpm_get_sclk(rdev, true) * 10;
2361 		} else {
2362 			wm_low.yclk = rdev->pm.current_mclk * 10;
2363 			wm_low.sclk = rdev->pm.current_sclk * 10;
2364 		}
2365 
2366 		wm_low.disp_clk = mode->clock;
2367 		wm_low.src_width = mode->crtc_hdisplay;
2368 		wm_low.active_time = active_time;
2369 		wm_low.blank_time = line_time - wm_low.active_time;
2370 		wm_low.interlaced = false;
2371 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2372 			wm_low.interlaced = true;
2373 		wm_low.vsc = radeon_crtc->vsc;
2374 		wm_low.vtaps = 1;
2375 		if (radeon_crtc->rmx_type != RMX_OFF)
2376 			wm_low.vtaps = 2;
2377 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2378 		wm_low.lb_size = lb_size;
2379 		wm_low.dram_channels = dram_channels;
2380 		wm_low.num_heads = num_heads;
2381 
2382 		/* set for high clocks */
2383 		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2384 		/* set for low clocks */
2385 		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2386 
2387 		/* possibly force display priority to high */
2388 		/* should really do this at mode validation time... */
2389 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2390 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2391 		    !dce6_check_latency_hiding(&wm_high) ||
2392 		    (rdev->disp_priority == 2)) {
2393 			DRM_DEBUG_KMS("force priority to high\n");
2394 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2395 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2396 		}
2397 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2398 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2399 		    !dce6_check_latency_hiding(&wm_low) ||
2400 		    (rdev->disp_priority == 2)) {
2401 			DRM_DEBUG_KMS("force priority to high\n");
2402 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2403 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2404 		}
2405 
2406 		a.full = dfixed_const(1000);
2407 		b.full = dfixed_const(mode->clock);
2408 		b.full = dfixed_div(b, a);
2409 		c.full = dfixed_const(latency_watermark_a);
2410 		c.full = dfixed_mul(c, b);
2411 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2412 		c.full = dfixed_div(c, a);
2413 		a.full = dfixed_const(16);
2414 		c.full = dfixed_div(c, a);
2415 		priority_a_mark = dfixed_trunc(c);
2416 		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2417 
2418 		a.full = dfixed_const(1000);
2419 		b.full = dfixed_const(mode->clock);
2420 		b.full = dfixed_div(b, a);
2421 		c.full = dfixed_const(latency_watermark_b);
2422 		c.full = dfixed_mul(c, b);
2423 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2424 		c.full = dfixed_div(c, a);
2425 		a.full = dfixed_const(16);
2426 		c.full = dfixed_div(c, a);
2427 		priority_b_mark = dfixed_trunc(c);
2428 		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2429 
2430 		/* Save number of lines the linebuffer leads before the scanout */
2431 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
2432 	}
2433 
2434 	/* select wm A */
2435 	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2436 	tmp = arb_control3;
2437 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2438 	tmp |= LATENCY_WATERMARK_MASK(1);
2439 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2440 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2441 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2442 		LATENCY_HIGH_WATERMARK(line_time)));
2443 	/* select wm B */
2444 	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2445 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2446 	tmp |= LATENCY_WATERMARK_MASK(2);
2447 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2448 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2449 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2450 		LATENCY_HIGH_WATERMARK(line_time)));
2451 	/* restore original selection */
2452 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2453 
2454 	/* write the priority marks */
2455 	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2456 	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2457 
2458 	/* save values for DPM */
2459 	radeon_crtc->line_time = line_time;
2460 	radeon_crtc->wm_high = latency_watermark_a;
2461 	radeon_crtc->wm_low = latency_watermark_b;
2462 }
2463 
2464 void dce6_bandwidth_update(struct radeon_device *rdev)
2465 {
2466 	struct drm_display_mode *mode0 = NULL;
2467 	struct drm_display_mode *mode1 = NULL;
2468 	u32 num_heads = 0, lb_size;
2469 	int i;
2470 
2471 	if (!rdev->mode_info.mode_config_initialized)
2472 		return;
2473 
2474 	radeon_update_display_priority(rdev);
2475 
2476 	for (i = 0; i < rdev->num_crtc; i++) {
2477 		if (rdev->mode_info.crtcs[i]->base.enabled)
2478 			num_heads++;
2479 	}
2480 	for (i = 0; i < rdev->num_crtc; i += 2) {
2481 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2482 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2483 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2484 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2485 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2486 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2487 	}
2488 }
2489 
2490 /*
2491  * Core functions
2492  */
2493 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2494 {
2495 	u32 *tile = rdev->config.si.tile_mode_array;
2496 	const u32 num_tile_mode_states =
2497 			ARRAY_SIZE(rdev->config.si.tile_mode_array);
2498 	u32 reg_offset, split_equal_to_row_size;
2499 
2500 	switch (rdev->config.si.mem_row_size_in_kb) {
2501 	case 1:
2502 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2503 		break;
2504 	case 2:
2505 	default:
2506 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2507 		break;
2508 	case 4:
2509 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2510 		break;
2511 	}
2512 
2513 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2514 		tile[reg_offset] = 0;
2515 
2516 	switch(rdev->family) {
2517 	case CHIP_TAHITI:
2518 	case CHIP_PITCAIRN:
2519 		/* non-AA compressed depth or any compressed stencil */
2520 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2521 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2522 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2523 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2524 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2525 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2526 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2527 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2528 		/* 2xAA/4xAA compressed depth only */
2529 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2530 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2531 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2532 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2533 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2534 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2535 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2536 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2537 		/* 8xAA compressed depth only */
2538 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2539 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2540 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2541 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2542 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2543 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2544 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2545 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2546 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2547 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2548 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2549 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2550 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2551 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2552 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2553 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2554 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2555 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2556 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2557 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2558 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2559 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2560 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2561 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2562 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2563 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2564 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2565 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2566 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2567 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2568 			   TILE_SPLIT(split_equal_to_row_size) |
2569 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2570 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2571 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2572 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2573 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2574 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2575 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2576 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2577 			   TILE_SPLIT(split_equal_to_row_size) |
2578 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2579 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2580 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2581 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2582 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2583 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2584 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2585 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2586 			   TILE_SPLIT(split_equal_to_row_size) |
2587 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2588 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2589 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2590 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2591 		/* 1D and 1D Array Surfaces */
2592 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2593 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2594 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2595 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2596 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2597 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2598 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2599 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2600 		/* Displayable maps. */
2601 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2602 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2603 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2604 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2605 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2606 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2607 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2608 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2609 		/* Display 8bpp. */
2610 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2611 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2612 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2613 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2614 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2615 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2617 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2618 		/* Display 16bpp. */
2619 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2620 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2621 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2622 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2623 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2624 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2626 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2627 		/* Display 32bpp. */
2628 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2629 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2630 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2631 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2632 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2633 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2634 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2635 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2636 		/* Thin. */
2637 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2638 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2639 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2640 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2641 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2642 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2644 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2645 		/* Thin 8 bpp. */
2646 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2647 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2648 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2649 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2650 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2651 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2653 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2654 		/* Thin 16 bpp. */
2655 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2656 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2657 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2658 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2659 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2660 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2661 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2662 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2663 		/* Thin 32 bpp. */
2664 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2665 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2666 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2667 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2668 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2669 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2670 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2671 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2672 		/* Thin 64 bpp. */
2673 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2675 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2676 			   TILE_SPLIT(split_equal_to_row_size) |
2677 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2678 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2679 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2680 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2681 		/* 8 bpp PRT. */
2682 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2683 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2684 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2685 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2686 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2687 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2688 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2689 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2690 		/* 16 bpp PRT */
2691 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2692 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2693 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2694 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2695 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2696 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2698 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2699 		/* 32 bpp PRT */
2700 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2701 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2702 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2703 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2704 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2705 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2706 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2707 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2708 		/* 64 bpp PRT */
2709 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2710 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2711 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2712 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2713 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2714 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2715 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2716 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2717 		/* 128 bpp PRT */
2718 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2719 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2720 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2721 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2722 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2723 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2724 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2725 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2726 
2727 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2728 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2729 		break;
2730 
2731 	case CHIP_VERDE:
2732 	case CHIP_OLAND:
2733 	case CHIP_HAINAN:
2734 		/* non-AA compressed depth or any compressed stencil */
2735 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2736 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2737 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2738 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2739 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2740 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2741 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2742 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2743 		/* 2xAA/4xAA compressed depth only */
2744 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2745 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2746 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2747 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2748 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2749 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2750 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2751 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2752 		/* 8xAA compressed depth only */
2753 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2754 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2755 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2756 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2757 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2758 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2759 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2760 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2761 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2762 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2763 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2764 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2765 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2766 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2767 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2769 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2770 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2771 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2772 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2773 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2774 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2775 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2776 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2777 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2778 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2779 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2780 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2781 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2782 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2783 			   TILE_SPLIT(split_equal_to_row_size) |
2784 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2785 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2786 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2787 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2788 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2789 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2790 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2791 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2792 			   TILE_SPLIT(split_equal_to_row_size) |
2793 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2794 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2795 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2796 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2797 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2798 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2799 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2800 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2801 			   TILE_SPLIT(split_equal_to_row_size) |
2802 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2803 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2804 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2805 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2806 		/* 1D and 1D Array Surfaces */
2807 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2808 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2809 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2811 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2812 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2813 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2814 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2815 		/* Displayable maps. */
2816 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2817 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2818 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2819 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2820 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2821 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2823 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2824 		/* Display 8bpp. */
2825 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2826 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2827 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2828 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2829 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2830 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2831 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2832 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2833 		/* Display 16bpp. */
2834 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2835 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2836 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2837 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2838 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2839 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2840 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2841 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2842 		/* Display 32bpp. */
2843 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2844 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2845 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2846 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2847 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2848 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2850 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2851 		/* Thin. */
2852 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2853 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2854 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2855 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2856 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2857 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2859 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2860 		/* Thin 8 bpp. */
2861 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2862 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2863 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2864 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2865 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2866 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2867 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2868 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2869 		/* Thin 16 bpp. */
2870 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2871 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2872 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2873 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2874 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2875 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2876 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2877 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2878 		/* Thin 32 bpp. */
2879 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2880 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2881 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2882 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2883 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2884 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2885 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2886 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2887 		/* Thin 64 bpp. */
2888 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2889 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2890 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2891 			   TILE_SPLIT(split_equal_to_row_size) |
2892 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2893 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2894 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2895 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2896 		/* 8 bpp PRT. */
2897 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2898 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2899 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2900 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2901 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2902 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2903 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2904 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2905 		/* 16 bpp PRT */
2906 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2907 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2908 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2909 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2910 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2911 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2912 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2913 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2914 		/* 32 bpp PRT */
2915 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2916 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2917 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2918 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2919 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2920 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2921 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2922 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2923 		/* 64 bpp PRT */
2924 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2925 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2926 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2927 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2928 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2929 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2930 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2931 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2932 		/* 128 bpp PRT */
2933 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2934 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2935 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2936 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2937 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2938 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2939 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2940 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2941 
2942 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2943 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2944 		break;
2945 
2946 	default:
2947 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2948 	}
2949 }
2950 
2951 static void si_select_se_sh(struct radeon_device *rdev,
2952 			    u32 se_num, u32 sh_num)
2953 {
2954 	u32 data = INSTANCE_BROADCAST_WRITES;
2955 
2956 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2957 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2958 	else if (se_num == 0xffffffff)
2959 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2960 	else if (sh_num == 0xffffffff)
2961 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2962 	else
2963 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2964 	WREG32(GRBM_GFX_INDEX, data);
2965 }
2966 
/*
 * si_create_bitmask - build a mask with the low @bit_width bits set
 * @bit_width: number of low-order bits to set
 *
 * Returns 0 for a width of 0.  Widths of 32 or more yield 0xffffffff because
 * the bits shifted out of the 32-bit accumulator are simply dropped.
 */
static u32 si_create_bitmask(u32 bit_width)
{
	u32 mask = 0;

	/* shift in one set bit per requested position */
	while (bit_width--)
		mask = (mask << 1) | 1;

	return mask;
}
2977 
2978 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2979 {
2980 	u32 data, mask;
2981 
2982 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2983 	if (data & 1)
2984 		data &= INACTIVE_CUS_MASK;
2985 	else
2986 		data = 0;
2987 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2988 
2989 	data >>= INACTIVE_CUS_SHIFT;
2990 
2991 	mask = si_create_bitmask(cu_per_sh);
2992 
2993 	return ~data & mask;
2994 }
2995 
2996 static void si_setup_spi(struct radeon_device *rdev,
2997 			 u32 se_num, u32 sh_per_se,
2998 			 u32 cu_per_sh)
2999 {
3000 	int i, j, k;
3001 	u32 data, mask, active_cu;
3002 
3003 	for (i = 0; i < se_num; i++) {
3004 		for (j = 0; j < sh_per_se; j++) {
3005 			si_select_se_sh(rdev, i, j);
3006 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
3007 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
3008 
3009 			mask = 1;
3010 			for (k = 0; k < 16; k++) {
3011 				mask <<= k;
3012 				if (active_cu & mask) {
3013 					data &= ~mask;
3014 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
3015 					break;
3016 				}
3017 			}
3018 		}
3019 	}
3020 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3021 }
3022 
3023 static u32 si_get_rb_disabled(struct radeon_device *rdev,
3024 			      u32 max_rb_num_per_se,
3025 			      u32 sh_per_se)
3026 {
3027 	u32 data, mask;
3028 
3029 	data = RREG32(CC_RB_BACKEND_DISABLE);
3030 	if (data & 1)
3031 		data &= BACKEND_DISABLE_MASK;
3032 	else
3033 		data = 0;
3034 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3035 
3036 	data >>= BACKEND_DISABLE_SHIFT;
3037 
3038 	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3039 
3040 	return data & mask;
3041 }
3042 
3043 static void si_setup_rb(struct radeon_device *rdev,
3044 			u32 se_num, u32 sh_per_se,
3045 			u32 max_rb_num_per_se)
3046 {
3047 	int i, j;
3048 	u32 data, mask;
3049 	u32 disabled_rbs = 0;
3050 	u32 enabled_rbs = 0;
3051 
3052 	for (i = 0; i < se_num; i++) {
3053 		for (j = 0; j < sh_per_se; j++) {
3054 			si_select_se_sh(rdev, i, j);
3055 			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3056 			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
3057 		}
3058 	}
3059 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3060 
3061 	mask = 1;
3062 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3063 		if (!(disabled_rbs & mask))
3064 			enabled_rbs |= mask;
3065 		mask <<= 1;
3066 	}
3067 
3068 	rdev->config.si.backend_enable_mask = enabled_rbs;
3069 
3070 	for (i = 0; i < se_num; i++) {
3071 		si_select_se_sh(rdev, i, 0xffffffff);
3072 		data = 0;
3073 		for (j = 0; j < sh_per_se; j++) {
3074 			switch (enabled_rbs & 3) {
3075 			case 1:
3076 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3077 				break;
3078 			case 2:
3079 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3080 				break;
3081 			case 3:
3082 			default:
3083 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3084 				break;
3085 			}
3086 			enabled_rbs >>= 2;
3087 		}
3088 		WREG32(PA_SC_RASTER_CONFIG, data);
3089 	}
3090 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3091 }
3092 
3093 static void si_gpu_init(struct radeon_device *rdev)
3094 {
3095 	u32 gb_addr_config = 0;
3096 	u32 mc_shared_chmap, mc_arb_ramcfg;
3097 	u32 sx_debug_1;
3098 	u32 hdp_host_path_cntl;
3099 	u32 tmp;
3100 	int i, j;
3101 
3102 	switch (rdev->family) {
3103 	case CHIP_TAHITI:
3104 		rdev->config.si.max_shader_engines = 2;
3105 		rdev->config.si.max_tile_pipes = 12;
3106 		rdev->config.si.max_cu_per_sh = 8;
3107 		rdev->config.si.max_sh_per_se = 2;
3108 		rdev->config.si.max_backends_per_se = 4;
3109 		rdev->config.si.max_texture_channel_caches = 12;
3110 		rdev->config.si.max_gprs = 256;
3111 		rdev->config.si.max_gs_threads = 32;
3112 		rdev->config.si.max_hw_contexts = 8;
3113 
3114 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3115 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3116 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3117 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3118 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3119 		break;
3120 	case CHIP_PITCAIRN:
3121 		rdev->config.si.max_shader_engines = 2;
3122 		rdev->config.si.max_tile_pipes = 8;
3123 		rdev->config.si.max_cu_per_sh = 5;
3124 		rdev->config.si.max_sh_per_se = 2;
3125 		rdev->config.si.max_backends_per_se = 4;
3126 		rdev->config.si.max_texture_channel_caches = 8;
3127 		rdev->config.si.max_gprs = 256;
3128 		rdev->config.si.max_gs_threads = 32;
3129 		rdev->config.si.max_hw_contexts = 8;
3130 
3131 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3132 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3133 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3134 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3135 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3136 		break;
3137 	case CHIP_VERDE:
3138 	default:
3139 		rdev->config.si.max_shader_engines = 1;
3140 		rdev->config.si.max_tile_pipes = 4;
3141 		rdev->config.si.max_cu_per_sh = 5;
3142 		rdev->config.si.max_sh_per_se = 2;
3143 		rdev->config.si.max_backends_per_se = 4;
3144 		rdev->config.si.max_texture_channel_caches = 4;
3145 		rdev->config.si.max_gprs = 256;
3146 		rdev->config.si.max_gs_threads = 32;
3147 		rdev->config.si.max_hw_contexts = 8;
3148 
3149 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3150 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3151 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3152 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3153 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3154 		break;
3155 	case CHIP_OLAND:
3156 		rdev->config.si.max_shader_engines = 1;
3157 		rdev->config.si.max_tile_pipes = 4;
3158 		rdev->config.si.max_cu_per_sh = 6;
3159 		rdev->config.si.max_sh_per_se = 1;
3160 		rdev->config.si.max_backends_per_se = 2;
3161 		rdev->config.si.max_texture_channel_caches = 4;
3162 		rdev->config.si.max_gprs = 256;
3163 		rdev->config.si.max_gs_threads = 16;
3164 		rdev->config.si.max_hw_contexts = 8;
3165 
3166 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3167 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3168 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3169 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3170 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3171 		break;
3172 	case CHIP_HAINAN:
3173 		rdev->config.si.max_shader_engines = 1;
3174 		rdev->config.si.max_tile_pipes = 4;
3175 		rdev->config.si.max_cu_per_sh = 5;
3176 		rdev->config.si.max_sh_per_se = 1;
3177 		rdev->config.si.max_backends_per_se = 1;
3178 		rdev->config.si.max_texture_channel_caches = 2;
3179 		rdev->config.si.max_gprs = 256;
3180 		rdev->config.si.max_gs_threads = 16;
3181 		rdev->config.si.max_hw_contexts = 8;
3182 
3183 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3184 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3185 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3186 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3187 		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
3188 		break;
3189 	}
3190 
3191 	/* Initialize HDP */
3192 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3193 		WREG32((0x2c14 + j), 0x00000000);
3194 		WREG32((0x2c18 + j), 0x00000000);
3195 		WREG32((0x2c1c + j), 0x00000000);
3196 		WREG32((0x2c20 + j), 0x00000000);
3197 		WREG32((0x2c24 + j), 0x00000000);
3198 	}
3199 
3200 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3201 	WREG32(SRBM_INT_CNTL, 1);
3202 	WREG32(SRBM_INT_ACK, 1);
3203 
3204 	evergreen_fix_pci_max_read_req_size(rdev);
3205 
3206 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3207 
3208 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3209 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3210 
3211 	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3212 	rdev->config.si.mem_max_burst_length_bytes = 256;
3213 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3214 	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3215 	if (rdev->config.si.mem_row_size_in_kb > 4)
3216 		rdev->config.si.mem_row_size_in_kb = 4;
3217 	/* XXX use MC settings? */
3218 	rdev->config.si.shader_engine_tile_size = 32;
3219 	rdev->config.si.num_gpus = 1;
3220 	rdev->config.si.multi_gpu_tile_size = 64;
3221 
3222 	/* fix up row size */
3223 	gb_addr_config &= ~ROW_SIZE_MASK;
3224 	switch (rdev->config.si.mem_row_size_in_kb) {
3225 	case 1:
3226 	default:
3227 		gb_addr_config |= ROW_SIZE(0);
3228 		break;
3229 	case 2:
3230 		gb_addr_config |= ROW_SIZE(1);
3231 		break;
3232 	case 4:
3233 		gb_addr_config |= ROW_SIZE(2);
3234 		break;
3235 	}
3236 
3237 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3238 	 * not have bank info, so create a custom tiling dword.
3239 	 * bits 3:0   num_pipes
3240 	 * bits 7:4   num_banks
3241 	 * bits 11:8  group_size
3242 	 * bits 15:12 row_size
3243 	 */
3244 	rdev->config.si.tile_config = 0;
3245 	switch (rdev->config.si.num_tile_pipes) {
3246 	case 1:
3247 		rdev->config.si.tile_config |= (0 << 0);
3248 		break;
3249 	case 2:
3250 		rdev->config.si.tile_config |= (1 << 0);
3251 		break;
3252 	case 4:
3253 		rdev->config.si.tile_config |= (2 << 0);
3254 		break;
3255 	case 8:
3256 	default:
3257 		/* XXX what about 12? */
3258 		rdev->config.si.tile_config |= (3 << 0);
3259 		break;
3260 	}
3261 	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3262 	case 0: /* four banks */
3263 		rdev->config.si.tile_config |= 0 << 4;
3264 		break;
3265 	case 1: /* eight banks */
3266 		rdev->config.si.tile_config |= 1 << 4;
3267 		break;
3268 	case 2: /* sixteen banks */
3269 	default:
3270 		rdev->config.si.tile_config |= 2 << 4;
3271 		break;
3272 	}
3273 	rdev->config.si.tile_config |=
3274 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3275 	rdev->config.si.tile_config |=
3276 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3277 
3278 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3279 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3280 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3281 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3282 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3283 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3284 	if (rdev->has_uvd) {
3285 		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3286 		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3287 		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3288 	}
3289 
3290 	si_tiling_mode_table_init(rdev);
3291 
3292 	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3293 		    rdev->config.si.max_sh_per_se,
3294 		    rdev->config.si.max_backends_per_se);
3295 
3296 	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3297 		     rdev->config.si.max_sh_per_se,
3298 		     rdev->config.si.max_cu_per_sh);
3299 
3300 	rdev->config.si.active_cus = 0;
3301 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
3302 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
3303 			rdev->config.si.active_cus +=
3304 				hweight32(si_get_cu_active_bitmap(rdev, i, j));
3305 		}
3306 	}
3307 
3308 	/* set HW defaults for 3D engine */
3309 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3310 				     ROQ_IB2_START(0x2b)));
3311 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3312 
3313 	sx_debug_1 = RREG32(SX_DEBUG_1);
3314 	WREG32(SX_DEBUG_1, sx_debug_1);
3315 
3316 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3317 
3318 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3319 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3320 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3321 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3322 
3323 	WREG32(VGT_NUM_INSTANCES, 1);
3324 
3325 	WREG32(CP_PERFMON_CNTL, 0);
3326 
3327 	WREG32(SQ_CONFIG, 0);
3328 
3329 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3330 					  FORCE_EOV_MAX_REZ_CNT(255)));
3331 
3332 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3333 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3334 
3335 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3336 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3337 
3338 	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3339 	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3340 	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3341 	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3342 	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3343 	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3344 	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3345 	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3346 
3347 	tmp = RREG32(HDP_MISC_CNTL);
3348 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3349 	WREG32(HDP_MISC_CNTL, tmp);
3350 
3351 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3352 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3353 
3354 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3355 
3356 	udelay(50);
3357 }
3358 
/*
 * GPU scratch register helper functions.
 */
3362 static void si_scratch_init(struct radeon_device *rdev)
3363 {
3364 	int i;
3365 
3366 	rdev->scratch.num_reg = 7;
3367 	rdev->scratch.reg_base = SCRATCH_REG0;
3368 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3369 		rdev->scratch.free[i] = true;
3370 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3371 	}
3372 }
3373 
3374 void si_fence_ring_emit(struct radeon_device *rdev,
3375 			struct radeon_fence *fence)
3376 {
3377 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3378 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3379 
3380 	/* flush read cache over gart */
3381 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3382 	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3383 	radeon_ring_write(ring, 0);
3384 	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3385 	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3386 			  PACKET3_TC_ACTION_ENA |
3387 			  PACKET3_SH_KCACHE_ACTION_ENA |
3388 			  PACKET3_SH_ICACHE_ACTION_ENA);
3389 	radeon_ring_write(ring, 0xFFFFFFFF);
3390 	radeon_ring_write(ring, 0);
3391 	radeon_ring_write(ring, 10); /* poll interval */
3392 	/* EVENT_WRITE_EOP - flush caches, send int */
3393 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3394 	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3395 	radeon_ring_write(ring, lower_32_bits(addr));
3396 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3397 	radeon_ring_write(ring, fence->seq);
3398 	radeon_ring_write(ring, 0);
3399 }
3400 
3401 /*
3402  * IB stuff
3403  */
3404 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3405 {
3406 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3407 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3408 	u32 header;
3409 
3410 	if (ib->is_const_ib) {
3411 		/* set switch buffer packet before const IB */
3412 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3413 		radeon_ring_write(ring, 0);
3414 
3415 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3416 	} else {
3417 		u32 next_rptr;
3418 		if (ring->rptr_save_reg) {
3419 			next_rptr = ring->wptr + 3 + 4 + 8;
3420 			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3421 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3422 						  PACKET3_SET_CONFIG_REG_START) >> 2));
3423 			radeon_ring_write(ring, next_rptr);
3424 		} else if (rdev->wb.enabled) {
3425 			next_rptr = ring->wptr + 5 + 4 + 8;
3426 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3427 			radeon_ring_write(ring, (1 << 8));
3428 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3429 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3430 			radeon_ring_write(ring, next_rptr);
3431 		}
3432 
3433 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3434 	}
3435 
3436 	radeon_ring_write(ring, header);
3437 	radeon_ring_write(ring,
3438 #ifdef __BIG_ENDIAN
3439 			  (2 << 0) |
3440 #endif
3441 			  (ib->gpu_addr & 0xFFFFFFFC));
3442 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3443 	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
3444 
3445 	if (!ib->is_const_ib) {
3446 		/* flush read cache over gart for this vmid */
3447 		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3448 		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3449 		radeon_ring_write(ring, vm_id);
3450 		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3451 		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3452 				  PACKET3_TC_ACTION_ENA |
3453 				  PACKET3_SH_KCACHE_ACTION_ENA |
3454 				  PACKET3_SH_ICACHE_ACTION_ENA);
3455 		radeon_ring_write(ring, 0xFFFFFFFF);
3456 		radeon_ring_write(ring, 0);
3457 		radeon_ring_write(ring, 10); /* poll interval */
3458 	}
3459 }
3460 
3461 /*
3462  * CP.
3463  */
3464 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3465 {
3466 	if (enable)
3467 		WREG32(CP_ME_CNTL, 0);
3468 	else {
3469 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3470 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3471 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3472 		WREG32(SCRATCH_UMSK, 0);
3473 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3474 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3475 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3476 	}
3477 	udelay(50);
3478 }
3479 
3480 static int si_cp_load_microcode(struct radeon_device *rdev)
3481 {
3482 	int i;
3483 
3484 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3485 		return -EINVAL;
3486 
3487 	si_cp_enable(rdev, false);
3488 
3489 	if (rdev->new_fw) {
3490 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3491 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3492 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3493 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3494 		const struct gfx_firmware_header_v1_0 *me_hdr =
3495 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3496 		const __le32 *fw_data;
3497 		u32 fw_size;
3498 
3499 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3500 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3501 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3502 
3503 		/* PFP */
3504 		fw_data = (const __le32 *)
3505 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3506 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3507 		WREG32(CP_PFP_UCODE_ADDR, 0);
3508 		for (i = 0; i < fw_size; i++)
3509 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3510 		WREG32(CP_PFP_UCODE_ADDR, 0);
3511 
3512 		/* CE */
3513 		fw_data = (const __le32 *)
3514 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3515 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3516 		WREG32(CP_CE_UCODE_ADDR, 0);
3517 		for (i = 0; i < fw_size; i++)
3518 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3519 		WREG32(CP_CE_UCODE_ADDR, 0);
3520 
3521 		/* ME */
3522 		fw_data = (const __be32 *)
3523 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3524 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3525 		WREG32(CP_ME_RAM_WADDR, 0);
3526 		for (i = 0; i < fw_size; i++)
3527 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3528 		WREG32(CP_ME_RAM_WADDR, 0);
3529 	} else {
3530 		const __be32 *fw_data;
3531 
3532 		/* PFP */
3533 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3534 		WREG32(CP_PFP_UCODE_ADDR, 0);
3535 		for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3536 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3537 		WREG32(CP_PFP_UCODE_ADDR, 0);
3538 
3539 		/* CE */
3540 		fw_data = (const __be32 *)rdev->ce_fw->data;
3541 		WREG32(CP_CE_UCODE_ADDR, 0);
3542 		for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3543 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3544 		WREG32(CP_CE_UCODE_ADDR, 0);
3545 
3546 		/* ME */
3547 		fw_data = (const __be32 *)rdev->me_fw->data;
3548 		WREG32(CP_ME_RAM_WADDR, 0);
3549 		for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3550 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3551 		WREG32(CP_ME_RAM_WADDR, 0);
3552 	}
3553 
3554 	WREG32(CP_PFP_UCODE_ADDR, 0);
3555 	WREG32(CP_CE_UCODE_ADDR, 0);
3556 	WREG32(CP_ME_RAM_WADDR, 0);
3557 	WREG32(CP_ME_RAM_RADDR, 0);
3558 	return 0;
3559 }
3560 
3561 static int si_cp_start(struct radeon_device *rdev)
3562 {
3563 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3564 	int r, i;
3565 
3566 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3567 	if (r) {
3568 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3569 		return r;
3570 	}
3571 	/* init the CP */
3572 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3573 	radeon_ring_write(ring, 0x1);
3574 	radeon_ring_write(ring, 0x0);
3575 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3576 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3577 	radeon_ring_write(ring, 0);
3578 	radeon_ring_write(ring, 0);
3579 
3580 	/* init the CE partitions */
3581 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3582 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3583 	radeon_ring_write(ring, 0xc000);
3584 	radeon_ring_write(ring, 0xe000);
3585 	radeon_ring_unlock_commit(rdev, ring, false);
3586 
3587 	si_cp_enable(rdev, true);
3588 
3589 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3590 	if (r) {
3591 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3592 		return r;
3593 	}
3594 
3595 	/* setup clear context state */
3596 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3597 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3598 
3599 	for (i = 0; i < si_default_size; i++)
3600 		radeon_ring_write(ring, si_default_state[i]);
3601 
3602 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3603 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3604 
3605 	/* set clear context state */
3606 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3607 	radeon_ring_write(ring, 0);
3608 
3609 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3610 	radeon_ring_write(ring, 0x00000316);
3611 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3612 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3613 
3614 	radeon_ring_unlock_commit(rdev, ring, false);
3615 
3616 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3617 		ring = &rdev->ring[i];
3618 		r = radeon_ring_lock(rdev, ring, 2);
3619 
3620 		/* clear the compute context state */
3621 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3622 		radeon_ring_write(ring, 0);
3623 
3624 		radeon_ring_unlock_commit(rdev, ring, false);
3625 	}
3626 
3627 	return 0;
3628 }
3629 
3630 static void si_cp_fini(struct radeon_device *rdev)
3631 {
3632 	struct radeon_ring *ring;
3633 	si_cp_enable(rdev, false);
3634 
3635 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3636 	radeon_ring_fini(rdev, ring);
3637 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3638 
3639 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3640 	radeon_ring_fini(rdev, ring);
3641 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3642 
3643 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3644 	radeon_ring_fini(rdev, ring);
3645 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3646 }
3647 
3648 static int si_cp_resume(struct radeon_device *rdev)
3649 {
3650 	struct radeon_ring *ring;
3651 	u32 tmp;
3652 	u32 rb_bufsz;
3653 	int r;
3654 
3655 	si_enable_gui_idle_interrupt(rdev, false);
3656 
3657 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3658 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3659 
3660 	/* Set the write pointer delay */
3661 	WREG32(CP_RB_WPTR_DELAY, 0);
3662 
3663 	WREG32(CP_DEBUG, 0);
3664 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3665 
3666 	/* ring 0 - compute and gfx */
3667 	/* Set ring buffer size */
3668 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3669 	rb_bufsz = order_base_2(ring->ring_size / 8);
3670 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3671 #ifdef __BIG_ENDIAN
3672 	tmp |= BUF_SWAP_32BIT;
3673 #endif
3674 	WREG32(CP_RB0_CNTL, tmp);
3675 
3676 	/* Initialize the ring buffer's read and write pointers */
3677 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3678 	ring->wptr = 0;
3679 	WREG32(CP_RB0_WPTR, ring->wptr);
3680 
3681 	/* set the wb address whether it's enabled or not */
3682 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3683 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3684 
3685 	if (rdev->wb.enabled)
3686 		WREG32(SCRATCH_UMSK, 0xff);
3687 	else {
3688 		tmp |= RB_NO_UPDATE;
3689 		WREG32(SCRATCH_UMSK, 0);
3690 	}
3691 
3692 	mdelay(1);
3693 	WREG32(CP_RB0_CNTL, tmp);
3694 
3695 	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3696 
3697 	/* ring1  - compute only */
3698 	/* Set ring buffer size */
3699 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3700 	rb_bufsz = order_base_2(ring->ring_size / 8);
3701 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3702 #ifdef __BIG_ENDIAN
3703 	tmp |= BUF_SWAP_32BIT;
3704 #endif
3705 	WREG32(CP_RB1_CNTL, tmp);
3706 
3707 	/* Initialize the ring buffer's read and write pointers */
3708 	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3709 	ring->wptr = 0;
3710 	WREG32(CP_RB1_WPTR, ring->wptr);
3711 
3712 	/* set the wb address whether it's enabled or not */
3713 	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3714 	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3715 
3716 	mdelay(1);
3717 	WREG32(CP_RB1_CNTL, tmp);
3718 
3719 	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3720 
3721 	/* ring2 - compute only */
3722 	/* Set ring buffer size */
3723 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3724 	rb_bufsz = order_base_2(ring->ring_size / 8);
3725 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3726 #ifdef __BIG_ENDIAN
3727 	tmp |= BUF_SWAP_32BIT;
3728 #endif
3729 	WREG32(CP_RB2_CNTL, tmp);
3730 
3731 	/* Initialize the ring buffer's read and write pointers */
3732 	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3733 	ring->wptr = 0;
3734 	WREG32(CP_RB2_WPTR, ring->wptr);
3735 
3736 	/* set the wb address whether it's enabled or not */
3737 	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3738 	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3739 
3740 	mdelay(1);
3741 	WREG32(CP_RB2_CNTL, tmp);
3742 
3743 	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3744 
3745 	/* start the rings */
3746 	si_cp_start(rdev);
3747 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3748 	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3749 	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3750 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3751 	if (r) {
3752 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3753 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3754 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3755 		return r;
3756 	}
3757 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3758 	if (r) {
3759 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3760 	}
3761 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3762 	if (r) {
3763 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3764 	}
3765 
3766 	si_enable_gui_idle_interrupt(rdev, true);
3767 
3768 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3769 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3770 
3771 	return 0;
3772 }
3773 
3774 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3775 {
3776 	u32 reset_mask = 0;
3777 	u32 tmp;
3778 
3779 	/* GRBM_STATUS */
3780 	tmp = RREG32(GRBM_STATUS);
3781 	if (tmp & (PA_BUSY | SC_BUSY |
3782 		   BCI_BUSY | SX_BUSY |
3783 		   TA_BUSY | VGT_BUSY |
3784 		   DB_BUSY | CB_BUSY |
3785 		   GDS_BUSY | SPI_BUSY |
3786 		   IA_BUSY | IA_BUSY_NO_DMA))
3787 		reset_mask |= RADEON_RESET_GFX;
3788 
3789 	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3790 		   CP_BUSY | CP_COHERENCY_BUSY))
3791 		reset_mask |= RADEON_RESET_CP;
3792 
3793 	if (tmp & GRBM_EE_BUSY)
3794 		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3795 
3796 	/* GRBM_STATUS2 */
3797 	tmp = RREG32(GRBM_STATUS2);
3798 	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3799 		reset_mask |= RADEON_RESET_RLC;
3800 
3801 	/* DMA_STATUS_REG 0 */
3802 	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3803 	if (!(tmp & DMA_IDLE))
3804 		reset_mask |= RADEON_RESET_DMA;
3805 
3806 	/* DMA_STATUS_REG 1 */
3807 	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3808 	if (!(tmp & DMA_IDLE))
3809 		reset_mask |= RADEON_RESET_DMA1;
3810 
3811 	/* SRBM_STATUS2 */
3812 	tmp = RREG32(SRBM_STATUS2);
3813 	if (tmp & DMA_BUSY)
3814 		reset_mask |= RADEON_RESET_DMA;
3815 
3816 	if (tmp & DMA1_BUSY)
3817 		reset_mask |= RADEON_RESET_DMA1;
3818 
3819 	/* SRBM_STATUS */
3820 	tmp = RREG32(SRBM_STATUS);
3821 
3822 	if (tmp & IH_BUSY)
3823 		reset_mask |= RADEON_RESET_IH;
3824 
3825 	if (tmp & SEM_BUSY)
3826 		reset_mask |= RADEON_RESET_SEM;
3827 
3828 	if (tmp & GRBM_RQ_PENDING)
3829 		reset_mask |= RADEON_RESET_GRBM;
3830 
3831 	if (tmp & VMC_BUSY)
3832 		reset_mask |= RADEON_RESET_VMC;
3833 
3834 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3835 		   MCC_BUSY | MCD_BUSY))
3836 		reset_mask |= RADEON_RESET_MC;
3837 
3838 	if (evergreen_is_display_hung(rdev))
3839 		reset_mask |= RADEON_RESET_DISPLAY;
3840 
3841 	/* VM_L2_STATUS */
3842 	tmp = RREG32(VM_L2_STATUS);
3843 	if (tmp & L2_BUSY)
3844 		reset_mask |= RADEON_RESET_VMC;
3845 
3846 	/* Skip MC reset as it's mostly likely not hung, just busy */
3847 	if (reset_mask & RADEON_RESET_MC) {
3848 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3849 		reset_mask &= ~RADEON_RESET_MC;
3850 	}
3851 
3852 	return reset_mask;
3853 }
3854 
3855 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3856 {
3857 	struct evergreen_mc_save save;
3858 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3859 	u32 tmp;
3860 
3861 	if (reset_mask == 0)
3862 		return;
3863 
3864 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3865 
3866 	evergreen_print_gpu_status_regs(rdev);
3867 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3868 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3869 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3870 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3871 
3872 	/* disable PG/CG */
3873 	si_fini_pg(rdev);
3874 	si_fini_cg(rdev);
3875 
3876 	/* stop the rlc */
3877 	si_rlc_stop(rdev);
3878 
3879 	/* Disable CP parsing/prefetching */
3880 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3881 
3882 	if (reset_mask & RADEON_RESET_DMA) {
3883 		/* dma0 */
3884 		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3885 		tmp &= ~DMA_RB_ENABLE;
3886 		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3887 	}
3888 	if (reset_mask & RADEON_RESET_DMA1) {
3889 		/* dma1 */
3890 		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3891 		tmp &= ~DMA_RB_ENABLE;
3892 		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3893 	}
3894 
3895 	udelay(50);
3896 
3897 	evergreen_mc_stop(rdev, &save);
3898 	if (evergreen_mc_wait_for_idle(rdev)) {
3899 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3900 	}
3901 
3902 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3903 		grbm_soft_reset = SOFT_RESET_CB |
3904 			SOFT_RESET_DB |
3905 			SOFT_RESET_GDS |
3906 			SOFT_RESET_PA |
3907 			SOFT_RESET_SC |
3908 			SOFT_RESET_BCI |
3909 			SOFT_RESET_SPI |
3910 			SOFT_RESET_SX |
3911 			SOFT_RESET_TC |
3912 			SOFT_RESET_TA |
3913 			SOFT_RESET_VGT |
3914 			SOFT_RESET_IA;
3915 	}
3916 
3917 	if (reset_mask & RADEON_RESET_CP) {
3918 		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3919 
3920 		srbm_soft_reset |= SOFT_RESET_GRBM;
3921 	}
3922 
3923 	if (reset_mask & RADEON_RESET_DMA)
3924 		srbm_soft_reset |= SOFT_RESET_DMA;
3925 
3926 	if (reset_mask & RADEON_RESET_DMA1)
3927 		srbm_soft_reset |= SOFT_RESET_DMA1;
3928 
3929 	if (reset_mask & RADEON_RESET_DISPLAY)
3930 		srbm_soft_reset |= SOFT_RESET_DC;
3931 
3932 	if (reset_mask & RADEON_RESET_RLC)
3933 		grbm_soft_reset |= SOFT_RESET_RLC;
3934 
3935 	if (reset_mask & RADEON_RESET_SEM)
3936 		srbm_soft_reset |= SOFT_RESET_SEM;
3937 
3938 	if (reset_mask & RADEON_RESET_IH)
3939 		srbm_soft_reset |= SOFT_RESET_IH;
3940 
3941 	if (reset_mask & RADEON_RESET_GRBM)
3942 		srbm_soft_reset |= SOFT_RESET_GRBM;
3943 
3944 	if (reset_mask & RADEON_RESET_VMC)
3945 		srbm_soft_reset |= SOFT_RESET_VMC;
3946 
3947 	if (reset_mask & RADEON_RESET_MC)
3948 		srbm_soft_reset |= SOFT_RESET_MC;
3949 
3950 	if (grbm_soft_reset) {
3951 		tmp = RREG32(GRBM_SOFT_RESET);
3952 		tmp |= grbm_soft_reset;
3953 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3954 		WREG32(GRBM_SOFT_RESET, tmp);
3955 		tmp = RREG32(GRBM_SOFT_RESET);
3956 
3957 		udelay(50);
3958 
3959 		tmp &= ~grbm_soft_reset;
3960 		WREG32(GRBM_SOFT_RESET, tmp);
3961 		tmp = RREG32(GRBM_SOFT_RESET);
3962 	}
3963 
3964 	if (srbm_soft_reset) {
3965 		tmp = RREG32(SRBM_SOFT_RESET);
3966 		tmp |= srbm_soft_reset;
3967 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3968 		WREG32(SRBM_SOFT_RESET, tmp);
3969 		tmp = RREG32(SRBM_SOFT_RESET);
3970 
3971 		udelay(50);
3972 
3973 		tmp &= ~srbm_soft_reset;
3974 		WREG32(SRBM_SOFT_RESET, tmp);
3975 		tmp = RREG32(SRBM_SOFT_RESET);
3976 	}
3977 
3978 	/* Wait a little for things to settle down */
3979 	udelay(50);
3980 
3981 	evergreen_mc_resume(rdev, &save);
3982 	udelay(50);
3983 
3984 	evergreen_print_gpu_status_regs(rdev);
3985 }
3986 
3987 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3988 {
3989 	u32 tmp, i;
3990 
3991 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3992 	tmp |= SPLL_BYPASS_EN;
3993 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3994 
3995 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3996 	tmp |= SPLL_CTLREQ_CHG;
3997 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3998 
3999 	for (i = 0; i < rdev->usec_timeout; i++) {
4000 		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
4001 			break;
4002 		udelay(1);
4003 	}
4004 
4005 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
4006 	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
4007 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
4008 
4009 	tmp = RREG32(MPLL_CNTL_MODE);
4010 	tmp &= ~MPLL_MCLK_SEL;
4011 	WREG32(MPLL_CNTL_MODE, tmp);
4012 }
4013 
4014 static void si_spll_powerdown(struct radeon_device *rdev)
4015 {
4016 	u32 tmp;
4017 
4018 	tmp = RREG32(SPLL_CNTL_MODE);
4019 	tmp |= SPLL_SW_DIR_CONTROL;
4020 	WREG32(SPLL_CNTL_MODE, tmp);
4021 
4022 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4023 	tmp |= SPLL_RESET;
4024 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4025 
4026 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4027 	tmp |= SPLL_SLEEP;
4028 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4029 
4030 	tmp = RREG32(SPLL_CNTL_MODE);
4031 	tmp &= ~SPLL_SW_DIR_CONTROL;
4032 	WREG32(SPLL_CNTL_MODE, tmp);
4033 }
4034 
4035 static void si_gpu_pci_config_reset(struct radeon_device *rdev)
4036 {
4037 	struct evergreen_mc_save save;
4038 	u32 tmp, i;
4039 
4040 	dev_info(rdev->dev, "GPU pci config reset\n");
4041 
4042 	/* disable dpm? */
4043 
4044 	/* disable cg/pg */
4045 	si_fini_pg(rdev);
4046 	si_fini_cg(rdev);
4047 
4048 	/* Disable CP parsing/prefetching */
4049 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4050 	/* dma0 */
4051 	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
4052 	tmp &= ~DMA_RB_ENABLE;
4053 	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
4054 	/* dma1 */
4055 	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
4056 	tmp &= ~DMA_RB_ENABLE;
4057 	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
4058 	/* XXX other engines? */
4059 
4060 	/* halt the rlc, disable cp internal ints */
4061 	si_rlc_stop(rdev);
4062 
4063 	udelay(50);
4064 
4065 	/* disable mem access */
4066 	evergreen_mc_stop(rdev, &save);
4067 	if (evergreen_mc_wait_for_idle(rdev)) {
4068 		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
4069 	}
4070 
4071 	/* set mclk/sclk to bypass */
4072 	si_set_clk_bypass_mode(rdev);
4073 	/* powerdown spll */
4074 	si_spll_powerdown(rdev);
4075 	/* disable BM */
4076 	pci_clear_master(rdev->pdev);
4077 	/* reset */
4078 	radeon_pci_config_reset(rdev);
4079 	/* wait for asic to come out of reset */
4080 	for (i = 0; i < rdev->usec_timeout; i++) {
4081 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
4082 			break;
4083 		udelay(1);
4084 	}
4085 }
4086 
4087 int si_asic_reset(struct radeon_device *rdev, bool hard)
4088 {
4089 	u32 reset_mask;
4090 
4091 	if (hard) {
4092 		si_gpu_pci_config_reset(rdev);
4093 		return 0;
4094 	}
4095 
4096 	reset_mask = si_gpu_check_soft_reset(rdev);
4097 
4098 	if (reset_mask)
4099 		r600_set_bios_scratch_engine_hung(rdev, true);
4100 
4101 	/* try soft reset */
4102 	si_gpu_soft_reset(rdev, reset_mask);
4103 
4104 	reset_mask = si_gpu_check_soft_reset(rdev);
4105 
4106 	/* try pci config reset */
4107 	if (reset_mask && radeon_hard_reset)
4108 		si_gpu_pci_config_reset(rdev);
4109 
4110 	reset_mask = si_gpu_check_soft_reset(rdev);
4111 
4112 	if (!reset_mask)
4113 		r600_set_bios_scratch_engine_hung(rdev, false);
4114 
4115 	return 0;
4116 }
4117 
4118 /**
4119  * si_gfx_is_lockup - Check if the GFX engine is locked up
4120  *
4121  * @rdev: radeon_device pointer
4122  * @ring: radeon_ring structure holding ring information
4123  *
4124  * Check if the GFX engine is locked up.
4125  * Returns true if the engine appears to be locked up, false if not.
4126  */
4127 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4128 {
4129 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
4130 
4131 	if (!(reset_mask & (RADEON_RESET_GFX |
4132 			    RADEON_RESET_COMPUTE |
4133 			    RADEON_RESET_CP))) {
4134 		radeon_ring_lockup_update(rdev, ring);
4135 		return false;
4136 	}
4137 	return radeon_ring_test_lockup(rdev, ring);
4138 }
4139 
4140 /* MC */
4141 static void si_mc_program(struct radeon_device *rdev)
4142 {
4143 	struct evergreen_mc_save save;
4144 	u32 tmp;
4145 	int i, j;
4146 
4147 	/* Initialize HDP */
4148 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4149 		WREG32((0x2c14 + j), 0x00000000);
4150 		WREG32((0x2c18 + j), 0x00000000);
4151 		WREG32((0x2c1c + j), 0x00000000);
4152 		WREG32((0x2c20 + j), 0x00000000);
4153 		WREG32((0x2c24 + j), 0x00000000);
4154 	}
4155 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4156 
4157 	evergreen_mc_stop(rdev, &save);
4158 	if (radeon_mc_wait_for_idle(rdev)) {
4159 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4160 	}
4161 	if (!ASIC_IS_NODCE(rdev))
4162 		/* Lockout access through VGA aperture*/
4163 		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4164 	/* Update configuration */
4165 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4166 	       rdev->mc.vram_start >> 12);
4167 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4168 	       rdev->mc.vram_end >> 12);
4169 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4170 	       rdev->vram_scratch.gpu_addr >> 12);
4171 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4172 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4173 	WREG32(MC_VM_FB_LOCATION, tmp);
4174 	/* XXX double check these! */
4175 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4176 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4177 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4178 	WREG32(MC_VM_AGP_BASE, 0);
4179 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4180 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4181 	if (radeon_mc_wait_for_idle(rdev)) {
4182 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4183 	}
4184 	evergreen_mc_resume(rdev, &save);
4185 	if (!ASIC_IS_NODCE(rdev)) {
4186 		/* we need to own VRAM, so turn off the VGA renderer here
4187 		 * to stop it overwriting our objects */
4188 		rv515_vga_render_disable(rdev);
4189 	}
4190 }
4191 
4192 void si_vram_gtt_location(struct radeon_device *rdev,
4193 			  struct radeon_mc *mc)
4194 {
4195 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
4196 		/* leave room for at least 1024M GTT */
4197 		dev_warn(rdev->dev, "limiting VRAM\n");
4198 		mc->real_vram_size = 0xFFC0000000ULL;
4199 		mc->mc_vram_size = 0xFFC0000000ULL;
4200 	}
4201 	radeon_vram_location(rdev, &rdev->mc, 0);
4202 	rdev->mc.gtt_base_align = 0;
4203 	radeon_gtt_location(rdev, mc);
4204 }
4205 
4206 static int si_mc_init(struct radeon_device *rdev)
4207 {
4208 	u32 tmp;
4209 	int chansize, numchan;
4210 
4211 	/* Get VRAM informations */
4212 	rdev->mc.vram_is_ddr = true;
4213 	tmp = RREG32(MC_ARB_RAMCFG);
4214 	if (tmp & CHANSIZE_OVERRIDE) {
4215 		chansize = 16;
4216 	} else if (tmp & CHANSIZE_MASK) {
4217 		chansize = 64;
4218 	} else {
4219 		chansize = 32;
4220 	}
4221 	tmp = RREG32(MC_SHARED_CHMAP);
4222 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4223 	case 0:
4224 	default:
4225 		numchan = 1;
4226 		break;
4227 	case 1:
4228 		numchan = 2;
4229 		break;
4230 	case 2:
4231 		numchan = 4;
4232 		break;
4233 	case 3:
4234 		numchan = 8;
4235 		break;
4236 	case 4:
4237 		numchan = 3;
4238 		break;
4239 	case 5:
4240 		numchan = 6;
4241 		break;
4242 	case 6:
4243 		numchan = 10;
4244 		break;
4245 	case 7:
4246 		numchan = 12;
4247 		break;
4248 	case 8:
4249 		numchan = 16;
4250 		break;
4251 	}
4252 	rdev->mc.vram_width = numchan * chansize;
4253 	/* Could aper size report 0 ? */
4254 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4255 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4256 	/* size in MB on si */
4257 	tmp = RREG32(CONFIG_MEMSIZE);
4258 	/* some boards may have garbage in the upper 16 bits */
4259 	if (tmp & 0xffff0000) {
4260 		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4261 		if (tmp & 0xffff)
4262 			tmp &= 0xffff;
4263 	}
4264 	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4265 	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4266 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4267 	si_vram_gtt_location(rdev, &rdev->mc);
4268 	radeon_update_bandwidth_info(rdev);
4269 
4270 	return 0;
4271 }
4272 
4273 /*
4274  * GART
4275  */
4276 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4277 {
4278 	/* flush hdp cache */
4279 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4280 
4281 	/* bits 0-15 are the VM contexts0-15 */
4282 	WREG32(VM_INVALIDATE_REQUEST, 1);
4283 }
4284 
4285 static int si_pcie_gart_enable(struct radeon_device *rdev)
4286 {
4287 	int r, i;
4288 
4289 	if (rdev->gart.robj == NULL) {
4290 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4291 		return -EINVAL;
4292 	}
4293 	r = radeon_gart_table_vram_pin(rdev);
4294 	if (r)
4295 		return r;
4296 	/* Setup TLB control */
4297 	WREG32(MC_VM_MX_L1_TLB_CNTL,
4298 	       (0xA << 7) |
4299 	       ENABLE_L1_TLB |
4300 	       ENABLE_L1_FRAGMENT_PROCESSING |
4301 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4302 	       ENABLE_ADVANCED_DRIVER_MODEL |
4303 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4304 	/* Setup L2 cache */
4305 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4306 	       ENABLE_L2_FRAGMENT_PROCESSING |
4307 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4308 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4309 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4310 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4311 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4312 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4313 	       BANK_SELECT(4) |
4314 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4315 	/* setup context0 */
4316 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4317 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4318 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4319 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4320 			(u32)(rdev->dummy_page.addr >> 12));
4321 	WREG32(VM_CONTEXT0_CNTL2, 0);
4322 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4323 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4324 
4325 	WREG32(0x15D4, 0);
4326 	WREG32(0x15D8, 0);
4327 	WREG32(0x15DC, 0);
4328 
4329 	/* empty context1-15 */
4330 	/* set vm size, must be a multiple of 4 */
4331 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4332 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
4333 	/* Assign the pt base to something valid for now; the pts used for
4334 	 * the VMs are determined by the application and setup and assigned
4335 	 * on the fly in the vm part of radeon_gart.c
4336 	 */
4337 	for (i = 1; i < 16; i++) {
4338 		if (i < 8)
4339 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4340 			       rdev->vm_manager.saved_table_addr[i]);
4341 		else
4342 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4343 			       rdev->vm_manager.saved_table_addr[i]);
4344 	}
4345 
4346 	/* enable context1-15 */
4347 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4348 	       (u32)(rdev->dummy_page.addr >> 12));
4349 	WREG32(VM_CONTEXT1_CNTL2, 4);
4350 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4351 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
4352 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4353 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4354 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4355 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4356 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4357 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4358 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4359 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4360 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4361 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4362 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4363 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4364 
4365 	si_pcie_gart_tlb_flush(rdev);
4366 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4367 		 (unsigned)(rdev->mc.gtt_size >> 20),
4368 		 (unsigned long long)rdev->gart.table_addr);
4369 	rdev->gart.ready = true;
4370 	return 0;
4371 }
4372 
4373 static void si_pcie_gart_disable(struct radeon_device *rdev)
4374 {
4375 	unsigned i;
4376 
4377 	for (i = 1; i < 16; ++i) {
4378 		uint32_t reg;
4379 		if (i < 8)
4380 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4381 		else
4382 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4383 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4384 	}
4385 
4386 	/* Disable all tables */
4387 	WREG32(VM_CONTEXT0_CNTL, 0);
4388 	WREG32(VM_CONTEXT1_CNTL, 0);
4389 	/* Setup TLB control */
4390 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4391 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4392 	/* Setup L2 cache */
4393 	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4394 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4395 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4396 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4397 	WREG32(VM_L2_CNTL2, 0);
4398 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4399 	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4400 	radeon_gart_table_vram_unpin(rdev);
4401 }
4402 
/*
 * Tear down the PCIE GART: disable translation first, then free the
 * table in VRAM and finally the common GART state.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	/* translation must be off before the table goes away */
	si_pcie_gart_disable(rdev);

	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4409 
4410 /* vm parser */
4411 static bool si_vm_reg_valid(u32 reg)
4412 {
4413 	/* context regs are fine */
4414 	if (reg >= 0x28000)
4415 		return true;
4416 
4417 	/* shader regs are also fine */
4418 	if (reg >= 0xB000 && reg < 0xC000)
4419 		return true;
4420 
4421 	/* check config regs */
4422 	switch (reg) {
4423 	case GRBM_GFX_INDEX:
4424 	case CP_STRMOUT_CNTL:
4425 	case VGT_VTX_VECT_EJECT_REG:
4426 	case VGT_CACHE_INVALIDATION:
4427 	case VGT_ESGS_RING_SIZE:
4428 	case VGT_GSVS_RING_SIZE:
4429 	case VGT_GS_VERTEX_REUSE:
4430 	case VGT_PRIMITIVE_TYPE:
4431 	case VGT_INDEX_TYPE:
4432 	case VGT_NUM_INDICES:
4433 	case VGT_NUM_INSTANCES:
4434 	case VGT_TF_RING_SIZE:
4435 	case VGT_HS_OFFCHIP_PARAM:
4436 	case VGT_TF_MEMORY_BASE:
4437 	case PA_CL_ENHANCE:
4438 	case PA_SU_LINE_STIPPLE_VALUE:
4439 	case PA_SC_LINE_STIPPLE_STATE:
4440 	case PA_SC_ENHANCE:
4441 	case SQC_CACHES:
4442 	case SPI_STATIC_THREAD_MGMT_1:
4443 	case SPI_STATIC_THREAD_MGMT_2:
4444 	case SPI_STATIC_THREAD_MGMT_3:
4445 	case SPI_PS_MAX_WAVE_ID:
4446 	case SPI_CONFIG_CNTL:
4447 	case SPI_CONFIG_CNTL_1:
4448 	case TA_CNTL_AUX:
4449 	case TA_CS_BC_BASE_ADDR:
4450 		return true;
4451 	default:
4452 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4453 		return false;
4454 	}
4455 }
4456 
4457 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4458 				  u32 *ib, struct radeon_cs_packet *pkt)
4459 {
4460 	switch (pkt->opcode) {
4461 	case PACKET3_NOP:
4462 	case PACKET3_SET_BASE:
4463 	case PACKET3_SET_CE_DE_COUNTERS:
4464 	case PACKET3_LOAD_CONST_RAM:
4465 	case PACKET3_WRITE_CONST_RAM:
4466 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4467 	case PACKET3_DUMP_CONST_RAM:
4468 	case PACKET3_INCREMENT_CE_COUNTER:
4469 	case PACKET3_WAIT_ON_DE_COUNTER:
4470 	case PACKET3_CE_WRITE:
4471 		break;
4472 	default:
4473 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4474 		return -EINVAL;
4475 	}
4476 	return 0;
4477 }
4478 
4479 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4480 {
4481 	u32 start_reg, reg, i;
4482 	u32 command = ib[idx + 4];
4483 	u32 info = ib[idx + 1];
4484 	u32 idx_value = ib[idx];
4485 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4486 		/* src address space is register */
4487 		if (((info & 0x60000000) >> 29) == 0) {
4488 			start_reg = idx_value << 2;
4489 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4490 				reg = start_reg;
4491 				if (!si_vm_reg_valid(reg)) {
4492 					DRM_ERROR("CP DMA Bad SRC register\n");
4493 					return -EINVAL;
4494 				}
4495 			} else {
4496 				for (i = 0; i < (command & 0x1fffff); i++) {
4497 					reg = start_reg + (4 * i);
4498 					if (!si_vm_reg_valid(reg)) {
4499 						DRM_ERROR("CP DMA Bad SRC register\n");
4500 						return -EINVAL;
4501 					}
4502 				}
4503 			}
4504 		}
4505 	}
4506 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4507 		/* dst address space is register */
4508 		if (((info & 0x00300000) >> 20) == 0) {
4509 			start_reg = ib[idx + 2];
4510 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4511 				reg = start_reg;
4512 				if (!si_vm_reg_valid(reg)) {
4513 					DRM_ERROR("CP DMA Bad DST register\n");
4514 					return -EINVAL;
4515 				}
4516 			} else {
4517 				for (i = 0; i < (command & 0x1fffff); i++) {
4518 					reg = start_reg + (4 * i);
4519 				if (!si_vm_reg_valid(reg)) {
4520 						DRM_ERROR("CP DMA Bad DST register\n");
4521 						return -EINVAL;
4522 					}
4523 				}
4524 			}
4525 		}
4526 	}
4527 	return 0;
4528 }
4529 
4530 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4531 				   u32 *ib, struct radeon_cs_packet *pkt)
4532 {
4533 	int r;
4534 	u32 idx = pkt->idx + 1;
4535 	u32 idx_value = ib[idx];
4536 	u32 start_reg, end_reg, reg, i;
4537 
4538 	switch (pkt->opcode) {
4539 	case PACKET3_NOP:
4540 	case PACKET3_SET_BASE:
4541 	case PACKET3_CLEAR_STATE:
4542 	case PACKET3_INDEX_BUFFER_SIZE:
4543 	case PACKET3_DISPATCH_DIRECT:
4544 	case PACKET3_DISPATCH_INDIRECT:
4545 	case PACKET3_ALLOC_GDS:
4546 	case PACKET3_WRITE_GDS_RAM:
4547 	case PACKET3_ATOMIC_GDS:
4548 	case PACKET3_ATOMIC:
4549 	case PACKET3_OCCLUSION_QUERY:
4550 	case PACKET3_SET_PREDICATION:
4551 	case PACKET3_COND_EXEC:
4552 	case PACKET3_PRED_EXEC:
4553 	case PACKET3_DRAW_INDIRECT:
4554 	case PACKET3_DRAW_INDEX_INDIRECT:
4555 	case PACKET3_INDEX_BASE:
4556 	case PACKET3_DRAW_INDEX_2:
4557 	case PACKET3_CONTEXT_CONTROL:
4558 	case PACKET3_INDEX_TYPE:
4559 	case PACKET3_DRAW_INDIRECT_MULTI:
4560 	case PACKET3_DRAW_INDEX_AUTO:
4561 	case PACKET3_DRAW_INDEX_IMMD:
4562 	case PACKET3_NUM_INSTANCES:
4563 	case PACKET3_DRAW_INDEX_MULTI_AUTO:
4564 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4565 	case PACKET3_DRAW_INDEX_OFFSET_2:
4566 	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4567 	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4568 	case PACKET3_MPEG_INDEX:
4569 	case PACKET3_WAIT_REG_MEM:
4570 	case PACKET3_MEM_WRITE:
4571 	case PACKET3_PFP_SYNC_ME:
4572 	case PACKET3_SURFACE_SYNC:
4573 	case PACKET3_EVENT_WRITE:
4574 	case PACKET3_EVENT_WRITE_EOP:
4575 	case PACKET3_EVENT_WRITE_EOS:
4576 	case PACKET3_SET_CONTEXT_REG:
4577 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4578 	case PACKET3_SET_SH_REG:
4579 	case PACKET3_SET_SH_REG_OFFSET:
4580 	case PACKET3_INCREMENT_DE_COUNTER:
4581 	case PACKET3_WAIT_ON_CE_COUNTER:
4582 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4583 	case PACKET3_ME_WRITE:
4584 		break;
4585 	case PACKET3_COPY_DATA:
4586 		if ((idx_value & 0xf00) == 0) {
4587 			reg = ib[idx + 3] * 4;
4588 			if (!si_vm_reg_valid(reg))
4589 				return -EINVAL;
4590 		}
4591 		break;
4592 	case PACKET3_WRITE_DATA:
4593 		if ((idx_value & 0xf00) == 0) {
4594 			start_reg = ib[idx + 1] * 4;
4595 			if (idx_value & 0x10000) {
4596 				if (!si_vm_reg_valid(start_reg))
4597 					return -EINVAL;
4598 			} else {
4599 				for (i = 0; i < (pkt->count - 2); i++) {
4600 					reg = start_reg + (4 * i);
4601 					if (!si_vm_reg_valid(reg))
4602 						return -EINVAL;
4603 				}
4604 			}
4605 		}
4606 		break;
4607 	case PACKET3_COND_WRITE:
4608 		if (idx_value & 0x100) {
4609 			reg = ib[idx + 5] * 4;
4610 			if (!si_vm_reg_valid(reg))
4611 				return -EINVAL;
4612 		}
4613 		break;
4614 	case PACKET3_COPY_DW:
4615 		if (idx_value & 0x2) {
4616 			reg = ib[idx + 3] * 4;
4617 			if (!si_vm_reg_valid(reg))
4618 				return -EINVAL;
4619 		}
4620 		break;
4621 	case PACKET3_SET_CONFIG_REG:
4622 		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4623 		end_reg = 4 * pkt->count + start_reg - 4;
4624 		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4625 		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4626 		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4627 			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4628 			return -EINVAL;
4629 		}
4630 		for (i = 0; i < pkt->count; i++) {
4631 			reg = start_reg + (4 * i);
4632 			if (!si_vm_reg_valid(reg))
4633 				return -EINVAL;
4634 		}
4635 		break;
4636 	case PACKET3_CP_DMA:
4637 		r = si_vm_packet3_cp_dma_check(ib, idx);
4638 		if (r)
4639 			return r;
4640 		break;
4641 	default:
4642 		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4643 		return -EINVAL;
4644 	}
4645 	return 0;
4646 }
4647 
4648 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4649 				       u32 *ib, struct radeon_cs_packet *pkt)
4650 {
4651 	int r;
4652 	u32 idx = pkt->idx + 1;
4653 	u32 idx_value = ib[idx];
4654 	u32 start_reg, reg, i;
4655 
4656 	switch (pkt->opcode) {
4657 	case PACKET3_NOP:
4658 	case PACKET3_SET_BASE:
4659 	case PACKET3_CLEAR_STATE:
4660 	case PACKET3_DISPATCH_DIRECT:
4661 	case PACKET3_DISPATCH_INDIRECT:
4662 	case PACKET3_ALLOC_GDS:
4663 	case PACKET3_WRITE_GDS_RAM:
4664 	case PACKET3_ATOMIC_GDS:
4665 	case PACKET3_ATOMIC:
4666 	case PACKET3_OCCLUSION_QUERY:
4667 	case PACKET3_SET_PREDICATION:
4668 	case PACKET3_COND_EXEC:
4669 	case PACKET3_PRED_EXEC:
4670 	case PACKET3_CONTEXT_CONTROL:
4671 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4672 	case PACKET3_WAIT_REG_MEM:
4673 	case PACKET3_MEM_WRITE:
4674 	case PACKET3_PFP_SYNC_ME:
4675 	case PACKET3_SURFACE_SYNC:
4676 	case PACKET3_EVENT_WRITE:
4677 	case PACKET3_EVENT_WRITE_EOP:
4678 	case PACKET3_EVENT_WRITE_EOS:
4679 	case PACKET3_SET_CONTEXT_REG:
4680 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4681 	case PACKET3_SET_SH_REG:
4682 	case PACKET3_SET_SH_REG_OFFSET:
4683 	case PACKET3_INCREMENT_DE_COUNTER:
4684 	case PACKET3_WAIT_ON_CE_COUNTER:
4685 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4686 	case PACKET3_ME_WRITE:
4687 		break;
4688 	case PACKET3_COPY_DATA:
4689 		if ((idx_value & 0xf00) == 0) {
4690 			reg = ib[idx + 3] * 4;
4691 			if (!si_vm_reg_valid(reg))
4692 				return -EINVAL;
4693 		}
4694 		break;
4695 	case PACKET3_WRITE_DATA:
4696 		if ((idx_value & 0xf00) == 0) {
4697 			start_reg = ib[idx + 1] * 4;
4698 			if (idx_value & 0x10000) {
4699 				if (!si_vm_reg_valid(start_reg))
4700 					return -EINVAL;
4701 			} else {
4702 				for (i = 0; i < (pkt->count - 2); i++) {
4703 					reg = start_reg + (4 * i);
4704 					if (!si_vm_reg_valid(reg))
4705 						return -EINVAL;
4706 				}
4707 			}
4708 		}
4709 		break;
4710 	case PACKET3_COND_WRITE:
4711 		if (idx_value & 0x100) {
4712 			reg = ib[idx + 5] * 4;
4713 			if (!si_vm_reg_valid(reg))
4714 				return -EINVAL;
4715 		}
4716 		break;
4717 	case PACKET3_COPY_DW:
4718 		if (idx_value & 0x2) {
4719 			reg = ib[idx + 3] * 4;
4720 			if (!si_vm_reg_valid(reg))
4721 				return -EINVAL;
4722 		}
4723 		break;
4724 	case PACKET3_CP_DMA:
4725 		r = si_vm_packet3_cp_dma_check(ib, idx);
4726 		if (r)
4727 			return r;
4728 		break;
4729 	default:
4730 		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4731 		return -EINVAL;
4732 	}
4733 	return 0;
4734 }
4735 
4736 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4737 {
4738 	int ret = 0;
4739 	u32 idx = 0, i;
4740 	struct radeon_cs_packet pkt;
4741 
4742 	do {
4743 		pkt.idx = idx;
4744 		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4745 		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4746 		pkt.one_reg_wr = 0;
4747 		switch (pkt.type) {
4748 		case RADEON_PACKET_TYPE0:
4749 			dev_err(rdev->dev, "Packet0 not allowed!\n");
4750 			ret = -EINVAL;
4751 			break;
4752 		case RADEON_PACKET_TYPE2:
4753 			idx += 1;
4754 			break;
4755 		case RADEON_PACKET_TYPE3:
4756 			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4757 			if (ib->is_const_ib)
4758 				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4759 			else {
4760 				switch (ib->ring) {
4761 				case RADEON_RING_TYPE_GFX_INDEX:
4762 					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4763 					break;
4764 				case CAYMAN_RING_TYPE_CP1_INDEX:
4765 				case CAYMAN_RING_TYPE_CP2_INDEX:
4766 					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4767 					break;
4768 				default:
4769 					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4770 					ret = -EINVAL;
4771 					break;
4772 				}
4773 			}
4774 			idx += pkt.count + 2;
4775 			break;
4776 		default:
4777 			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4778 			ret = -EINVAL;
4779 			break;
4780 		}
4781 		if (ret) {
4782 			for (i = 0; i < ib->length_dw; i++) {
4783 				if (i == idx)
4784 					printk("\t0x%08x <---\n", ib->ptr[i]);
4785 				else
4786 					printk("\t0x%08x\n", ib->ptr[i]);
4787 			}
4788 			break;
4789 		}
4790 	} while (idx < ib->length_dw);
4791 
4792 	return ret;
4793 }
4794 
4795 /*
4796  * vm
4797  */
4798 int si_vm_init(struct radeon_device *rdev)
4799 {
4800 	/* number of VMs */
4801 	rdev->vm_manager.nvm = 16;
4802 	/* base offset of vram pages */
4803 	rdev->vm_manager.vram_base_offset = 0;
4804 
4805 	return 0;
4806 }
4807 
/**
 * si_vm_fini - VM teardown hook (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Intentionally empty: si_vm_init() only stores two values in the VM
 * manager, so there is nothing to undo here.
 */
void si_vm_fini(struct radeon_device *rdev)
{
}
4811 
4812 /**
4813  * si_vm_decode_fault - print human readable fault info
4814  *
4815  * @rdev: radeon_device pointer
4816  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4817  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4818  *
4819  * Print human readable fault information (SI).
4820  */
4821 static void si_vm_decode_fault(struct radeon_device *rdev,
4822 			       u32 status, u32 addr)
4823 {
4824 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4825 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4826 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4827 	char *block;
4828 
4829 	if (rdev->family == CHIP_TAHITI) {
4830 		switch (mc_id) {
4831 		case 160:
4832 		case 144:
4833 		case 96:
4834 		case 80:
4835 		case 224:
4836 		case 208:
4837 		case 32:
4838 		case 16:
4839 			block = "CB";
4840 			break;
4841 		case 161:
4842 		case 145:
4843 		case 97:
4844 		case 81:
4845 		case 225:
4846 		case 209:
4847 		case 33:
4848 		case 17:
4849 			block = "CB_FMASK";
4850 			break;
4851 		case 162:
4852 		case 146:
4853 		case 98:
4854 		case 82:
4855 		case 226:
4856 		case 210:
4857 		case 34:
4858 		case 18:
4859 			block = "CB_CMASK";
4860 			break;
4861 		case 163:
4862 		case 147:
4863 		case 99:
4864 		case 83:
4865 		case 227:
4866 		case 211:
4867 		case 35:
4868 		case 19:
4869 			block = "CB_IMMED";
4870 			break;
4871 		case 164:
4872 		case 148:
4873 		case 100:
4874 		case 84:
4875 		case 228:
4876 		case 212:
4877 		case 36:
4878 		case 20:
4879 			block = "DB";
4880 			break;
4881 		case 165:
4882 		case 149:
4883 		case 101:
4884 		case 85:
4885 		case 229:
4886 		case 213:
4887 		case 37:
4888 		case 21:
4889 			block = "DB_HTILE";
4890 			break;
4891 		case 167:
4892 		case 151:
4893 		case 103:
4894 		case 87:
4895 		case 231:
4896 		case 215:
4897 		case 39:
4898 		case 23:
4899 			block = "DB_STEN";
4900 			break;
4901 		case 72:
4902 		case 68:
4903 		case 64:
4904 		case 8:
4905 		case 4:
4906 		case 0:
4907 		case 136:
4908 		case 132:
4909 		case 128:
4910 		case 200:
4911 		case 196:
4912 		case 192:
4913 			block = "TC";
4914 			break;
4915 		case 112:
4916 		case 48:
4917 			block = "CP";
4918 			break;
4919 		case 49:
4920 		case 177:
4921 		case 50:
4922 		case 178:
4923 			block = "SH";
4924 			break;
4925 		case 53:
4926 		case 190:
4927 			block = "VGT";
4928 			break;
4929 		case 117:
4930 			block = "IH";
4931 			break;
4932 		case 51:
4933 		case 115:
4934 			block = "RLC";
4935 			break;
4936 		case 119:
4937 		case 183:
4938 			block = "DMA0";
4939 			break;
4940 		case 61:
4941 			block = "DMA1";
4942 			break;
4943 		case 248:
4944 		case 120:
4945 			block = "HDP";
4946 			break;
4947 		default:
4948 			block = "unknown";
4949 			break;
4950 		}
4951 	} else {
4952 		switch (mc_id) {
4953 		case 32:
4954 		case 16:
4955 		case 96:
4956 		case 80:
4957 		case 160:
4958 		case 144:
4959 		case 224:
4960 		case 208:
4961 			block = "CB";
4962 			break;
4963 		case 33:
4964 		case 17:
4965 		case 97:
4966 		case 81:
4967 		case 161:
4968 		case 145:
4969 		case 225:
4970 		case 209:
4971 			block = "CB_FMASK";
4972 			break;
4973 		case 34:
4974 		case 18:
4975 		case 98:
4976 		case 82:
4977 		case 162:
4978 		case 146:
4979 		case 226:
4980 		case 210:
4981 			block = "CB_CMASK";
4982 			break;
4983 		case 35:
4984 		case 19:
4985 		case 99:
4986 		case 83:
4987 		case 163:
4988 		case 147:
4989 		case 227:
4990 		case 211:
4991 			block = "CB_IMMED";
4992 			break;
4993 		case 36:
4994 		case 20:
4995 		case 100:
4996 		case 84:
4997 		case 164:
4998 		case 148:
4999 		case 228:
5000 		case 212:
5001 			block = "DB";
5002 			break;
5003 		case 37:
5004 		case 21:
5005 		case 101:
5006 		case 85:
5007 		case 165:
5008 		case 149:
5009 		case 229:
5010 		case 213:
5011 			block = "DB_HTILE";
5012 			break;
5013 		case 39:
5014 		case 23:
5015 		case 103:
5016 		case 87:
5017 		case 167:
5018 		case 151:
5019 		case 231:
5020 		case 215:
5021 			block = "DB_STEN";
5022 			break;
5023 		case 72:
5024 		case 68:
5025 		case 8:
5026 		case 4:
5027 		case 136:
5028 		case 132:
5029 		case 200:
5030 		case 196:
5031 			block = "TC";
5032 			break;
5033 		case 112:
5034 		case 48:
5035 			block = "CP";
5036 			break;
5037 		case 49:
5038 		case 177:
5039 		case 50:
5040 		case 178:
5041 			block = "SH";
5042 			break;
5043 		case 53:
5044 			block = "VGT";
5045 			break;
5046 		case 117:
5047 			block = "IH";
5048 			break;
5049 		case 51:
5050 		case 115:
5051 			block = "RLC";
5052 			break;
5053 		case 119:
5054 		case 183:
5055 			block = "DMA0";
5056 			break;
5057 		case 61:
5058 			block = "DMA1";
5059 			break;
5060 		case 248:
5061 		case 120:
5062 			block = "HDP";
5063 			break;
5064 		default:
5065 			block = "unknown";
5066 			break;
5067 		}
5068 	}
5069 
5070 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5071 	       protections, vmid, addr,
5072 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5073 	       block, mc_id);
5074 }
5075 
5076 void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5077 		 unsigned vm_id, uint64_t pd_addr)
5078 {
5079 	/* write new base address */
5080 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5081 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5082 				 WRITE_DATA_DST_SEL(0)));
5083 
5084 	if (vm_id < 8) {
5085 		radeon_ring_write(ring,
5086 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5087 	} else {
5088 		radeon_ring_write(ring,
5089 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5090 	}
5091 	radeon_ring_write(ring, 0);
5092 	radeon_ring_write(ring, pd_addr >> 12);
5093 
5094 	/* flush hdp cache */
5095 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5096 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5097 				 WRITE_DATA_DST_SEL(0)));
5098 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5099 	radeon_ring_write(ring, 0);
5100 	radeon_ring_write(ring, 0x1);
5101 
5102 	/* bits 0-15 are the VM contexts0-15 */
5103 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5104 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5105 				 WRITE_DATA_DST_SEL(0)));
5106 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5107 	radeon_ring_write(ring, 0);
5108 	radeon_ring_write(ring, 1 << vm_id);
5109 
5110 	/* wait for the invalidate to complete */
5111 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5112 	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
5113 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5114 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5115 	radeon_ring_write(ring, 0);
5116 	radeon_ring_write(ring, 0); /* ref */
5117 	radeon_ring_write(ring, 0); /* mask */
5118 	radeon_ring_write(ring, 0x20); /* poll interval */
5119 
5120 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
5121 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5122 	radeon_ring_write(ring, 0x0);
5123 }
5124 
5125 /*
5126  *  Power and clock gating
5127  */
5128 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5129 {
5130 	int i;
5131 
5132 	for (i = 0; i < rdev->usec_timeout; i++) {
5133 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5134 			break;
5135 		udelay(1);
5136 	}
5137 
5138 	for (i = 0; i < rdev->usec_timeout; i++) {
5139 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5140 			break;
5141 		udelay(1);
5142 	}
5143 }
5144 
5145 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5146 					 bool enable)
5147 {
5148 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5149 	u32 mask;
5150 	int i;
5151 
5152 	if (enable)
5153 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5154 	else
5155 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5156 	WREG32(CP_INT_CNTL_RING0, tmp);
5157 
5158 	if (!enable) {
5159 		/* read a gfx register */
5160 		tmp = RREG32(DB_DEPTH_INFO);
5161 
5162 		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5163 		for (i = 0; i < rdev->usec_timeout; i++) {
5164 			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5165 				break;
5166 			udelay(1);
5167 		}
5168 	}
5169 }
5170 
5171 static void si_set_uvd_dcm(struct radeon_device *rdev,
5172 			   bool sw_mode)
5173 {
5174 	u32 tmp, tmp2;
5175 
5176 	tmp = RREG32(UVD_CGC_CTRL);
5177 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5178 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
5179 
5180 	if (sw_mode) {
5181 		tmp &= ~0x7ffff800;
5182 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5183 	} else {
5184 		tmp |= 0x7ffff800;
5185 		tmp2 = 0;
5186 	}
5187 
5188 	WREG32(UVD_CGC_CTRL, tmp);
5189 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5190 }
5191 
5192 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5193 {
5194 	bool hw_mode = true;
5195 
5196 	if (hw_mode) {
5197 		si_set_uvd_dcm(rdev, false);
5198 	} else {
5199 		u32 tmp = RREG32(UVD_CGC_CTRL);
5200 		tmp &= ~DCM;
5201 		WREG32(UVD_CGC_CTRL, tmp);
5202 	}
5203 }
5204 
5205 static u32 si_halt_rlc(struct radeon_device *rdev)
5206 {
5207 	u32 data, orig;
5208 
5209 	orig = data = RREG32(RLC_CNTL);
5210 
5211 	if (data & RLC_ENABLE) {
5212 		data &= ~RLC_ENABLE;
5213 		WREG32(RLC_CNTL, data);
5214 
5215 		si_wait_for_rlc_serdes(rdev);
5216 	}
5217 
5218 	return orig;
5219 }
5220 
5221 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5222 {
5223 	u32 tmp;
5224 
5225 	tmp = RREG32(RLC_CNTL);
5226 	if (tmp != rlc)
5227 		WREG32(RLC_CNTL, rlc);
5228 }
5229 
5230 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5231 {
5232 	u32 data, orig;
5233 
5234 	orig = data = RREG32(DMA_PG);
5235 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5236 		data |= PG_CNTL_ENABLE;
5237 	else
5238 		data &= ~PG_CNTL_ENABLE;
5239 	if (orig != data)
5240 		WREG32(DMA_PG, data);
5241 }
5242 
5243 static void si_init_dma_pg(struct radeon_device *rdev)
5244 {
5245 	u32 tmp;
5246 
5247 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
5248 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5249 
5250 	for (tmp = 0; tmp < 5; tmp++)
5251 		WREG32(DMA_PGFSM_WRITE, 0);
5252 }
5253 
5254 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5255 			       bool enable)
5256 {
5257 	u32 tmp;
5258 
5259 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5260 		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5261 		WREG32(RLC_TTOP_D, tmp);
5262 
5263 		tmp = RREG32(RLC_PG_CNTL);
5264 		tmp |= GFX_PG_ENABLE;
5265 		WREG32(RLC_PG_CNTL, tmp);
5266 
5267 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5268 		tmp |= AUTO_PG_EN;
5269 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5270 	} else {
5271 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5272 		tmp &= ~AUTO_PG_EN;
5273 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5274 
5275 		tmp = RREG32(DB_RENDER_CONTROL);
5276 	}
5277 }
5278 
5279 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5280 {
5281 	u32 tmp;
5282 
5283 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5284 
5285 	tmp = RREG32(RLC_PG_CNTL);
5286 	tmp |= GFX_PG_SRC;
5287 	WREG32(RLC_PG_CNTL, tmp);
5288 
5289 	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5290 
5291 	tmp = RREG32(RLC_AUTO_PG_CTRL);
5292 
5293 	tmp &= ~GRBM_REG_SGIT_MASK;
5294 	tmp |= GRBM_REG_SGIT(0x700);
5295 	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5296 	WREG32(RLC_AUTO_PG_CTRL, tmp);
5297 }
5298 
5299 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5300 {
5301 	u32 mask = 0, tmp, tmp1;
5302 	int i;
5303 
5304 	si_select_se_sh(rdev, se, sh);
5305 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5306 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5307 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5308 
5309 	tmp &= 0xffff0000;
5310 
5311 	tmp |= tmp1;
5312 	tmp >>= 16;
5313 
5314 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5315 		mask <<= 1;
5316 		mask |= 1;
5317 	}
5318 
5319 	return (~tmp) & mask;
5320 }
5321 
5322 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5323 {
5324 	u32 i, j, k, active_cu_number = 0;
5325 	u32 mask, counter, cu_bitmap;
5326 	u32 tmp = 0;
5327 
5328 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5329 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5330 			mask = 1;
5331 			cu_bitmap = 0;
5332 			counter  = 0;
5333 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5334 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5335 					if (counter < 2)
5336 						cu_bitmap |= mask;
5337 					counter++;
5338 				}
5339 				mask <<= 1;
5340 			}
5341 
5342 			active_cu_number += counter;
5343 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5344 		}
5345 	}
5346 
5347 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5348 
5349 	tmp = RREG32(RLC_MAX_PG_CU);
5350 	tmp &= ~MAX_PU_CU_MASK;
5351 	tmp |= MAX_PU_CU(active_cu_number);
5352 	WREG32(RLC_MAX_PG_CU, tmp);
5353 }
5354 
/*
 * Set or clear CGCG_EN | CGLS_EN in RLC_CGCG_CGLS_CTRL.
 *
 * The enable path is taken only when @enable is true and the device has
 * RADEON_CG_SUPPORT_GFX_CGCG; it programs the RLC serdes with the RLC
 * halted before setting the bits.  Any other combination takes the
 * disable path.  RLC_CGCG_CGLS_CTRL is written only if its value changes.
 */
static void si_enable_cgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		si_enable_gui_idle_interrupt(rdev, true);

		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

		/* tmp keeps the RLC_CNTL value to restore below */
		tmp = si_halt_rlc(rdev);

		/* select all serdes masters, then issue the write command */
		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

		si_wait_for_rlc_serdes(rdev);

		/* put RLC_CNTL back to what it was before the halt */
		si_update_rlc(rdev, tmp);

		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

		data |= CGCG_EN | CGLS_EN;
	} else {
		si_enable_gui_idle_interrupt(rdev, false);

		/*
		 * Four back-to-back reads whose values are discarded
		 * (presumably a settling delay - TODO confirm).
		 */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
5394 
/*
 * Enable or disable GFX medium grain clock gating.
 *
 * The enable path is taken only when @enable is true and the device has
 * RADEON_CG_SUPPORT_GFX_MGCG.  Both paths end by halting the RLC,
 * issuing a serdes write command and restoring RLC_CNTL; they differ in
 * the CGTS_SM_CTRL_REG, CP_MEM_SLP_CNTL and RLC_CGTT_MGCG_OVERRIDE
 * programming and in the serdes command value.
 */
static void si_enable_mgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		/* the register is forced to a fixed value, written only if it differs */
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data = 0x96940200;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* CP memory light sleep is optional */
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
			orig = data = RREG32(CP_MEM_SLP_CNTL);
			data |= CP_MEM_LS_EN;
			if (orig != data)
				WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* clear the low six override bits */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xffffffc0;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* tmp keeps the RLC_CNTL value to restore below */
		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

		si_update_rlc(rdev, tmp);
	} else {
		/* set the two lowest override bits */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* CP memory light sleep is switched off regardless of cg_flags */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= LS_OVERRIDE | OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* tmp keeps the RLC_CNTL value to restore below */
		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

		si_update_rlc(rdev, tmp);
	}
}
5450 
5451 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5452 			       bool enable)
5453 {
5454 	u32 orig, data, tmp;
5455 
5456 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5457 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5458 		tmp |= 0x3fff;
5459 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5460 
5461 		orig = data = RREG32(UVD_CGC_CTRL);
5462 		data |= DCM;
5463 		if (orig != data)
5464 			WREG32(UVD_CGC_CTRL, data);
5465 
5466 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5467 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5468 	} else {
5469 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5470 		tmp &= ~0x3fff;
5471 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5472 
5473 		orig = data = RREG32(UVD_CGC_CTRL);
5474 		data &= ~DCM;
5475 		if (orig != data)
5476 			WREG32(UVD_CGC_CTRL, data);
5477 
5478 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5479 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5480 	}
5481 }
5482 
/*
 * Memory controller clock gating registers.  si_enable_mc_ls() toggles
 * MC_LS_ENABLE and si_enable_mc_mgcg() toggles MC_CG_ENABLE in every
 * register listed here.
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5495 
5496 static void si_enable_mc_ls(struct radeon_device *rdev,
5497 			    bool enable)
5498 {
5499 	int i;
5500 	u32 orig, data;
5501 
5502 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5503 		orig = data = RREG32(mc_cg_registers[i]);
5504 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5505 			data |= MC_LS_ENABLE;
5506 		else
5507 			data &= ~MC_LS_ENABLE;
5508 		if (data != orig)
5509 			WREG32(mc_cg_registers[i], data);
5510 	}
5511 }
5512 
5513 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5514 			       bool enable)
5515 {
5516 	int i;
5517 	u32 orig, data;
5518 
5519 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5520 		orig = data = RREG32(mc_cg_registers[i]);
5521 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5522 			data |= MC_CG_ENABLE;
5523 		else
5524 			data &= ~MC_CG_ENABLE;
5525 		if (data != orig)
5526 			WREG32(mc_cg_registers[i], data);
5527 	}
5528 }
5529 
5530 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5531 			       bool enable)
5532 {
5533 	u32 orig, data, offset;
5534 	int i;
5535 
5536 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5537 		for (i = 0; i < 2; i++) {
5538 			if (i == 0)
5539 				offset = DMA0_REGISTER_OFFSET;
5540 			else
5541 				offset = DMA1_REGISTER_OFFSET;
5542 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5543 			data &= ~MEM_POWER_OVERRIDE;
5544 			if (data != orig)
5545 				WREG32(DMA_POWER_CNTL + offset, data);
5546 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5547 		}
5548 	} else {
5549 		for (i = 0; i < 2; i++) {
5550 			if (i == 0)
5551 				offset = DMA0_REGISTER_OFFSET;
5552 			else
5553 				offset = DMA1_REGISTER_OFFSET;
5554 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5555 			data |= MEM_POWER_OVERRIDE;
5556 			if (data != orig)
5557 				WREG32(DMA_POWER_CNTL + offset, data);
5558 
5559 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5560 			data = 0xff000000;
5561 			if (data != orig)
5562 				WREG32(DMA_CLK_CTRL + offset, data);
5563 		}
5564 	}
5565 }
5566 
5567 static void si_enable_bif_mgls(struct radeon_device *rdev,
5568 			       bool enable)
5569 {
5570 	u32 orig, data;
5571 
5572 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5573 
5574 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5575 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5576 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5577 	else
5578 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5579 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5580 
5581 	if (orig != data)
5582 		WREG32_PCIE(PCIE_CNTL2, data);
5583 }
5584 
5585 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5586 			       bool enable)
5587 {
5588 	u32 orig, data;
5589 
5590 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5591 
5592 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5593 		data &= ~CLOCK_GATING_DIS;
5594 	else
5595 		data |= CLOCK_GATING_DIS;
5596 
5597 	if (orig != data)
5598 		WREG32(HDP_HOST_PATH_CNTL, data);
5599 }
5600 
5601 static void si_enable_hdp_ls(struct radeon_device *rdev,
5602 			     bool enable)
5603 {
5604 	u32 orig, data;
5605 
5606 	orig = data = RREG32(HDP_MEM_POWER_LS);
5607 
5608 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5609 		data |= HDP_LS_ENABLE;
5610 	else
5611 		data &= ~HDP_LS_ENABLE;
5612 
5613 	if (orig != data)
5614 		WREG32(HDP_MEM_POWER_LS, data);
5615 }
5616 
5617 static void si_update_cg(struct radeon_device *rdev,
5618 			 u32 block, bool enable)
5619 {
5620 	if (block & RADEON_CG_BLOCK_GFX) {
5621 		si_enable_gui_idle_interrupt(rdev, false);
5622 		/* order matters! */
5623 		if (enable) {
5624 			si_enable_mgcg(rdev, true);
5625 			si_enable_cgcg(rdev, true);
5626 		} else {
5627 			si_enable_cgcg(rdev, false);
5628 			si_enable_mgcg(rdev, false);
5629 		}
5630 		si_enable_gui_idle_interrupt(rdev, true);
5631 	}
5632 
5633 	if (block & RADEON_CG_BLOCK_MC) {
5634 		si_enable_mc_mgcg(rdev, enable);
5635 		si_enable_mc_ls(rdev, enable);
5636 	}
5637 
5638 	if (block & RADEON_CG_BLOCK_SDMA) {
5639 		si_enable_dma_mgcg(rdev, enable);
5640 	}
5641 
5642 	if (block & RADEON_CG_BLOCK_BIF) {
5643 		si_enable_bif_mgls(rdev, enable);
5644 	}
5645 
5646 	if (block & RADEON_CG_BLOCK_UVD) {
5647 		if (rdev->has_uvd) {
5648 			si_enable_uvd_mgcg(rdev, enable);
5649 		}
5650 	}
5651 
5652 	if (block & RADEON_CG_BLOCK_HDP) {
5653 		si_enable_hdp_mgcg(rdev, enable);
5654 		si_enable_hdp_ls(rdev, enable);
5655 	}
5656 }
5657 
5658 static void si_init_cg(struct radeon_device *rdev)
5659 {
5660 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5661 			    RADEON_CG_BLOCK_MC |
5662 			    RADEON_CG_BLOCK_SDMA |
5663 			    RADEON_CG_BLOCK_BIF |
5664 			    RADEON_CG_BLOCK_HDP), true);
5665 	if (rdev->has_uvd) {
5666 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5667 		si_init_uvd_internal_cg(rdev);
5668 	}
5669 }
5670 
5671 static void si_fini_cg(struct radeon_device *rdev)
5672 {
5673 	if (rdev->has_uvd) {
5674 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5675 	}
5676 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5677 			    RADEON_CG_BLOCK_MC |
5678 			    RADEON_CG_BLOCK_SDMA |
5679 			    RADEON_CG_BLOCK_BIF |
5680 			    RADEON_CG_BLOCK_HDP), false);
5681 }
5682 
5683 u32 si_get_csb_size(struct radeon_device *rdev)
5684 {
5685 	u32 count = 0;
5686 	const struct cs_section_def *sect = NULL;
5687 	const struct cs_extent_def *ext = NULL;
5688 
5689 	if (rdev->rlc.cs_data == NULL)
5690 		return 0;
5691 
5692 	/* begin clear state */
5693 	count += 2;
5694 	/* context control state */
5695 	count += 3;
5696 
5697 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5698 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5699 			if (sect->id == SECT_CONTEXT)
5700 				count += 2 + ext->reg_count;
5701 			else
5702 				return 0;
5703 		}
5704 	}
5705 	/* pa_sc_raster_config */
5706 	count += 3;
5707 	/* end clear state */
5708 	count += 2;
5709 	/* clear state */
5710 	count += 2;
5711 
5712 	return count;
5713 }
5714 
5715 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5716 {
5717 	u32 count = 0, i;
5718 	const struct cs_section_def *sect = NULL;
5719 	const struct cs_extent_def *ext = NULL;
5720 
5721 	if (rdev->rlc.cs_data == NULL)
5722 		return;
5723 	if (buffer == NULL)
5724 		return;
5725 
5726 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5727 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5728 
5729 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5730 	buffer[count++] = cpu_to_le32(0x80000000);
5731 	buffer[count++] = cpu_to_le32(0x80000000);
5732 
5733 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5734 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5735 			if (sect->id == SECT_CONTEXT) {
5736 				buffer[count++] =
5737 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5738 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5739 				for (i = 0; i < ext->reg_count; i++)
5740 					buffer[count++] = cpu_to_le32(ext->extent[i]);
5741 			} else {
5742 				return;
5743 			}
5744 		}
5745 	}
5746 
5747 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5748 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5749 	switch (rdev->family) {
5750 	case CHIP_TAHITI:
5751 	case CHIP_PITCAIRN:
5752 		buffer[count++] = cpu_to_le32(0x2a00126a);
5753 		break;
5754 	case CHIP_VERDE:
5755 		buffer[count++] = cpu_to_le32(0x0000124a);
5756 		break;
5757 	case CHIP_OLAND:
5758 		buffer[count++] = cpu_to_le32(0x00000082);
5759 		break;
5760 	case CHIP_HAINAN:
5761 		buffer[count++] = cpu_to_le32(0x00000000);
5762 		break;
5763 	default:
5764 		buffer[count++] = cpu_to_le32(0x00000000);
5765 		break;
5766 	}
5767 
5768 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5769 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5770 
5771 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5772 	buffer[count++] = cpu_to_le32(0);
5773 }
5774 
5775 static void si_init_pg(struct radeon_device *rdev)
5776 {
5777 	if (rdev->pg_flags) {
5778 		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5779 			si_init_dma_pg(rdev);
5780 		}
5781 		si_init_ao_cu_mask(rdev);
5782 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5783 			si_init_gfx_cgpg(rdev);
5784 		} else {
5785 			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5786 			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5787 		}
5788 		si_enable_dma_pg(rdev, true);
5789 		si_enable_gfx_cgpg(rdev, true);
5790 	} else {
5791 		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5792 		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5793 	}
5794 }
5795 
5796 static void si_fini_pg(struct radeon_device *rdev)
5797 {
5798 	if (rdev->pg_flags) {
5799 		si_enable_dma_pg(rdev, false);
5800 		si_enable_gfx_cgpg(rdev, false);
5801 	}
5802 }
5803 
5804 /*
5805  * RLC
5806  */
5807 void si_rlc_reset(struct radeon_device *rdev)
5808 {
5809 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5810 
5811 	tmp |= SOFT_RESET_RLC;
5812 	WREG32(GRBM_SOFT_RESET, tmp);
5813 	udelay(50);
5814 	tmp &= ~SOFT_RESET_RLC;
5815 	WREG32(GRBM_SOFT_RESET, tmp);
5816 	udelay(50);
5817 }
5818 
5819 static void si_rlc_stop(struct radeon_device *rdev)
5820 {
5821 	WREG32(RLC_CNTL, 0);
5822 
5823 	si_enable_gui_idle_interrupt(rdev, false);
5824 
5825 	si_wait_for_rlc_serdes(rdev);
5826 }
5827 
5828 static void si_rlc_start(struct radeon_device *rdev)
5829 {
5830 	WREG32(RLC_CNTL, RLC_ENABLE);
5831 
5832 	si_enable_gui_idle_interrupt(rdev, true);
5833 
5834 	udelay(50);
5835 }
5836 
5837 static bool si_lbpw_supported(struct radeon_device *rdev)
5838 {
5839 	u32 tmp;
5840 
5841 	/* Enable LBPW only for DDR3 */
5842 	tmp = RREG32(MC_SEQ_MISC0);
5843 	if ((tmp & 0xF0000000) == 0xB0000000)
5844 		return true;
5845 	return false;
5846 }
5847 
5848 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5849 {
5850 	u32 tmp;
5851 
5852 	tmp = RREG32(RLC_LB_CNTL);
5853 	if (enable)
5854 		tmp |= LOAD_BALANCE_ENABLE;
5855 	else
5856 		tmp &= ~LOAD_BALANCE_ENABLE;
5857 	WREG32(RLC_LB_CNTL, tmp);
5858 
5859 	if (!enable) {
5860 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5861 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5862 	}
5863 }
5864 
5865 static int si_rlc_resume(struct radeon_device *rdev)
5866 {
5867 	u32 i;
5868 
5869 	if (!rdev->rlc_fw)
5870 		return -EINVAL;
5871 
5872 	si_rlc_stop(rdev);
5873 
5874 	si_rlc_reset(rdev);
5875 
5876 	si_init_pg(rdev);
5877 
5878 	si_init_cg(rdev);
5879 
5880 	WREG32(RLC_RL_BASE, 0);
5881 	WREG32(RLC_RL_SIZE, 0);
5882 	WREG32(RLC_LB_CNTL, 0);
5883 	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5884 	WREG32(RLC_LB_CNTR_INIT, 0);
5885 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5886 
5887 	WREG32(RLC_MC_CNTL, 0);
5888 	WREG32(RLC_UCODE_CNTL, 0);
5889 
5890 	if (rdev->new_fw) {
5891 		const struct rlc_firmware_header_v1_0 *hdr =
5892 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5893 		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5894 		const __le32 *fw_data = (const __le32 *)
5895 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5896 
5897 		radeon_ucode_print_rlc_hdr(&hdr->header);
5898 
5899 		for (i = 0; i < fw_size; i++) {
5900 			WREG32(RLC_UCODE_ADDR, i);
5901 			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
5902 		}
5903 	} else {
5904 		const __be32 *fw_data =
5905 			(const __be32 *)rdev->rlc_fw->data;
5906 		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5907 			WREG32(RLC_UCODE_ADDR, i);
5908 			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5909 		}
5910 	}
5911 	WREG32(RLC_UCODE_ADDR, 0);
5912 
5913 	si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5914 
5915 	si_rlc_start(rdev);
5916 
5917 	return 0;
5918 }
5919 
5920 static void si_enable_interrupts(struct radeon_device *rdev)
5921 {
5922 	u32 ih_cntl = RREG32(IH_CNTL);
5923 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5924 
5925 	ih_cntl |= ENABLE_INTR;
5926 	ih_rb_cntl |= IH_RB_ENABLE;
5927 	WREG32(IH_CNTL, ih_cntl);
5928 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5929 	rdev->ih.enabled = true;
5930 }
5931 
5932 static void si_disable_interrupts(struct radeon_device *rdev)
5933 {
5934 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5935 	u32 ih_cntl = RREG32(IH_CNTL);
5936 
5937 	ih_rb_cntl &= ~IH_RB_ENABLE;
5938 	ih_cntl &= ~ENABLE_INTR;
5939 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5940 	WREG32(IH_CNTL, ih_cntl);
5941 	/* set rptr, wptr to 0 */
5942 	WREG32(IH_RB_RPTR, 0);
5943 	WREG32(IH_RB_WPTR, 0);
5944 	rdev->ih.enabled = false;
5945 	rdev->ih.rptr = 0;
5946 }
5947 
5948 static void si_disable_interrupt_state(struct radeon_device *rdev)
5949 {
5950 	int i;
5951 	u32 tmp;
5952 
5953 	tmp = RREG32(CP_INT_CNTL_RING0) &
5954 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5955 	WREG32(CP_INT_CNTL_RING0, tmp);
5956 	WREG32(CP_INT_CNTL_RING1, 0);
5957 	WREG32(CP_INT_CNTL_RING2, 0);
5958 	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5959 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5960 	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5961 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5962 	WREG32(GRBM_INT_CNTL, 0);
5963 	WREG32(SRBM_INT_CNTL, 0);
5964 	for (i = 0; i < rdev->num_crtc; i++)
5965 		WREG32(INT_MASK + crtc_offsets[i], 0);
5966 	for (i = 0; i < rdev->num_crtc; i++)
5967 		WREG32(GRPH_INT_CONTROL + crtc_offsets[i], 0);
5968 
5969 	if (!ASIC_IS_NODCE(rdev)) {
5970 		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5971 
5972 		for (i = 0; i < 6; i++)
5973 			WREG32_AND(DC_HPDx_INT_CONTROL(i),
5974 				   DC_HPDx_INT_POLARITY);
5975 	}
5976 }
5977 
5978 static int si_irq_init(struct radeon_device *rdev)
5979 {
5980 	int ret = 0;
5981 	int rb_bufsz;
5982 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5983 
5984 	/* allocate ring */
5985 	ret = r600_ih_ring_alloc(rdev);
5986 	if (ret)
5987 		return ret;
5988 
5989 	/* disable irqs */
5990 	si_disable_interrupts(rdev);
5991 
5992 	/* init rlc */
5993 	ret = si_rlc_resume(rdev);
5994 	if (ret) {
5995 		r600_ih_ring_fini(rdev);
5996 		return ret;
5997 	}
5998 
5999 	/* setup interrupt control */
6000 	/* set dummy read address to dummy page address */
6001 	WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
6002 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6003 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6004 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6005 	 */
6006 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6007 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6008 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6009 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6010 
6011 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6012 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6013 
6014 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6015 		      IH_WPTR_OVERFLOW_CLEAR |
6016 		      (rb_bufsz << 1));
6017 
6018 	if (rdev->wb.enabled)
6019 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6020 
6021 	/* set the writeback address whether it's enabled or not */
6022 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6023 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6024 
6025 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6026 
6027 	/* set rptr, wptr to 0 */
6028 	WREG32(IH_RB_RPTR, 0);
6029 	WREG32(IH_RB_WPTR, 0);
6030 
6031 	/* Default settings for IH_CNTL (disabled at first) */
6032 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6033 	/* RPTR_REARM only works if msi's are enabled */
6034 	if (rdev->msi_enabled)
6035 		ih_cntl |= RPTR_REARM;
6036 	WREG32(IH_CNTL, ih_cntl);
6037 
6038 	/* force the active interrupt state to all disabled */
6039 	si_disable_interrupt_state(rdev);
6040 
6041 	pci_set_master(rdev->pdev);
6042 
6043 	/* enable irqs */
6044 	si_enable_interrupts(rdev);
6045 
6046 	return ret;
6047 }
6048 
6049 /* The order we write back each register here is important */
6050 int si_irq_set(struct radeon_device *rdev)
6051 {
6052 	int i;
6053 	u32 cp_int_cntl;
6054 	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
6055 	u32 grbm_int_cntl = 0;
6056 	u32 dma_cntl, dma_cntl1;
6057 	u32 thermal_int = 0;
6058 
6059 	if (!rdev->irq.installed) {
6060 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6061 		return -EINVAL;
6062 	}
6063 	/* don't enable anything if the ih is disabled */
6064 	if (!rdev->ih.enabled) {
6065 		si_disable_interrupts(rdev);
6066 		/* force the active interrupt state to all disabled */
6067 		si_disable_interrupt_state(rdev);
6068 		return 0;
6069 	}
6070 
6071 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6072 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6073 
6074 	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6075 	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6076 
6077 	thermal_int = RREG32(CG_THERMAL_INT) &
6078 		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6079 
6080 	/* enable CP interrupts on all rings */
6081 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6082 		DRM_DEBUG("si_irq_set: sw int gfx\n");
6083 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6084 	}
6085 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6086 		DRM_DEBUG("si_irq_set: sw int cp1\n");
6087 		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
6088 	}
6089 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6090 		DRM_DEBUG("si_irq_set: sw int cp2\n");
6091 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
6092 	}
6093 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6094 		DRM_DEBUG("si_irq_set: sw int dma\n");
6095 		dma_cntl |= TRAP_ENABLE;
6096 	}
6097 
6098 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6099 		DRM_DEBUG("si_irq_set: sw int dma1\n");
6100 		dma_cntl1 |= TRAP_ENABLE;
6101 	}
6102 
6103 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6104 	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
6105 	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
6106 
6107 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
6108 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
6109 
6110 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6111 
6112 	if (rdev->irq.dpm_thermal) {
6113 		DRM_DEBUG("dpm thermal\n");
6114 		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6115 	}
6116 
6117 	for (i = 0; i < rdev->num_crtc; i++) {
6118 		radeon_irq_kms_set_irq_n_enabled(
6119 		    rdev, INT_MASK + crtc_offsets[i], VBLANK_INT_MASK,
6120 		    rdev->irq.crtc_vblank_int[i] ||
6121 		    atomic_read(&rdev->irq.pflip[i]), "vblank", i);
6122 	}
6123 
6124 	for (i = 0; i < rdev->num_crtc; i++)
6125 		WREG32(GRPH_INT_CONTROL + crtc_offsets[i], GRPH_PFLIP_INT_MASK);
6126 
6127 	if (!ASIC_IS_NODCE(rdev)) {
6128 		for (i = 0; i < 6; i++) {
6129 			radeon_irq_kms_set_irq_n_enabled(
6130 			    rdev, DC_HPDx_INT_CONTROL(i),
6131 			    DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN,
6132 			    rdev->irq.hpd[i], "HPD", i);
6133 		}
6134 	}
6135 
6136 	WREG32(CG_THERMAL_INT, thermal_int);
6137 
6138 	/* posting read */
6139 	RREG32(SRBM_STATUS);
6140 
6141 	return 0;
6142 }
6143 
6144 /* The order we write back each register here is important */
6145 static inline void si_irq_ack(struct radeon_device *rdev)
6146 {
6147 	int i, j;
6148 	u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
6149 	u32 *grph_int = rdev->irq.stat_regs.evergreen.grph_int;
6150 
6151 	if (ASIC_IS_NODCE(rdev))
6152 		return;
6153 
6154 	for (i = 0; i < 6; i++) {
6155 		disp_int[i] = RREG32(si_disp_int_status[i]);
6156 		if (i < rdev->num_crtc)
6157 			grph_int[i] = RREG32(GRPH_INT_STATUS + crtc_offsets[i]);
6158 	}
6159 
6160 	/* We write back each interrupt register in pairs of two */
6161 	for (i = 0; i < rdev->num_crtc; i += 2) {
6162 		for (j = i; j < (i + 2); j++) {
6163 			if (grph_int[j] & GRPH_PFLIP_INT_OCCURRED)
6164 				WREG32(GRPH_INT_STATUS + crtc_offsets[j],
6165 				       GRPH_PFLIP_INT_CLEAR);
6166 		}
6167 
6168 		for (j = i; j < (i + 2); j++) {
6169 			if (disp_int[j] & LB_D1_VBLANK_INTERRUPT)
6170 				WREG32(VBLANK_STATUS + crtc_offsets[j],
6171 				       VBLANK_ACK);
6172 			if (disp_int[j] & LB_D1_VLINE_INTERRUPT)
6173 				WREG32(VLINE_STATUS + crtc_offsets[j],
6174 				       VLINE_ACK);
6175 		}
6176 	}
6177 
6178 	for (i = 0; i < 6; i++) {
6179 		if (disp_int[i] & DC_HPD1_INTERRUPT)
6180 			WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_INT_ACK);
6181 	}
6182 
6183 	for (i = 0; i < 6; i++) {
6184 		if (disp_int[i] & DC_HPD1_RX_INTERRUPT)
6185 			WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_RX_INT_ACK);
6186 	}
6187 }
6188 
/*
 * si_irq_disable - disable the IH and quiesce every interrupt source
 *
 * Disables interrupts, waits 1ms, acknowledges pending display interrupts
 * and then forces all interrupt sources to disabled.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);

	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);

	si_disable_interrupt_state(rdev);
}
6197 
/* si_irq_suspend - disable interrupts, then stop the RLC. */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);

	si_rlc_stop(rdev);
}
6203 
/* si_irq_fini - suspend interrupt handling and tear down the IH ring. */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);

	r600_ih_ring_fini(rdev);
}
6209 
6210 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6211 {
6212 	u32 wptr, tmp;
6213 
6214 	if (rdev->wb.enabled)
6215 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6216 	else
6217 		wptr = RREG32(IH_RB_WPTR);
6218 
6219 	if (wptr & RB_OVERFLOW) {
6220 		wptr &= ~RB_OVERFLOW;
6221 		/* When a ring buffer overflow happen start parsing interrupt
6222 		 * from the last not overwritten vector (wptr + 16). Hopefully
6223 		 * this should allow us to catchup.
6224 		 */
6225 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6226 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6227 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6228 		tmp = RREG32(IH_RB_CNTL);
6229 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6230 		WREG32(IH_RB_CNTL, tmp);
6231 	}
6232 	return (wptr & rdev->ih.ptr_mask);
6233 }
6234 
6235 /*        SI IV Ring
6236  * Each IV ring entry is 128 bits:
6237  * [7:0]    - interrupt source id
6238  * [31:8]   - reserved
6239  * [59:32]  - interrupt source data
6240  * [63:60]  - reserved
6241  * [71:64]  - RINGID
6242  * [79:72]  - VMID
6243  * [127:80] - reserved
6244  */
6245 int si_irq_process(struct radeon_device *rdev)
6246 {
6247 	u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
6248 	u32 crtc_idx, hpd_idx;
6249 	u32 mask;
6250 	u32 wptr;
6251 	u32 rptr;
6252 	u32 src_id, src_data, ring_id;
6253 	u32 ring_index;
6254 	bool queue_hotplug = false;
6255 	bool queue_dp = false;
6256 	bool queue_thermal = false;
6257 	u32 status, addr;
6258 	const char *event_name;
6259 
6260 	if (!rdev->ih.enabled || rdev->shutdown)
6261 		return IRQ_NONE;
6262 
6263 	wptr = si_get_ih_wptr(rdev);
6264 
6265 restart_ih:
6266 	/* is somebody else already processing irqs? */
6267 	if (atomic_xchg(&rdev->ih.lock, 1))
6268 		return IRQ_NONE;
6269 
6270 	rptr = rdev->ih.rptr;
6271 	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6272 
6273 	/* Order reading of wptr vs. reading of IH ring data */
6274 	rmb();
6275 
6276 	/* display interrupts */
6277 	si_irq_ack(rdev);
6278 
6279 	while (rptr != wptr) {
6280 		/* wptr/rptr are in bytes! */
6281 		ring_index = rptr / 4;
6282 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6283 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6284 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6285 
6286 		switch (src_id) {
6287 		case 1: /* D1 vblank/vline */
6288 		case 2: /* D2 vblank/vline */
6289 		case 3: /* D3 vblank/vline */
6290 		case 4: /* D4 vblank/vline */
6291 		case 5: /* D5 vblank/vline */
6292 		case 6: /* D6 vblank/vline */
6293 			crtc_idx = src_id - 1;
6294 
6295 			if (src_data == 0) { /* vblank */
6296 				mask = LB_D1_VBLANK_INTERRUPT;
6297 				event_name = "vblank";
6298 
6299 				if (rdev->irq.crtc_vblank_int[crtc_idx]) {
6300 					drm_handle_vblank(rdev->ddev, crtc_idx);
6301 					rdev->pm.vblank_sync = true;
6302 					wake_up(&rdev->irq.vblank_queue);
6303 				}
6304 				if (atomic_read(&rdev->irq.pflip[crtc_idx])) {
6305 					radeon_crtc_handle_vblank(rdev,
6306 								  crtc_idx);
6307 				}
6308 
6309 			} else if (src_data == 1) { /* vline */
6310 				mask = LB_D1_VLINE_INTERRUPT;
6311 				event_name = "vline";
6312 			} else {
6313 				DRM_DEBUG("Unhandled interrupt: %d %d\n",
6314 					  src_id, src_data);
6315 				break;
6316 			}
6317 
6318 			if (!(disp_int[crtc_idx] & mask)) {
6319 				DRM_DEBUG("IH: D%d %s - IH event w/o asserted irq bit?\n",
6320 					  crtc_idx + 1, event_name);
6321 			}
6322 
6323 			disp_int[crtc_idx] &= ~mask;
6324 			DRM_DEBUG("IH: D%d %s\n", crtc_idx + 1, event_name);
6325 
6326 			break;
6327 		case 8: /* D1 page flip */
6328 		case 10: /* D2 page flip */
6329 		case 12: /* D3 page flip */
6330 		case 14: /* D4 page flip */
6331 		case 16: /* D5 page flip */
6332 		case 18: /* D6 page flip */
6333 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6334 			if (radeon_use_pflipirq > 0)
6335 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6336 			break;
6337 		case 42: /* HPD hotplug */
6338 			if (src_data <= 5) {
6339 				hpd_idx = src_data;
6340 				mask = DC_HPD1_INTERRUPT;
6341 				queue_hotplug = true;
6342 				event_name = "HPD";
6343 
6344 			} else if (src_data <= 11) {
6345 				hpd_idx = src_data - 6;
6346 				mask = DC_HPD1_RX_INTERRUPT;
6347 				queue_dp = true;
6348 				event_name = "HPD_RX";
6349 
6350 			} else {
6351 				DRM_DEBUG("Unhandled interrupt: %d %d\n",
6352 					  src_id, src_data);
6353 				break;
6354 			}
6355 
6356 			if (!(disp_int[hpd_idx] & mask))
6357 				DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6358 
6359 			disp_int[hpd_idx] &= ~mask;
6360 			DRM_DEBUG("IH: %s%d\n", event_name, hpd_idx + 1);
6361 			break;
6362 		case 96:
6363 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
6364 			WREG32(SRBM_INT_ACK, 0x1);
6365 			break;
6366 		case 124: /* UVD */
6367 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6368 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6369 			break;
6370 		case 146:
6371 		case 147:
6372 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6373 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6374 			/* reset addr and status */
6375 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6376 			if (addr == 0x0 && status == 0x0)
6377 				break;
6378 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6379 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6380 				addr);
6381 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6382 				status);
6383 			si_vm_decode_fault(rdev, status, addr);
6384 			break;
6385 		case 176: /* RINGID0 CP_INT */
6386 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6387 			break;
6388 		case 177: /* RINGID1 CP_INT */
6389 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6390 			break;
6391 		case 178: /* RINGID2 CP_INT */
6392 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6393 			break;
6394 		case 181: /* CP EOP event */
6395 			DRM_DEBUG("IH: CP EOP\n");
6396 			switch (ring_id) {
6397 			case 0:
6398 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6399 				break;
6400 			case 1:
6401 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6402 				break;
6403 			case 2:
6404 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6405 				break;
6406 			}
6407 			break;
6408 		case 224: /* DMA trap event */
6409 			DRM_DEBUG("IH: DMA trap\n");
6410 			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6411 			break;
6412 		case 230: /* thermal low to high */
6413 			DRM_DEBUG("IH: thermal low to high\n");
6414 			rdev->pm.dpm.thermal.high_to_low = false;
6415 			queue_thermal = true;
6416 			break;
6417 		case 231: /* thermal high to low */
6418 			DRM_DEBUG("IH: thermal high to low\n");
6419 			rdev->pm.dpm.thermal.high_to_low = true;
6420 			queue_thermal = true;
6421 			break;
6422 		case 233: /* GUI IDLE */
6423 			DRM_DEBUG("IH: GUI idle\n");
6424 			break;
6425 		case 244: /* DMA trap event */
6426 			DRM_DEBUG("IH: DMA1 trap\n");
6427 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6428 			break;
6429 		default:
6430 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6431 			break;
6432 		}
6433 
6434 		/* wptr/rptr are in bytes! */
6435 		rptr += 16;
6436 		rptr &= rdev->ih.ptr_mask;
6437 		WREG32(IH_RB_RPTR, rptr);
6438 	}
6439 	if (queue_dp)
6440 		schedule_work(&rdev->dp_work);
6441 	if (queue_hotplug)
6442 		schedule_delayed_work(&rdev->hotplug_work, 0);
6443 	if (queue_thermal && rdev->pm.dpm_enabled)
6444 		schedule_work(&rdev->pm.dpm.thermal.work);
6445 	rdev->ih.rptr = rptr;
6446 	atomic_set(&rdev->ih.lock, 0);
6447 
6448 	/* make sure wptr hasn't changed while processing */
6449 	wptr = si_get_ih_wptr(rdev);
6450 	if (wptr != rptr)
6451 		goto restart_ih;
6452 
6453 	return IRQ_HANDLED;
6454 }
6455 
6456 /*
6457  * startup/shutdown callbacks
6458  */
6459 static void si_uvd_init(struct radeon_device *rdev)
6460 {
6461 	int r;
6462 
6463 	if (!rdev->has_uvd)
6464 		return;
6465 
6466 	r = radeon_uvd_init(rdev);
6467 	if (r) {
6468 		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
6469 		/*
6470 		 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
6471 		 * to early fails uvd_v2_2_resume() and thus nothing happens
6472 		 * there. So it is pointless to try to go through that code
6473 		 * hence why we disable uvd here.
6474 		 */
6475 		rdev->has_uvd = false;
6476 		return;
6477 	}
6478 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
6479 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
6480 }
6481 
6482 static void si_uvd_start(struct radeon_device *rdev)
6483 {
6484 	int r;
6485 
6486 	if (!rdev->has_uvd)
6487 		return;
6488 
6489 	r = uvd_v2_2_resume(rdev);
6490 	if (r) {
6491 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
6492 		goto error;
6493 	}
6494 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
6495 	if (r) {
6496 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
6497 		goto error;
6498 	}
6499 	return;
6500 
6501 error:
6502 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6503 }
6504 
6505 static void si_uvd_resume(struct radeon_device *rdev)
6506 {
6507 	struct radeon_ring *ring;
6508 	int r;
6509 
6510 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
6511 		return;
6512 
6513 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6514 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
6515 	if (r) {
6516 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
6517 		return;
6518 	}
6519 	r = uvd_v1_0_init(rdev);
6520 	if (r) {
6521 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
6522 		return;
6523 	}
6524 }
6525 
6526 static void si_vce_init(struct radeon_device *rdev)
6527 {
6528 	int r;
6529 
6530 	if (!rdev->has_vce)
6531 		return;
6532 
6533 	r = radeon_vce_init(rdev);
6534 	if (r) {
6535 		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
6536 		/*
6537 		 * At this point rdev->vce.vcpu_bo is NULL which trickles down
6538 		 * to early fails si_vce_start() and thus nothing happens
6539 		 * there. So it is pointless to try to go through that code
6540 		 * hence why we disable vce here.
6541 		 */
6542 		rdev->has_vce = false;
6543 		return;
6544 	}
6545 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
6546 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
6547 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
6548 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
6549 }
6550 
6551 static void si_vce_start(struct radeon_device *rdev)
6552 {
6553 	int r;
6554 
6555 	if (!rdev->has_vce)
6556 		return;
6557 
6558 	r = radeon_vce_resume(rdev);
6559 	if (r) {
6560 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6561 		goto error;
6562 	}
6563 	r = vce_v1_0_resume(rdev);
6564 	if (r) {
6565 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6566 		goto error;
6567 	}
6568 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
6569 	if (r) {
6570 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
6571 		goto error;
6572 	}
6573 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
6574 	if (r) {
6575 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
6576 		goto error;
6577 	}
6578 	return;
6579 
6580 error:
6581 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
6582 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
6583 }
6584 
6585 static void si_vce_resume(struct radeon_device *rdev)
6586 {
6587 	struct radeon_ring *ring;
6588 	int r;
6589 
6590 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
6591 		return;
6592 
6593 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
6594 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6595 	if (r) {
6596 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6597 		return;
6598 	}
6599 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
6600 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6601 	if (r) {
6602 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6603 		return;
6604 	}
6605 	r = vce_v1_0_init(rdev);
6606 	if (r) {
6607 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
6608 		return;
6609 	}
6610 }
6611 
6612 static int si_startup(struct radeon_device *rdev)
6613 {
6614 	struct radeon_ring *ring;
6615 	int r;
6616 
6617 	/* enable pcie gen2/3 link */
6618 	si_pcie_gen3_enable(rdev);
6619 	/* enable aspm */
6620 	si_program_aspm(rdev);
6621 
6622 	/* scratch needs to be initialized before MC */
6623 	r = r600_vram_scratch_init(rdev);
6624 	if (r)
6625 		return r;
6626 
6627 	si_mc_program(rdev);
6628 
6629 	if (!rdev->pm.dpm_enabled) {
6630 		r = si_mc_load_microcode(rdev);
6631 		if (r) {
6632 			DRM_ERROR("Failed to load MC firmware!\n");
6633 			return r;
6634 		}
6635 	}
6636 
6637 	r = si_pcie_gart_enable(rdev);
6638 	if (r)
6639 		return r;
6640 	si_gpu_init(rdev);
6641 
6642 	/* allocate rlc buffers */
6643 	if (rdev->family == CHIP_VERDE) {
6644 		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
6645 		rdev->rlc.reg_list_size =
6646 			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
6647 	}
6648 	rdev->rlc.cs_data = si_cs_data;
6649 	r = sumo_rlc_init(rdev);
6650 	if (r) {
6651 		DRM_ERROR("Failed to init rlc BOs!\n");
6652 		return r;
6653 	}
6654 
6655 	/* allocate wb buffer */
6656 	r = radeon_wb_init(rdev);
6657 	if (r)
6658 		return r;
6659 
6660 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6661 	if (r) {
6662 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6663 		return r;
6664 	}
6665 
6666 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6667 	if (r) {
6668 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6669 		return r;
6670 	}
6671 
6672 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6673 	if (r) {
6674 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6675 		return r;
6676 	}
6677 
6678 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6679 	if (r) {
6680 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6681 		return r;
6682 	}
6683 
6684 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6685 	if (r) {
6686 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6687 		return r;
6688 	}
6689 
6690 	si_uvd_start(rdev);
6691 	si_vce_start(rdev);
6692 
6693 	/* Enable IRQ */
6694 	if (!rdev->irq.installed) {
6695 		r = radeon_irq_kms_init(rdev);
6696 		if (r)
6697 			return r;
6698 	}
6699 
6700 	r = si_irq_init(rdev);
6701 	if (r) {
6702 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
6703 		radeon_irq_kms_fini(rdev);
6704 		return r;
6705 	}
6706 	si_irq_set(rdev);
6707 
6708 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6709 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6710 			     RADEON_CP_PACKET2);
6711 	if (r)
6712 		return r;
6713 
6714 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6715 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6716 			     RADEON_CP_PACKET2);
6717 	if (r)
6718 		return r;
6719 
6720 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6721 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6722 			     RADEON_CP_PACKET2);
6723 	if (r)
6724 		return r;
6725 
6726 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6727 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6728 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6729 	if (r)
6730 		return r;
6731 
6732 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6733 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6734 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6735 	if (r)
6736 		return r;
6737 
6738 	r = si_cp_load_microcode(rdev);
6739 	if (r)
6740 		return r;
6741 	r = si_cp_resume(rdev);
6742 	if (r)
6743 		return r;
6744 
6745 	r = cayman_dma_resume(rdev);
6746 	if (r)
6747 		return r;
6748 
6749 	si_uvd_resume(rdev);
6750 	si_vce_resume(rdev);
6751 
6752 	r = radeon_ib_pool_init(rdev);
6753 	if (r) {
6754 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6755 		return r;
6756 	}
6757 
6758 	r = radeon_vm_manager_init(rdev);
6759 	if (r) {
6760 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6761 		return r;
6762 	}
6763 
6764 	r = radeon_audio_init(rdev);
6765 	if (r)
6766 		return r;
6767 
6768 	return 0;
6769 }
6770 
6771 int si_resume(struct radeon_device *rdev)
6772 {
6773 	int r;
6774 
6775 	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6776 	 * posting will perform necessary task to bring back GPU into good
6777 	 * shape.
6778 	 */
6779 	/* post card */
6780 	atom_asic_init(rdev->mode_info.atom_context);
6781 
6782 	/* init golden registers */
6783 	si_init_golden_registers(rdev);
6784 
6785 	if (rdev->pm.pm_method == PM_METHOD_DPM)
6786 		radeon_pm_resume(rdev);
6787 
6788 	rdev->accel_working = true;
6789 	r = si_startup(rdev);
6790 	if (r) {
6791 		DRM_ERROR("si startup failed on resume\n");
6792 		rdev->accel_working = false;
6793 		return r;
6794 	}
6795 
6796 	return r;
6797 
6798 }
6799 
6800 int si_suspend(struct radeon_device *rdev)
6801 {
6802 	radeon_pm_suspend(rdev);
6803 	radeon_audio_fini(rdev);
6804 	radeon_vm_manager_fini(rdev);
6805 	si_cp_enable(rdev, false);
6806 	cayman_dma_stop(rdev);
6807 	if (rdev->has_uvd) {
6808 		uvd_v1_0_fini(rdev);
6809 		radeon_uvd_suspend(rdev);
6810 	}
6811 	if (rdev->has_vce)
6812 		radeon_vce_suspend(rdev);
6813 	si_fini_pg(rdev);
6814 	si_fini_cg(rdev);
6815 	si_irq_suspend(rdev);
6816 	radeon_wb_disable(rdev);
6817 	si_pcie_gart_disable(rdev);
6818 	return 0;
6819 }
6820 
6821 /* Plan is to move initialization in that function and use
6822  * helper function so that radeon_device_init pretty much
6823  * do nothing more than calling asic specific function. This
6824  * should also allow to remove a bunch of callback function
6825  * like vram_info.
6826  */
6827 int si_init(struct radeon_device *rdev)
6828 {
6829 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6830 	int r;
6831 
6832 	/* Read BIOS */
6833 	if (!radeon_get_bios(rdev)) {
6834 		if (ASIC_IS_AVIVO(rdev))
6835 			return -EINVAL;
6836 	}
6837 	/* Must be an ATOMBIOS */
6838 	if (!rdev->is_atom_bios) {
6839 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6840 		return -EINVAL;
6841 	}
6842 	r = radeon_atombios_init(rdev);
6843 	if (r)
6844 		return r;
6845 
6846 	/* Post card if necessary */
6847 	if (!radeon_card_posted(rdev)) {
6848 		if (!rdev->bios) {
6849 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6850 			return -EINVAL;
6851 		}
6852 		DRM_INFO("GPU not posted. posting now...\n");
6853 		atom_asic_init(rdev->mode_info.atom_context);
6854 	}
6855 	/* init golden registers */
6856 	si_init_golden_registers(rdev);
6857 	/* Initialize scratch registers */
6858 	si_scratch_init(rdev);
6859 	/* Initialize surface registers */
6860 	radeon_surface_init(rdev);
6861 	/* Initialize clocks */
6862 	radeon_get_clock_info(rdev->ddev);
6863 
6864 	/* Fence driver */
6865 	r = radeon_fence_driver_init(rdev);
6866 	if (r)
6867 		return r;
6868 
6869 	/* initialize memory controller */
6870 	r = si_mc_init(rdev);
6871 	if (r)
6872 		return r;
6873 	/* Memory manager */
6874 	r = radeon_bo_init(rdev);
6875 	if (r)
6876 		return r;
6877 
6878 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6879 	    !rdev->rlc_fw || !rdev->mc_fw) {
6880 		r = si_init_microcode(rdev);
6881 		if (r) {
6882 			DRM_ERROR("Failed to load firmware!\n");
6883 			return r;
6884 		}
6885 	}
6886 
6887 	/* Initialize power management */
6888 	radeon_pm_init(rdev);
6889 
6890 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6891 	ring->ring_obj = NULL;
6892 	r600_ring_init(rdev, ring, 1024 * 1024);
6893 
6894 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6895 	ring->ring_obj = NULL;
6896 	r600_ring_init(rdev, ring, 1024 * 1024);
6897 
6898 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6899 	ring->ring_obj = NULL;
6900 	r600_ring_init(rdev, ring, 1024 * 1024);
6901 
6902 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6903 	ring->ring_obj = NULL;
6904 	r600_ring_init(rdev, ring, 64 * 1024);
6905 
6906 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6907 	ring->ring_obj = NULL;
6908 	r600_ring_init(rdev, ring, 64 * 1024);
6909 
6910 	si_uvd_init(rdev);
6911 	si_vce_init(rdev);
6912 
6913 	rdev->ih.ring_obj = NULL;
6914 	r600_ih_ring_init(rdev, 64 * 1024);
6915 
6916 	r = r600_pcie_gart_init(rdev);
6917 	if (r)
6918 		return r;
6919 
6920 	rdev->accel_working = true;
6921 	r = si_startup(rdev);
6922 	if (r) {
6923 		dev_err(rdev->dev, "disabling GPU acceleration\n");
6924 		si_cp_fini(rdev);
6925 		cayman_dma_fini(rdev);
6926 		si_irq_fini(rdev);
6927 		sumo_rlc_fini(rdev);
6928 		radeon_wb_fini(rdev);
6929 		radeon_ib_pool_fini(rdev);
6930 		radeon_vm_manager_fini(rdev);
6931 		radeon_irq_kms_fini(rdev);
6932 		si_pcie_gart_fini(rdev);
6933 		rdev->accel_working = false;
6934 	}
6935 
6936 	/* Don't start up if the MC ucode is missing.
6937 	 * The default clocks and voltages before the MC ucode
6938 	 * is loaded are not suffient for advanced operations.
6939 	 */
6940 	if (!rdev->mc_fw) {
6941 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
6942 		return -EINVAL;
6943 	}
6944 
6945 	return 0;
6946 }
6947 
6948 void si_fini(struct radeon_device *rdev)
6949 {
6950 	radeon_pm_fini(rdev);
6951 	si_cp_fini(rdev);
6952 	cayman_dma_fini(rdev);
6953 	si_fini_pg(rdev);
6954 	si_fini_cg(rdev);
6955 	si_irq_fini(rdev);
6956 	sumo_rlc_fini(rdev);
6957 	radeon_wb_fini(rdev);
6958 	radeon_vm_manager_fini(rdev);
6959 	radeon_ib_pool_fini(rdev);
6960 	radeon_irq_kms_fini(rdev);
6961 	if (rdev->has_uvd) {
6962 		uvd_v1_0_fini(rdev);
6963 		radeon_uvd_fini(rdev);
6964 	}
6965 	if (rdev->has_vce)
6966 		radeon_vce_fini(rdev);
6967 	si_pcie_gart_fini(rdev);
6968 	r600_vram_scratch_fini(rdev);
6969 	radeon_gem_fini(rdev);
6970 	radeon_fence_driver_fini(rdev);
6971 	radeon_bo_fini(rdev);
6972 	radeon_atombios_fini(rdev);
6973 	kfree(rdev->bios);
6974 	rdev->bios = NULL;
6975 }
6976 
6977 /**
6978  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6979  *
6980  * @rdev: radeon_device pointer
6981  *
6982  * Fetches a GPU clock counter snapshot (SI).
6983  * Returns the 64 bit clock counter snapshot.
6984  */
6985 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6986 {
6987 	uint64_t clock;
6988 
6989 	mutex_lock(&rdev->gpu_clock_mutex);
6990 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6991 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6992 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6993 	mutex_unlock(&rdev->gpu_clock_mutex);
6994 	return clock;
6995 }
6996 
6997 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6998 {
6999 	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
7000 	int r;
7001 
7002 	/* bypass vclk and dclk with bclk */
7003 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7004 		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
7005 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7006 
7007 	/* put PLL in bypass mode */
7008 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
7009 
7010 	if (!vclk || !dclk) {
7011 		/* keep the Bypass mode */
7012 		return 0;
7013 	}
7014 
7015 	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
7016 					  16384, 0x03FFFFFF, 0, 128, 5,
7017 					  &fb_div, &vclk_div, &dclk_div);
7018 	if (r)
7019 		return r;
7020 
7021 	/* set RESET_ANTI_MUX to 0 */
7022 	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7023 
7024 	/* set VCO_MODE to 1 */
7025 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
7026 
7027 	/* disable sleep mode */
7028 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
7029 
7030 	/* deassert UPLL_RESET */
7031 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7032 
7033 	mdelay(1);
7034 
7035 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7036 	if (r)
7037 		return r;
7038 
7039 	/* assert UPLL_RESET again */
7040 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
7041 
7042 	/* disable spread spectrum. */
7043 	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7044 
7045 	/* set feedback divider */
7046 	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
7047 
7048 	/* set ref divider to 0 */
7049 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
7050 
7051 	if (fb_div < 307200)
7052 		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
7053 	else
7054 		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
7055 
7056 	/* set PDIV_A and PDIV_B */
7057 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7058 		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
7059 		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
7060 
7061 	/* give the PLL some time to settle */
7062 	mdelay(15);
7063 
7064 	/* deassert PLL_RESET */
7065 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7066 
7067 	mdelay(15);
7068 
7069 	/* switch from bypass mode to normal mode */
7070 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
7071 
7072 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7073 	if (r)
7074 		return r;
7075 
7076 	/* switch VCLK and DCLK selection */
7077 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7078 		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
7079 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7080 
7081 	mdelay(100);
7082 
7083 	return 0;
7084 }
7085 
7086 static void si_pcie_gen3_enable(struct radeon_device *rdev)
7087 {
7088 	struct pci_dev *root = rdev->pdev->bus->self;
7089 	enum pci_bus_speed speed_cap;
7090 	u32 speed_cntl, current_data_rate;
7091 	int i;
7092 	u16 tmp16;
7093 
7094 	if (pci_is_root_bus(rdev->pdev->bus))
7095 		return;
7096 
7097 	if (radeon_pcie_gen2 == 0)
7098 		return;
7099 
7100 	if (rdev->flags & RADEON_IS_IGP)
7101 		return;
7102 
7103 	if (!(rdev->flags & RADEON_IS_PCIE))
7104 		return;
7105 
7106 	speed_cap = pcie_get_speed_cap(root);
7107 	if (speed_cap == PCI_SPEED_UNKNOWN)
7108 		return;
7109 
7110 	if ((speed_cap != PCIE_SPEED_8_0GT) &&
7111 	    (speed_cap != PCIE_SPEED_5_0GT))
7112 		return;
7113 
7114 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7115 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7116 		LC_CURRENT_DATA_RATE_SHIFT;
7117 	if (speed_cap == PCIE_SPEED_8_0GT) {
7118 		if (current_data_rate == 2) {
7119 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7120 			return;
7121 		}
7122 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7123 	} else if (speed_cap == PCIE_SPEED_5_0GT) {
7124 		if (current_data_rate == 1) {
7125 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7126 			return;
7127 		}
7128 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7129 	}
7130 
7131 	if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
7132 		return;
7133 
7134 	if (speed_cap == PCIE_SPEED_8_0GT) {
7135 		/* re-try equalization if gen3 is not already enabled */
7136 		if (current_data_rate != 2) {
7137 			u16 bridge_cfg, gpu_cfg;
7138 			u16 bridge_cfg2, gpu_cfg2;
7139 			u32 max_lw, current_lw, tmp;
7140 
7141 			pcie_capability_read_word(root, PCI_EXP_LNKCTL,
7142 						  &bridge_cfg);
7143 			pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL,
7144 						  &gpu_cfg);
7145 
7146 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7147 			pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
7148 
7149 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7150 			pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL,
7151 						   tmp16);
7152 
7153 			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7154 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7155 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7156 
7157 			if (current_lw < max_lw) {
7158 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7159 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
7160 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7161 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7162 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7163 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7164 				}
7165 			}
7166 
7167 			for (i = 0; i < 10; i++) {
7168 				/* check status */
7169 				pcie_capability_read_word(rdev->pdev,
7170 							  PCI_EXP_DEVSTA,
7171 							  &tmp16);
7172 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7173 					break;
7174 
7175 				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
7176 							  &bridge_cfg);
7177 				pcie_capability_read_word(rdev->pdev,
7178 							  PCI_EXP_LNKCTL,
7179 							  &gpu_cfg);
7180 
7181 				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
7182 							  &bridge_cfg2);
7183 				pcie_capability_read_word(rdev->pdev,
7184 							  PCI_EXP_LNKCTL2,
7185 							  &gpu_cfg2);
7186 
7187 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7188 				tmp |= LC_SET_QUIESCE;
7189 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7190 
7191 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7192 				tmp |= LC_REDO_EQ;
7193 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7194 
7195 				msleep(100);
7196 
7197 				/* linkctl */
7198 				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
7199 							  &tmp16);
7200 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7201 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7202 				pcie_capability_write_word(root,
7203 							   PCI_EXP_LNKCTL,
7204 							   tmp16);
7205 
7206 				pcie_capability_read_word(rdev->pdev,
7207 							  PCI_EXP_LNKCTL,
7208 							  &tmp16);
7209 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7210 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7211 				pcie_capability_write_word(rdev->pdev,
7212 							   PCI_EXP_LNKCTL,
7213 							   tmp16);
7214 
7215 				/* linkctl2 */
7216 				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
7217 							  &tmp16);
7218 				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
7219 					   PCI_EXP_LNKCTL2_TX_MARGIN);
7220 				tmp16 |= (bridge_cfg2 &
7221 					  (PCI_EXP_LNKCTL2_ENTER_COMP |
7222 					   PCI_EXP_LNKCTL2_TX_MARGIN));
7223 				pcie_capability_write_word(root,
7224 							   PCI_EXP_LNKCTL2,
7225 							   tmp16);
7226 
7227 				pcie_capability_read_word(rdev->pdev,
7228 							  PCI_EXP_LNKCTL2,
7229 							  &tmp16);
7230 				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
7231 					   PCI_EXP_LNKCTL2_TX_MARGIN);
7232 				tmp16 |= (gpu_cfg2 &
7233 					  (PCI_EXP_LNKCTL2_ENTER_COMP |
7234 					   PCI_EXP_LNKCTL2_TX_MARGIN));
7235 				pcie_capability_write_word(rdev->pdev,
7236 							   PCI_EXP_LNKCTL2,
7237 							   tmp16);
7238 
7239 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7240 				tmp &= ~LC_SET_QUIESCE;
7241 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7242 			}
7243 		}
7244 	}
7245 
7246 	/* set the link speed */
7247 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7248 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7249 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7250 
7251 	pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL2, &tmp16);
7252 	tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
7253 	if (speed_cap == PCIE_SPEED_8_0GT)
7254 		tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
7255 	else if (speed_cap == PCIE_SPEED_5_0GT)
7256 		tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
7257 	else
7258 		tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
7259 	pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL2, tmp16);
7260 
7261 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7262 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7263 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7264 
7265 	for (i = 0; i < rdev->usec_timeout; i++) {
7266 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7267 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7268 			break;
7269 		udelay(1);
7270 	}
7271 }
7272 
7273 static void si_program_aspm(struct radeon_device *rdev)
7274 {
7275 	u32 data, orig;
7276 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7277 	bool disable_clkreq = false;
7278 
7279 	if (radeon_aspm == 0)
7280 		return;
7281 
7282 	if (!(rdev->flags & RADEON_IS_PCIE))
7283 		return;
7284 
7285 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7286 	data &= ~LC_XMIT_N_FTS_MASK;
7287 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7288 	if (orig != data)
7289 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7290 
7291 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7292 	data |= LC_GO_TO_RECOVERY;
7293 	if (orig != data)
7294 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7295 
7296 	orig = data = RREG32_PCIE(PCIE_P_CNTL);
7297 	data |= P_IGNORE_EDB_ERR;
7298 	if (orig != data)
7299 		WREG32_PCIE(PCIE_P_CNTL, data);
7300 
7301 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7302 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7303 	data |= LC_PMI_TO_L1_DIS;
7304 	if (!disable_l0s)
7305 		data |= LC_L0S_INACTIVITY(7);
7306 
7307 	if (!disable_l1) {
7308 		data |= LC_L1_INACTIVITY(7);
7309 		data &= ~LC_PMI_TO_L1_DIS;
7310 		if (orig != data)
7311 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7312 
7313 		if (!disable_plloff_in_l1) {
7314 			bool clk_req_support;
7315 
7316 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7317 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7318 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7319 			if (orig != data)
7320 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7321 
7322 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7323 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7324 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7325 			if (orig != data)
7326 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7327 
7328 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7329 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7330 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7331 			if (orig != data)
7332 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7333 
7334 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7335 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7336 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7337 			if (orig != data)
7338 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7339 
7340 			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7341 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7342 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7343 				if (orig != data)
7344 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7345 
7346 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7347 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7348 				if (orig != data)
7349 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7350 
7351 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7352 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7353 				if (orig != data)
7354 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7355 
7356 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7357 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7358 				if (orig != data)
7359 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7360 
7361 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7362 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7363 				if (orig != data)
7364 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7365 
7366 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7367 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7368 				if (orig != data)
7369 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7370 
7371 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7372 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7373 				if (orig != data)
7374 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7375 
7376 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7377 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7378 				if (orig != data)
7379 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7380 			}
7381 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7382 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7383 			data |= LC_DYN_LANES_PWR_STATE(3);
7384 			if (orig != data)
7385 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7386 
7387 			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7388 			data &= ~LS2_EXIT_TIME_MASK;
7389 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7390 				data |= LS2_EXIT_TIME(5);
7391 			if (orig != data)
7392 				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7393 
7394 			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7395 			data &= ~LS2_EXIT_TIME_MASK;
7396 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7397 				data |= LS2_EXIT_TIME(5);
7398 			if (orig != data)
7399 				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7400 
7401 			if (!disable_clkreq &&
7402 			    !pci_is_root_bus(rdev->pdev->bus)) {
7403 				struct pci_dev *root = rdev->pdev->bus->self;
7404 				u32 lnkcap;
7405 
7406 				clk_req_support = false;
7407 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7408 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7409 					clk_req_support = true;
7410 			} else {
7411 				clk_req_support = false;
7412 			}
7413 
7414 			if (clk_req_support) {
7415 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7416 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7417 				if (orig != data)
7418 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7419 
7420 				orig = data = RREG32(THM_CLK_CNTL);
7421 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7422 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7423 				if (orig != data)
7424 					WREG32(THM_CLK_CNTL, data);
7425 
7426 				orig = data = RREG32(MISC_CLK_CNTL);
7427 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7428 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7429 				if (orig != data)
7430 					WREG32(MISC_CLK_CNTL, data);
7431 
7432 				orig = data = RREG32(CG_CLKPIN_CNTL);
7433 				data &= ~BCLK_AS_XCLK;
7434 				if (orig != data)
7435 					WREG32(CG_CLKPIN_CNTL, data);
7436 
7437 				orig = data = RREG32(CG_CLKPIN_CNTL_2);
7438 				data &= ~FORCE_BIF_REFCLK_EN;
7439 				if (orig != data)
7440 					WREG32(CG_CLKPIN_CNTL_2, data);
7441 
7442 				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7443 				data &= ~MPLL_CLKOUT_SEL_MASK;
7444 				data |= MPLL_CLKOUT_SEL(4);
7445 				if (orig != data)
7446 					WREG32(MPLL_BYPASSCLK_SEL, data);
7447 
7448 				orig = data = RREG32(SPLL_CNTL_MODE);
7449 				data &= ~SPLL_REFCLK_SEL_MASK;
7450 				if (orig != data)
7451 					WREG32(SPLL_CNTL_MODE, data);
7452 			}
7453 		}
7454 	} else {
7455 		if (orig != data)
7456 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7457 	}
7458 
7459 	orig = data = RREG32_PCIE(PCIE_CNTL2);
7460 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7461 	if (orig != data)
7462 		WREG32_PCIE(PCIE_CNTL2, data);
7463 
7464 	if (!disable_l0s) {
7465 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7466 		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7467 			data = RREG32_PCIE(PCIE_LC_STATUS1);
7468 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7469 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7470 				data &= ~LC_L0S_INACTIVITY_MASK;
7471 				if (orig != data)
7472 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7473 			}
7474 		}
7475 	}
7476 }
7477 
7478 static int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
7479 {
7480 	unsigned i;
7481 
7482 	/* make sure VCEPLL_CTLREQ is deasserted */
7483 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7484 
7485 	mdelay(10);
7486 
7487 	/* assert UPLL_CTLREQ */
7488 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
7489 
7490 	/* wait for CTLACK and CTLACK2 to get asserted */
7491 	for (i = 0; i < 100; ++i) {
7492 		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
7493 		if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
7494 			break;
7495 		mdelay(10);
7496 	}
7497 
7498 	/* deassert UPLL_CTLREQ */
7499 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7500 
7501 	if (i == 100) {
7502 		DRM_ERROR("Timeout setting UVD clocks!\n");
7503 		return -ETIMEDOUT;
7504 	}
7505 
7506 	return 0;
7507 }
7508 
7509 int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
7510 {
7511 	unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
7512 	int r;
7513 
7514 	/* bypass evclk and ecclk with bclk */
7515 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7516 		     EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
7517 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7518 
7519 	/* put PLL in bypass mode */
7520 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
7521 		     ~VCEPLL_BYPASS_EN_MASK);
7522 
7523 	if (!evclk || !ecclk) {
7524 		/* keep the Bypass mode, put PLL to sleep */
7525 		WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7526 			     ~VCEPLL_SLEEP_MASK);
7527 		return 0;
7528 	}
7529 
7530 	r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
7531 					  16384, 0x03FFFFFF, 0, 128, 5,
7532 					  &fb_div, &evclk_div, &ecclk_div);
7533 	if (r)
7534 		return r;
7535 
7536 	/* set RESET_ANTI_MUX to 0 */
7537 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7538 
7539 	/* set VCO_MODE to 1 */
7540 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
7541 		     ~VCEPLL_VCO_MODE_MASK);
7542 
7543 	/* toggle VCEPLL_SLEEP to 1 then back to 0 */
7544 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7545 		     ~VCEPLL_SLEEP_MASK);
7546 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);
7547 
7548 	/* deassert VCEPLL_RESET */
7549 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7550 
7551 	mdelay(1);
7552 
7553 	r = si_vce_send_vcepll_ctlreq(rdev);
7554 	if (r)
7555 		return r;
7556 
7557 	/* assert VCEPLL_RESET again */
7558 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);
7559 
7560 	/* disable spread spectrum. */
7561 	WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7562 
7563 	/* set feedback divider */
7564 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);
7565 
7566 	/* set ref divider to 0 */
7567 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);
7568 
7569 	/* set PDIV_A and PDIV_B */
7570 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7571 		     VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
7572 		     ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));
7573 
7574 	/* give the PLL some time to settle */
7575 	mdelay(15);
7576 
7577 	/* deassert PLL_RESET */
7578 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7579 
7580 	mdelay(15);
7581 
7582 	/* switch from bypass mode to normal mode */
7583 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);
7584 
7585 	r = si_vce_send_vcepll_ctlreq(rdev);
7586 	if (r)
7587 		return r;
7588 
7589 	/* switch VCLK and DCLK selection */
7590 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7591 		     EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
7592 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7593 
7594 	mdelay(100);
7595 
7596 	return 0;
7597 }
7598