xref: /openbmc/linux/drivers/gpu/drm/radeon/si.c (revision ce666cec)
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <linux/pci.h>
28 #include <linux/slab.h>
29 
30 #include <drm/drm_vblank.h>
31 #include <drm/radeon_drm.h>
32 
33 #include "atom.h"
34 #include "clearstate_si.h"
35 #include "evergreen.h"
36 #include "r600.h"
37 #include "radeon.h"
38 #include "radeon_asic.h"
39 #include "radeon_audio.h"
40 #include "radeon_ucode.h"
41 #include "si_blit_shaders.h"
42 #include "si.h"
43 #include "sid.h"
44 
45 
/*
 * Firmware image names registered through MODULE_FIRMWARE().
 *
 * For each of TAHITI, PITCAIRN, VERDE, OLAND and HAINAN there is an
 * upper-case file set (pfp, me, ce, mc, mc2, rlc, smc) followed by a
 * lower-case set (pfp, me, ce, mc, rlc, smc).  The lower-case sets for
 * pitcairn, verde, oland and hainan additionally list a "_k_smc" image;
 * the tahiti set does not.
 */

/* TAHITI, upper-case names (includes mc2) */
MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/TAHITI_smc.bin");

/* tahiti, lower-case names */
MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
MODULE_FIRMWARE("radeon/tahiti_me.bin");
MODULE_FIRMWARE("radeon/tahiti_ce.bin");
MODULE_FIRMWARE("radeon/tahiti_mc.bin");
MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
MODULE_FIRMWARE("radeon/tahiti_smc.bin");

/* PITCAIRN, upper-case names (includes mc2) */
MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");

/* pitcairn, lower-case names (adds k_smc) */
MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
MODULE_FIRMWARE("radeon/pitcairn_me.bin");
MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");

/* VERDE, upper-case names (includes mc2) */
MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_smc.bin");

/* verde, lower-case names (adds k_smc) */
MODULE_FIRMWARE("radeon/verde_pfp.bin");
MODULE_FIRMWARE("radeon/verde_me.bin");
MODULE_FIRMWARE("radeon/verde_ce.bin");
MODULE_FIRMWARE("radeon/verde_mc.bin");
MODULE_FIRMWARE("radeon/verde_rlc.bin");
MODULE_FIRMWARE("radeon/verde_smc.bin");
MODULE_FIRMWARE("radeon/verde_k_smc.bin");

/* OLAND, upper-case names (includes mc2) */
MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_smc.bin");

/* oland, lower-case names (adds k_smc) */
MODULE_FIRMWARE("radeon/oland_pfp.bin");
MODULE_FIRMWARE("radeon/oland_me.bin");
MODULE_FIRMWARE("radeon/oland_ce.bin");
MODULE_FIRMWARE("radeon/oland_mc.bin");
MODULE_FIRMWARE("radeon/oland_rlc.bin");
MODULE_FIRMWARE("radeon/oland_smc.bin");
MODULE_FIRMWARE("radeon/oland_k_smc.bin");

/* HAINAN, upper-case names (includes mc2) */
MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
MODULE_FIRMWARE("radeon/HAINAN_me.bin");
MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
MODULE_FIRMWARE("radeon/HAINAN_smc.bin");

/* hainan, lower-case names (adds k_smc and banks_k_2_smc) */
MODULE_FIRMWARE("radeon/hainan_pfp.bin");
MODULE_FIRMWARE("radeon/hainan_me.bin");
MODULE_FIRMWARE("radeon/hainan_ce.bin");
MODULE_FIRMWARE("radeon/hainan_mc.bin");
MODULE_FIRMWARE("radeon/hainan_rlc.bin");
MODULE_FIRMWARE("radeon/hainan_smc.bin");
MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
MODULE_FIRMWARE("radeon/banks_k_2_smc.bin");

/* MC image whose name is not tied to one of the ASIC names above */
MODULE_FIRMWARE("radeon/si58_mc.bin");
127 
128 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
129 static void si_pcie_gen3_enable(struct radeon_device *rdev);
130 static void si_program_aspm(struct radeon_device *rdev);
131 extern void sumo_rlc_fini(struct radeon_device *rdev);
132 extern int sumo_rlc_init(struct radeon_device *rdev);
133 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
134 					 bool enable);
135 static void si_init_pg(struct radeon_device *rdev);
136 static void si_init_cg(struct radeon_device *rdev);
137 static void si_fini_pg(struct radeon_device *rdev);
138 static void si_fini_cg(struct radeon_device *rdev);
139 static void si_rlc_stop(struct radeon_device *rdev);
140 
141 static const u32 crtc_offsets[] =
142 {
143 	EVERGREEN_CRTC0_REGISTER_OFFSET,
144 	EVERGREEN_CRTC1_REGISTER_OFFSET,
145 	EVERGREEN_CRTC2_REGISTER_OFFSET,
146 	EVERGREEN_CRTC3_REGISTER_OFFSET,
147 	EVERGREEN_CRTC4_REGISTER_OFFSET,
148 	EVERGREEN_CRTC5_REGISTER_OFFSET
149 };
150 
151 static const u32 si_disp_int_status[] =
152 {
153 	DISP_INTERRUPT_STATUS,
154 	DISP_INTERRUPT_STATUS_CONTINUE,
155 	DISP_INTERRUPT_STATUS_CONTINUE2,
156 	DISP_INTERRUPT_STATUS_CONTINUE3,
157 	DISP_INTERRUPT_STATUS_CONTINUE4,
158 	DISP_INTERRUPT_STATUS_CONTINUE5
159 };
160 
/*
 * Address of the x-th HPD control / interrupt-control / interrupt-status
 * register: the corresponding DC_HPD1_* register plus x * 0xc.
 *
 * The argument is parenthesized so that an expression such as "i + 1" is
 * scaled as a whole instead of only its last term.
 */
#define DC_HPDx_CONTROL(x)        (DC_HPD1_CONTROL     + ((x) * 0xc))
#define DC_HPDx_INT_CONTROL(x)    (DC_HPD1_INT_CONTROL + ((x) * 0xc))
#define DC_HPDx_INT_STATUS_REG(x) (DC_HPD1_INT_STATUS  + ((x) * 0xc))
164 
165 static const u32 verde_rlc_save_restore_register_list[] =
166 {
167 	(0x8000 << 16) | (0x98f4 >> 2),
168 	0x00000000,
169 	(0x8040 << 16) | (0x98f4 >> 2),
170 	0x00000000,
171 	(0x8000 << 16) | (0xe80 >> 2),
172 	0x00000000,
173 	(0x8040 << 16) | (0xe80 >> 2),
174 	0x00000000,
175 	(0x8000 << 16) | (0x89bc >> 2),
176 	0x00000000,
177 	(0x8040 << 16) | (0x89bc >> 2),
178 	0x00000000,
179 	(0x8000 << 16) | (0x8c1c >> 2),
180 	0x00000000,
181 	(0x8040 << 16) | (0x8c1c >> 2),
182 	0x00000000,
183 	(0x9c00 << 16) | (0x98f0 >> 2),
184 	0x00000000,
185 	(0x9c00 << 16) | (0xe7c >> 2),
186 	0x00000000,
187 	(0x8000 << 16) | (0x9148 >> 2),
188 	0x00000000,
189 	(0x8040 << 16) | (0x9148 >> 2),
190 	0x00000000,
191 	(0x9c00 << 16) | (0x9150 >> 2),
192 	0x00000000,
193 	(0x9c00 << 16) | (0x897c >> 2),
194 	0x00000000,
195 	(0x9c00 << 16) | (0x8d8c >> 2),
196 	0x00000000,
197 	(0x9c00 << 16) | (0xac54 >> 2),
198 	0X00000000,
199 	0x3,
200 	(0x9c00 << 16) | (0x98f8 >> 2),
201 	0x00000000,
202 	(0x9c00 << 16) | (0x9910 >> 2),
203 	0x00000000,
204 	(0x9c00 << 16) | (0x9914 >> 2),
205 	0x00000000,
206 	(0x9c00 << 16) | (0x9918 >> 2),
207 	0x00000000,
208 	(0x9c00 << 16) | (0x991c >> 2),
209 	0x00000000,
210 	(0x9c00 << 16) | (0x9920 >> 2),
211 	0x00000000,
212 	(0x9c00 << 16) | (0x9924 >> 2),
213 	0x00000000,
214 	(0x9c00 << 16) | (0x9928 >> 2),
215 	0x00000000,
216 	(0x9c00 << 16) | (0x992c >> 2),
217 	0x00000000,
218 	(0x9c00 << 16) | (0x9930 >> 2),
219 	0x00000000,
220 	(0x9c00 << 16) | (0x9934 >> 2),
221 	0x00000000,
222 	(0x9c00 << 16) | (0x9938 >> 2),
223 	0x00000000,
224 	(0x9c00 << 16) | (0x993c >> 2),
225 	0x00000000,
226 	(0x9c00 << 16) | (0x9940 >> 2),
227 	0x00000000,
228 	(0x9c00 << 16) | (0x9944 >> 2),
229 	0x00000000,
230 	(0x9c00 << 16) | (0x9948 >> 2),
231 	0x00000000,
232 	(0x9c00 << 16) | (0x994c >> 2),
233 	0x00000000,
234 	(0x9c00 << 16) | (0x9950 >> 2),
235 	0x00000000,
236 	(0x9c00 << 16) | (0x9954 >> 2),
237 	0x00000000,
238 	(0x9c00 << 16) | (0x9958 >> 2),
239 	0x00000000,
240 	(0x9c00 << 16) | (0x995c >> 2),
241 	0x00000000,
242 	(0x9c00 << 16) | (0x9960 >> 2),
243 	0x00000000,
244 	(0x9c00 << 16) | (0x9964 >> 2),
245 	0x00000000,
246 	(0x9c00 << 16) | (0x9968 >> 2),
247 	0x00000000,
248 	(0x9c00 << 16) | (0x996c >> 2),
249 	0x00000000,
250 	(0x9c00 << 16) | (0x9970 >> 2),
251 	0x00000000,
252 	(0x9c00 << 16) | (0x9974 >> 2),
253 	0x00000000,
254 	(0x9c00 << 16) | (0x9978 >> 2),
255 	0x00000000,
256 	(0x9c00 << 16) | (0x997c >> 2),
257 	0x00000000,
258 	(0x9c00 << 16) | (0x9980 >> 2),
259 	0x00000000,
260 	(0x9c00 << 16) | (0x9984 >> 2),
261 	0x00000000,
262 	(0x9c00 << 16) | (0x9988 >> 2),
263 	0x00000000,
264 	(0x9c00 << 16) | (0x998c >> 2),
265 	0x00000000,
266 	(0x9c00 << 16) | (0x8c00 >> 2),
267 	0x00000000,
268 	(0x9c00 << 16) | (0x8c14 >> 2),
269 	0x00000000,
270 	(0x9c00 << 16) | (0x8c04 >> 2),
271 	0x00000000,
272 	(0x9c00 << 16) | (0x8c08 >> 2),
273 	0x00000000,
274 	(0x8000 << 16) | (0x9b7c >> 2),
275 	0x00000000,
276 	(0x8040 << 16) | (0x9b7c >> 2),
277 	0x00000000,
278 	(0x8000 << 16) | (0xe84 >> 2),
279 	0x00000000,
280 	(0x8040 << 16) | (0xe84 >> 2),
281 	0x00000000,
282 	(0x8000 << 16) | (0x89c0 >> 2),
283 	0x00000000,
284 	(0x8040 << 16) | (0x89c0 >> 2),
285 	0x00000000,
286 	(0x8000 << 16) | (0x914c >> 2),
287 	0x00000000,
288 	(0x8040 << 16) | (0x914c >> 2),
289 	0x00000000,
290 	(0x8000 << 16) | (0x8c20 >> 2),
291 	0x00000000,
292 	(0x8040 << 16) | (0x8c20 >> 2),
293 	0x00000000,
294 	(0x8000 << 16) | (0x9354 >> 2),
295 	0x00000000,
296 	(0x8040 << 16) | (0x9354 >> 2),
297 	0x00000000,
298 	(0x9c00 << 16) | (0x9060 >> 2),
299 	0x00000000,
300 	(0x9c00 << 16) | (0x9364 >> 2),
301 	0x00000000,
302 	(0x9c00 << 16) | (0x9100 >> 2),
303 	0x00000000,
304 	(0x9c00 << 16) | (0x913c >> 2),
305 	0x00000000,
306 	(0x8000 << 16) | (0x90e0 >> 2),
307 	0x00000000,
308 	(0x8000 << 16) | (0x90e4 >> 2),
309 	0x00000000,
310 	(0x8000 << 16) | (0x90e8 >> 2),
311 	0x00000000,
312 	(0x8040 << 16) | (0x90e0 >> 2),
313 	0x00000000,
314 	(0x8040 << 16) | (0x90e4 >> 2),
315 	0x00000000,
316 	(0x8040 << 16) | (0x90e8 >> 2),
317 	0x00000000,
318 	(0x9c00 << 16) | (0x8bcc >> 2),
319 	0x00000000,
320 	(0x9c00 << 16) | (0x8b24 >> 2),
321 	0x00000000,
322 	(0x9c00 << 16) | (0x88c4 >> 2),
323 	0x00000000,
324 	(0x9c00 << 16) | (0x8e50 >> 2),
325 	0x00000000,
326 	(0x9c00 << 16) | (0x8c0c >> 2),
327 	0x00000000,
328 	(0x9c00 << 16) | (0x8e58 >> 2),
329 	0x00000000,
330 	(0x9c00 << 16) | (0x8e5c >> 2),
331 	0x00000000,
332 	(0x9c00 << 16) | (0x9508 >> 2),
333 	0x00000000,
334 	(0x9c00 << 16) | (0x950c >> 2),
335 	0x00000000,
336 	(0x9c00 << 16) | (0x9494 >> 2),
337 	0x00000000,
338 	(0x9c00 << 16) | (0xac0c >> 2),
339 	0x00000000,
340 	(0x9c00 << 16) | (0xac10 >> 2),
341 	0x00000000,
342 	(0x9c00 << 16) | (0xac14 >> 2),
343 	0x00000000,
344 	(0x9c00 << 16) | (0xae00 >> 2),
345 	0x00000000,
346 	(0x9c00 << 16) | (0xac08 >> 2),
347 	0x00000000,
348 	(0x9c00 << 16) | (0x88d4 >> 2),
349 	0x00000000,
350 	(0x9c00 << 16) | (0x88c8 >> 2),
351 	0x00000000,
352 	(0x9c00 << 16) | (0x88cc >> 2),
353 	0x00000000,
354 	(0x9c00 << 16) | (0x89b0 >> 2),
355 	0x00000000,
356 	(0x9c00 << 16) | (0x8b10 >> 2),
357 	0x00000000,
358 	(0x9c00 << 16) | (0x8a14 >> 2),
359 	0x00000000,
360 	(0x9c00 << 16) | (0x9830 >> 2),
361 	0x00000000,
362 	(0x9c00 << 16) | (0x9834 >> 2),
363 	0x00000000,
364 	(0x9c00 << 16) | (0x9838 >> 2),
365 	0x00000000,
366 	(0x9c00 << 16) | (0x9a10 >> 2),
367 	0x00000000,
368 	(0x8000 << 16) | (0x9870 >> 2),
369 	0x00000000,
370 	(0x8000 << 16) | (0x9874 >> 2),
371 	0x00000000,
372 	(0x8001 << 16) | (0x9870 >> 2),
373 	0x00000000,
374 	(0x8001 << 16) | (0x9874 >> 2),
375 	0x00000000,
376 	(0x8040 << 16) | (0x9870 >> 2),
377 	0x00000000,
378 	(0x8040 << 16) | (0x9874 >> 2),
379 	0x00000000,
380 	(0x8041 << 16) | (0x9870 >> 2),
381 	0x00000000,
382 	(0x8041 << 16) | (0x9874 >> 2),
383 	0x00000000,
384 	0x00000000
385 };
386 
387 static const u32 tahiti_golden_rlc_registers[] =
388 {
389 	0xc424, 0xffffffff, 0x00601005,
390 	0xc47c, 0xffffffff, 0x10104040,
391 	0xc488, 0xffffffff, 0x0100000a,
392 	0xc314, 0xffffffff, 0x00000800,
393 	0xc30c, 0xffffffff, 0x800000f4,
394 	0xf4a8, 0xffffffff, 0x00000000
395 };
396 
397 static const u32 tahiti_golden_registers[] =
398 {
399 	0x9a10, 0x00010000, 0x00018208,
400 	0x9830, 0xffffffff, 0x00000000,
401 	0x9834, 0xf00fffff, 0x00000400,
402 	0x9838, 0x0002021c, 0x00020200,
403 	0xc78, 0x00000080, 0x00000000,
404 	0xd030, 0x000300c0, 0x00800040,
405 	0xd830, 0x000300c0, 0x00800040,
406 	0x5bb0, 0x000000f0, 0x00000070,
407 	0x5bc0, 0x00200000, 0x50100000,
408 	0x7030, 0x31000311, 0x00000011,
409 	0x277c, 0x00000003, 0x000007ff,
410 	0x240c, 0x000007ff, 0x00000000,
411 	0x8a14, 0xf000001f, 0x00000007,
412 	0x8b24, 0xffffffff, 0x00ffffff,
413 	0x8b10, 0x0000ff0f, 0x00000000,
414 	0x28a4c, 0x07ffffff, 0x4e000000,
415 	0x28350, 0x3f3f3fff, 0x2a00126a,
416 	0x30, 0x000000ff, 0x0040,
417 	0x34, 0x00000040, 0x00004040,
418 	0x9100, 0x07ffffff, 0x03000000,
419 	0x8e88, 0x01ff1f3f, 0x00000000,
420 	0x8e84, 0x01ff1f3f, 0x00000000,
421 	0x9060, 0x0000007f, 0x00000020,
422 	0x9508, 0x00010000, 0x00010000,
423 	0xac14, 0x00000200, 0x000002fb,
424 	0xac10, 0xffffffff, 0x0000543b,
425 	0xac0c, 0xffffffff, 0xa9210876,
426 	0x88d0, 0xffffffff, 0x000fff40,
427 	0x88d4, 0x0000001f, 0x00000010,
428 	0x1410, 0x20000000, 0x20fffed8,
429 	0x15c0, 0x000c0fc0, 0x000c0400
430 };
431 
432 static const u32 tahiti_golden_registers2[] =
433 {
434 	0xc64, 0x00000001, 0x00000001
435 };
436 
437 static const u32 pitcairn_golden_rlc_registers[] =
438 {
439 	0xc424, 0xffffffff, 0x00601004,
440 	0xc47c, 0xffffffff, 0x10102020,
441 	0xc488, 0xffffffff, 0x01000020,
442 	0xc314, 0xffffffff, 0x00000800,
443 	0xc30c, 0xffffffff, 0x800000a4
444 };
445 
446 static const u32 pitcairn_golden_registers[] =
447 {
448 	0x9a10, 0x00010000, 0x00018208,
449 	0x9830, 0xffffffff, 0x00000000,
450 	0x9834, 0xf00fffff, 0x00000400,
451 	0x9838, 0x0002021c, 0x00020200,
452 	0xc78, 0x00000080, 0x00000000,
453 	0xd030, 0x000300c0, 0x00800040,
454 	0xd830, 0x000300c0, 0x00800040,
455 	0x5bb0, 0x000000f0, 0x00000070,
456 	0x5bc0, 0x00200000, 0x50100000,
457 	0x7030, 0x31000311, 0x00000011,
458 	0x2ae4, 0x00073ffe, 0x000022a2,
459 	0x240c, 0x000007ff, 0x00000000,
460 	0x8a14, 0xf000001f, 0x00000007,
461 	0x8b24, 0xffffffff, 0x00ffffff,
462 	0x8b10, 0x0000ff0f, 0x00000000,
463 	0x28a4c, 0x07ffffff, 0x4e000000,
464 	0x28350, 0x3f3f3fff, 0x2a00126a,
465 	0x30, 0x000000ff, 0x0040,
466 	0x34, 0x00000040, 0x00004040,
467 	0x9100, 0x07ffffff, 0x03000000,
468 	0x9060, 0x0000007f, 0x00000020,
469 	0x9508, 0x00010000, 0x00010000,
470 	0xac14, 0x000003ff, 0x000000f7,
471 	0xac10, 0xffffffff, 0x00000000,
472 	0xac0c, 0xffffffff, 0x32761054,
473 	0x88d4, 0x0000001f, 0x00000010,
474 	0x15c0, 0x000c0fc0, 0x000c0400
475 };
476 
477 static const u32 verde_golden_rlc_registers[] =
478 {
479 	0xc424, 0xffffffff, 0x033f1005,
480 	0xc47c, 0xffffffff, 0x10808020,
481 	0xc488, 0xffffffff, 0x00800008,
482 	0xc314, 0xffffffff, 0x00001000,
483 	0xc30c, 0xffffffff, 0x80010014
484 };
485 
486 static const u32 verde_golden_registers[] =
487 {
488 	0x9a10, 0x00010000, 0x00018208,
489 	0x9830, 0xffffffff, 0x00000000,
490 	0x9834, 0xf00fffff, 0x00000400,
491 	0x9838, 0x0002021c, 0x00020200,
492 	0xc78, 0x00000080, 0x00000000,
493 	0xd030, 0x000300c0, 0x00800040,
494 	0xd030, 0x000300c0, 0x00800040,
495 	0xd830, 0x000300c0, 0x00800040,
496 	0xd830, 0x000300c0, 0x00800040,
497 	0x5bb0, 0x000000f0, 0x00000070,
498 	0x5bc0, 0x00200000, 0x50100000,
499 	0x7030, 0x31000311, 0x00000011,
500 	0x2ae4, 0x00073ffe, 0x000022a2,
501 	0x2ae4, 0x00073ffe, 0x000022a2,
502 	0x2ae4, 0x00073ffe, 0x000022a2,
503 	0x240c, 0x000007ff, 0x00000000,
504 	0x240c, 0x000007ff, 0x00000000,
505 	0x240c, 0x000007ff, 0x00000000,
506 	0x8a14, 0xf000001f, 0x00000007,
507 	0x8a14, 0xf000001f, 0x00000007,
508 	0x8a14, 0xf000001f, 0x00000007,
509 	0x8b24, 0xffffffff, 0x00ffffff,
510 	0x8b10, 0x0000ff0f, 0x00000000,
511 	0x28a4c, 0x07ffffff, 0x4e000000,
512 	0x28350, 0x3f3f3fff, 0x0000124a,
513 	0x28350, 0x3f3f3fff, 0x0000124a,
514 	0x28350, 0x3f3f3fff, 0x0000124a,
515 	0x30, 0x000000ff, 0x0040,
516 	0x34, 0x00000040, 0x00004040,
517 	0x9100, 0x07ffffff, 0x03000000,
518 	0x9100, 0x07ffffff, 0x03000000,
519 	0x8e88, 0x01ff1f3f, 0x00000000,
520 	0x8e88, 0x01ff1f3f, 0x00000000,
521 	0x8e88, 0x01ff1f3f, 0x00000000,
522 	0x8e84, 0x01ff1f3f, 0x00000000,
523 	0x8e84, 0x01ff1f3f, 0x00000000,
524 	0x8e84, 0x01ff1f3f, 0x00000000,
525 	0x9060, 0x0000007f, 0x00000020,
526 	0x9508, 0x00010000, 0x00010000,
527 	0xac14, 0x000003ff, 0x00000003,
528 	0xac14, 0x000003ff, 0x00000003,
529 	0xac14, 0x000003ff, 0x00000003,
530 	0xac10, 0xffffffff, 0x00000000,
531 	0xac10, 0xffffffff, 0x00000000,
532 	0xac10, 0xffffffff, 0x00000000,
533 	0xac0c, 0xffffffff, 0x00001032,
534 	0xac0c, 0xffffffff, 0x00001032,
535 	0xac0c, 0xffffffff, 0x00001032,
536 	0x88d4, 0x0000001f, 0x00000010,
537 	0x88d4, 0x0000001f, 0x00000010,
538 	0x88d4, 0x0000001f, 0x00000010,
539 	0x15c0, 0x000c0fc0, 0x000c0400
540 };
541 
542 static const u32 oland_golden_rlc_registers[] =
543 {
544 	0xc424, 0xffffffff, 0x00601005,
545 	0xc47c, 0xffffffff, 0x10104040,
546 	0xc488, 0xffffffff, 0x0100000a,
547 	0xc314, 0xffffffff, 0x00000800,
548 	0xc30c, 0xffffffff, 0x800000f4
549 };
550 
551 static const u32 oland_golden_registers[] =
552 {
553 	0x9a10, 0x00010000, 0x00018208,
554 	0x9830, 0xffffffff, 0x00000000,
555 	0x9834, 0xf00fffff, 0x00000400,
556 	0x9838, 0x0002021c, 0x00020200,
557 	0xc78, 0x00000080, 0x00000000,
558 	0xd030, 0x000300c0, 0x00800040,
559 	0xd830, 0x000300c0, 0x00800040,
560 	0x5bb0, 0x000000f0, 0x00000070,
561 	0x5bc0, 0x00200000, 0x50100000,
562 	0x7030, 0x31000311, 0x00000011,
563 	0x2ae4, 0x00073ffe, 0x000022a2,
564 	0x240c, 0x000007ff, 0x00000000,
565 	0x8a14, 0xf000001f, 0x00000007,
566 	0x8b24, 0xffffffff, 0x00ffffff,
567 	0x8b10, 0x0000ff0f, 0x00000000,
568 	0x28a4c, 0x07ffffff, 0x4e000000,
569 	0x28350, 0x3f3f3fff, 0x00000082,
570 	0x30, 0x000000ff, 0x0040,
571 	0x34, 0x00000040, 0x00004040,
572 	0x9100, 0x07ffffff, 0x03000000,
573 	0x9060, 0x0000007f, 0x00000020,
574 	0x9508, 0x00010000, 0x00010000,
575 	0xac14, 0x000003ff, 0x000000f3,
576 	0xac10, 0xffffffff, 0x00000000,
577 	0xac0c, 0xffffffff, 0x00003210,
578 	0x88d4, 0x0000001f, 0x00000010,
579 	0x15c0, 0x000c0fc0, 0x000c0400
580 };
581 
582 static const u32 hainan_golden_registers[] =
583 {
584 	0x9a10, 0x00010000, 0x00018208,
585 	0x9830, 0xffffffff, 0x00000000,
586 	0x9834, 0xf00fffff, 0x00000400,
587 	0x9838, 0x0002021c, 0x00020200,
588 	0xd0c0, 0xff000fff, 0x00000100,
589 	0xd030, 0x000300c0, 0x00800040,
590 	0xd8c0, 0xff000fff, 0x00000100,
591 	0xd830, 0x000300c0, 0x00800040,
592 	0x2ae4, 0x00073ffe, 0x000022a2,
593 	0x240c, 0x000007ff, 0x00000000,
594 	0x8a14, 0xf000001f, 0x00000007,
595 	0x8b24, 0xffffffff, 0x00ffffff,
596 	0x8b10, 0x0000ff0f, 0x00000000,
597 	0x28a4c, 0x07ffffff, 0x4e000000,
598 	0x28350, 0x3f3f3fff, 0x00000000,
599 	0x30, 0x000000ff, 0x0040,
600 	0x34, 0x00000040, 0x00004040,
601 	0x9100, 0x03e00000, 0x03600000,
602 	0x9060, 0x0000007f, 0x00000020,
603 	0x9508, 0x00010000, 0x00010000,
604 	0xac14, 0x000003ff, 0x000000f1,
605 	0xac10, 0xffffffff, 0x00000000,
606 	0xac0c, 0xffffffff, 0x00003210,
607 	0x88d4, 0x0000001f, 0x00000010,
608 	0x15c0, 0x000c0fc0, 0x000c0400
609 };
610 
611 static const u32 hainan_golden_registers2[] =
612 {
613 	0x98f8, 0xffffffff, 0x02010001
614 };
615 
616 static const u32 tahiti_mgcg_cgcg_init[] =
617 {
618 	0xc400, 0xffffffff, 0xfffffffc,
619 	0x802c, 0xffffffff, 0xe0000000,
620 	0x9a60, 0xffffffff, 0x00000100,
621 	0x92a4, 0xffffffff, 0x00000100,
622 	0xc164, 0xffffffff, 0x00000100,
623 	0x9774, 0xffffffff, 0x00000100,
624 	0x8984, 0xffffffff, 0x06000100,
625 	0x8a18, 0xffffffff, 0x00000100,
626 	0x92a0, 0xffffffff, 0x00000100,
627 	0xc380, 0xffffffff, 0x00000100,
628 	0x8b28, 0xffffffff, 0x00000100,
629 	0x9144, 0xffffffff, 0x00000100,
630 	0x8d88, 0xffffffff, 0x00000100,
631 	0x8d8c, 0xffffffff, 0x00000100,
632 	0x9030, 0xffffffff, 0x00000100,
633 	0x9034, 0xffffffff, 0x00000100,
634 	0x9038, 0xffffffff, 0x00000100,
635 	0x903c, 0xffffffff, 0x00000100,
636 	0xad80, 0xffffffff, 0x00000100,
637 	0xac54, 0xffffffff, 0x00000100,
638 	0x897c, 0xffffffff, 0x06000100,
639 	0x9868, 0xffffffff, 0x00000100,
640 	0x9510, 0xffffffff, 0x00000100,
641 	0xaf04, 0xffffffff, 0x00000100,
642 	0xae04, 0xffffffff, 0x00000100,
643 	0x949c, 0xffffffff, 0x00000100,
644 	0x802c, 0xffffffff, 0xe0000000,
645 	0x9160, 0xffffffff, 0x00010000,
646 	0x9164, 0xffffffff, 0x00030002,
647 	0x9168, 0xffffffff, 0x00040007,
648 	0x916c, 0xffffffff, 0x00060005,
649 	0x9170, 0xffffffff, 0x00090008,
650 	0x9174, 0xffffffff, 0x00020001,
651 	0x9178, 0xffffffff, 0x00040003,
652 	0x917c, 0xffffffff, 0x00000007,
653 	0x9180, 0xffffffff, 0x00060005,
654 	0x9184, 0xffffffff, 0x00090008,
655 	0x9188, 0xffffffff, 0x00030002,
656 	0x918c, 0xffffffff, 0x00050004,
657 	0x9190, 0xffffffff, 0x00000008,
658 	0x9194, 0xffffffff, 0x00070006,
659 	0x9198, 0xffffffff, 0x000a0009,
660 	0x919c, 0xffffffff, 0x00040003,
661 	0x91a0, 0xffffffff, 0x00060005,
662 	0x91a4, 0xffffffff, 0x00000009,
663 	0x91a8, 0xffffffff, 0x00080007,
664 	0x91ac, 0xffffffff, 0x000b000a,
665 	0x91b0, 0xffffffff, 0x00050004,
666 	0x91b4, 0xffffffff, 0x00070006,
667 	0x91b8, 0xffffffff, 0x0008000b,
668 	0x91bc, 0xffffffff, 0x000a0009,
669 	0x91c0, 0xffffffff, 0x000d000c,
670 	0x91c4, 0xffffffff, 0x00060005,
671 	0x91c8, 0xffffffff, 0x00080007,
672 	0x91cc, 0xffffffff, 0x0000000b,
673 	0x91d0, 0xffffffff, 0x000a0009,
674 	0x91d4, 0xffffffff, 0x000d000c,
675 	0x91d8, 0xffffffff, 0x00070006,
676 	0x91dc, 0xffffffff, 0x00090008,
677 	0x91e0, 0xffffffff, 0x0000000c,
678 	0x91e4, 0xffffffff, 0x000b000a,
679 	0x91e8, 0xffffffff, 0x000e000d,
680 	0x91ec, 0xffffffff, 0x00080007,
681 	0x91f0, 0xffffffff, 0x000a0009,
682 	0x91f4, 0xffffffff, 0x0000000d,
683 	0x91f8, 0xffffffff, 0x000c000b,
684 	0x91fc, 0xffffffff, 0x000f000e,
685 	0x9200, 0xffffffff, 0x00090008,
686 	0x9204, 0xffffffff, 0x000b000a,
687 	0x9208, 0xffffffff, 0x000c000f,
688 	0x920c, 0xffffffff, 0x000e000d,
689 	0x9210, 0xffffffff, 0x00110010,
690 	0x9214, 0xffffffff, 0x000a0009,
691 	0x9218, 0xffffffff, 0x000c000b,
692 	0x921c, 0xffffffff, 0x0000000f,
693 	0x9220, 0xffffffff, 0x000e000d,
694 	0x9224, 0xffffffff, 0x00110010,
695 	0x9228, 0xffffffff, 0x000b000a,
696 	0x922c, 0xffffffff, 0x000d000c,
697 	0x9230, 0xffffffff, 0x00000010,
698 	0x9234, 0xffffffff, 0x000f000e,
699 	0x9238, 0xffffffff, 0x00120011,
700 	0x923c, 0xffffffff, 0x000c000b,
701 	0x9240, 0xffffffff, 0x000e000d,
702 	0x9244, 0xffffffff, 0x00000011,
703 	0x9248, 0xffffffff, 0x0010000f,
704 	0x924c, 0xffffffff, 0x00130012,
705 	0x9250, 0xffffffff, 0x000d000c,
706 	0x9254, 0xffffffff, 0x000f000e,
707 	0x9258, 0xffffffff, 0x00100013,
708 	0x925c, 0xffffffff, 0x00120011,
709 	0x9260, 0xffffffff, 0x00150014,
710 	0x9264, 0xffffffff, 0x000e000d,
711 	0x9268, 0xffffffff, 0x0010000f,
712 	0x926c, 0xffffffff, 0x00000013,
713 	0x9270, 0xffffffff, 0x00120011,
714 	0x9274, 0xffffffff, 0x00150014,
715 	0x9278, 0xffffffff, 0x000f000e,
716 	0x927c, 0xffffffff, 0x00110010,
717 	0x9280, 0xffffffff, 0x00000014,
718 	0x9284, 0xffffffff, 0x00130012,
719 	0x9288, 0xffffffff, 0x00160015,
720 	0x928c, 0xffffffff, 0x0010000f,
721 	0x9290, 0xffffffff, 0x00120011,
722 	0x9294, 0xffffffff, 0x00000015,
723 	0x9298, 0xffffffff, 0x00140013,
724 	0x929c, 0xffffffff, 0x00170016,
725 	0x9150, 0xffffffff, 0x96940200,
726 	0x8708, 0xffffffff, 0x00900100,
727 	0xc478, 0xffffffff, 0x00000080,
728 	0xc404, 0xffffffff, 0x0020003f,
729 	0x30, 0xffffffff, 0x0000001c,
730 	0x34, 0x000f0000, 0x000f0000,
731 	0x160c, 0xffffffff, 0x00000100,
732 	0x1024, 0xffffffff, 0x00000100,
733 	0x102c, 0x00000101, 0x00000000,
734 	0x20a8, 0xffffffff, 0x00000104,
735 	0x264c, 0x000c0000, 0x000c0000,
736 	0x2648, 0x000c0000, 0x000c0000,
737 	0x55e4, 0xff000fff, 0x00000100,
738 	0x55e8, 0x00000001, 0x00000001,
739 	0x2f50, 0x00000001, 0x00000001,
740 	0x30cc, 0xc0000fff, 0x00000104,
741 	0xc1e4, 0x00000001, 0x00000001,
742 	0xd0c0, 0xfffffff0, 0x00000100,
743 	0xd8c0, 0xfffffff0, 0x00000100
744 };
745 
746 static const u32 pitcairn_mgcg_cgcg_init[] =
747 {
748 	0xc400, 0xffffffff, 0xfffffffc,
749 	0x802c, 0xffffffff, 0xe0000000,
750 	0x9a60, 0xffffffff, 0x00000100,
751 	0x92a4, 0xffffffff, 0x00000100,
752 	0xc164, 0xffffffff, 0x00000100,
753 	0x9774, 0xffffffff, 0x00000100,
754 	0x8984, 0xffffffff, 0x06000100,
755 	0x8a18, 0xffffffff, 0x00000100,
756 	0x92a0, 0xffffffff, 0x00000100,
757 	0xc380, 0xffffffff, 0x00000100,
758 	0x8b28, 0xffffffff, 0x00000100,
759 	0x9144, 0xffffffff, 0x00000100,
760 	0x8d88, 0xffffffff, 0x00000100,
761 	0x8d8c, 0xffffffff, 0x00000100,
762 	0x9030, 0xffffffff, 0x00000100,
763 	0x9034, 0xffffffff, 0x00000100,
764 	0x9038, 0xffffffff, 0x00000100,
765 	0x903c, 0xffffffff, 0x00000100,
766 	0xad80, 0xffffffff, 0x00000100,
767 	0xac54, 0xffffffff, 0x00000100,
768 	0x897c, 0xffffffff, 0x06000100,
769 	0x9868, 0xffffffff, 0x00000100,
770 	0x9510, 0xffffffff, 0x00000100,
771 	0xaf04, 0xffffffff, 0x00000100,
772 	0xae04, 0xffffffff, 0x00000100,
773 	0x949c, 0xffffffff, 0x00000100,
774 	0x802c, 0xffffffff, 0xe0000000,
775 	0x9160, 0xffffffff, 0x00010000,
776 	0x9164, 0xffffffff, 0x00030002,
777 	0x9168, 0xffffffff, 0x00040007,
778 	0x916c, 0xffffffff, 0x00060005,
779 	0x9170, 0xffffffff, 0x00090008,
780 	0x9174, 0xffffffff, 0x00020001,
781 	0x9178, 0xffffffff, 0x00040003,
782 	0x917c, 0xffffffff, 0x00000007,
783 	0x9180, 0xffffffff, 0x00060005,
784 	0x9184, 0xffffffff, 0x00090008,
785 	0x9188, 0xffffffff, 0x00030002,
786 	0x918c, 0xffffffff, 0x00050004,
787 	0x9190, 0xffffffff, 0x00000008,
788 	0x9194, 0xffffffff, 0x00070006,
789 	0x9198, 0xffffffff, 0x000a0009,
790 	0x919c, 0xffffffff, 0x00040003,
791 	0x91a0, 0xffffffff, 0x00060005,
792 	0x91a4, 0xffffffff, 0x00000009,
793 	0x91a8, 0xffffffff, 0x00080007,
794 	0x91ac, 0xffffffff, 0x000b000a,
795 	0x91b0, 0xffffffff, 0x00050004,
796 	0x91b4, 0xffffffff, 0x00070006,
797 	0x91b8, 0xffffffff, 0x0008000b,
798 	0x91bc, 0xffffffff, 0x000a0009,
799 	0x91c0, 0xffffffff, 0x000d000c,
800 	0x9200, 0xffffffff, 0x00090008,
801 	0x9204, 0xffffffff, 0x000b000a,
802 	0x9208, 0xffffffff, 0x000c000f,
803 	0x920c, 0xffffffff, 0x000e000d,
804 	0x9210, 0xffffffff, 0x00110010,
805 	0x9214, 0xffffffff, 0x000a0009,
806 	0x9218, 0xffffffff, 0x000c000b,
807 	0x921c, 0xffffffff, 0x0000000f,
808 	0x9220, 0xffffffff, 0x000e000d,
809 	0x9224, 0xffffffff, 0x00110010,
810 	0x9228, 0xffffffff, 0x000b000a,
811 	0x922c, 0xffffffff, 0x000d000c,
812 	0x9230, 0xffffffff, 0x00000010,
813 	0x9234, 0xffffffff, 0x000f000e,
814 	0x9238, 0xffffffff, 0x00120011,
815 	0x923c, 0xffffffff, 0x000c000b,
816 	0x9240, 0xffffffff, 0x000e000d,
817 	0x9244, 0xffffffff, 0x00000011,
818 	0x9248, 0xffffffff, 0x0010000f,
819 	0x924c, 0xffffffff, 0x00130012,
820 	0x9250, 0xffffffff, 0x000d000c,
821 	0x9254, 0xffffffff, 0x000f000e,
822 	0x9258, 0xffffffff, 0x00100013,
823 	0x925c, 0xffffffff, 0x00120011,
824 	0x9260, 0xffffffff, 0x00150014,
825 	0x9150, 0xffffffff, 0x96940200,
826 	0x8708, 0xffffffff, 0x00900100,
827 	0xc478, 0xffffffff, 0x00000080,
828 	0xc404, 0xffffffff, 0x0020003f,
829 	0x30, 0xffffffff, 0x0000001c,
830 	0x34, 0x000f0000, 0x000f0000,
831 	0x160c, 0xffffffff, 0x00000100,
832 	0x1024, 0xffffffff, 0x00000100,
833 	0x102c, 0x00000101, 0x00000000,
834 	0x20a8, 0xffffffff, 0x00000104,
835 	0x55e4, 0xff000fff, 0x00000100,
836 	0x55e8, 0x00000001, 0x00000001,
837 	0x2f50, 0x00000001, 0x00000001,
838 	0x30cc, 0xc0000fff, 0x00000104,
839 	0xc1e4, 0x00000001, 0x00000001,
840 	0xd0c0, 0xfffffff0, 0x00000100,
841 	0xd8c0, 0xfffffff0, 0x00000100
842 };
843 
844 static const u32 verde_mgcg_cgcg_init[] =
845 {
846 	0xc400, 0xffffffff, 0xfffffffc,
847 	0x802c, 0xffffffff, 0xe0000000,
848 	0x9a60, 0xffffffff, 0x00000100,
849 	0x92a4, 0xffffffff, 0x00000100,
850 	0xc164, 0xffffffff, 0x00000100,
851 	0x9774, 0xffffffff, 0x00000100,
852 	0x8984, 0xffffffff, 0x06000100,
853 	0x8a18, 0xffffffff, 0x00000100,
854 	0x92a0, 0xffffffff, 0x00000100,
855 	0xc380, 0xffffffff, 0x00000100,
856 	0x8b28, 0xffffffff, 0x00000100,
857 	0x9144, 0xffffffff, 0x00000100,
858 	0x8d88, 0xffffffff, 0x00000100,
859 	0x8d8c, 0xffffffff, 0x00000100,
860 	0x9030, 0xffffffff, 0x00000100,
861 	0x9034, 0xffffffff, 0x00000100,
862 	0x9038, 0xffffffff, 0x00000100,
863 	0x903c, 0xffffffff, 0x00000100,
864 	0xad80, 0xffffffff, 0x00000100,
865 	0xac54, 0xffffffff, 0x00000100,
866 	0x897c, 0xffffffff, 0x06000100,
867 	0x9868, 0xffffffff, 0x00000100,
868 	0x9510, 0xffffffff, 0x00000100,
869 	0xaf04, 0xffffffff, 0x00000100,
870 	0xae04, 0xffffffff, 0x00000100,
871 	0x949c, 0xffffffff, 0x00000100,
872 	0x802c, 0xffffffff, 0xe0000000,
873 	0x9160, 0xffffffff, 0x00010000,
874 	0x9164, 0xffffffff, 0x00030002,
875 	0x9168, 0xffffffff, 0x00040007,
876 	0x916c, 0xffffffff, 0x00060005,
877 	0x9170, 0xffffffff, 0x00090008,
878 	0x9174, 0xffffffff, 0x00020001,
879 	0x9178, 0xffffffff, 0x00040003,
880 	0x917c, 0xffffffff, 0x00000007,
881 	0x9180, 0xffffffff, 0x00060005,
882 	0x9184, 0xffffffff, 0x00090008,
883 	0x9188, 0xffffffff, 0x00030002,
884 	0x918c, 0xffffffff, 0x00050004,
885 	0x9190, 0xffffffff, 0x00000008,
886 	0x9194, 0xffffffff, 0x00070006,
887 	0x9198, 0xffffffff, 0x000a0009,
888 	0x919c, 0xffffffff, 0x00040003,
889 	0x91a0, 0xffffffff, 0x00060005,
890 	0x91a4, 0xffffffff, 0x00000009,
891 	0x91a8, 0xffffffff, 0x00080007,
892 	0x91ac, 0xffffffff, 0x000b000a,
893 	0x91b0, 0xffffffff, 0x00050004,
894 	0x91b4, 0xffffffff, 0x00070006,
895 	0x91b8, 0xffffffff, 0x0008000b,
896 	0x91bc, 0xffffffff, 0x000a0009,
897 	0x91c0, 0xffffffff, 0x000d000c,
898 	0x9200, 0xffffffff, 0x00090008,
899 	0x9204, 0xffffffff, 0x000b000a,
900 	0x9208, 0xffffffff, 0x000c000f,
901 	0x920c, 0xffffffff, 0x000e000d,
902 	0x9210, 0xffffffff, 0x00110010,
903 	0x9214, 0xffffffff, 0x000a0009,
904 	0x9218, 0xffffffff, 0x000c000b,
905 	0x921c, 0xffffffff, 0x0000000f,
906 	0x9220, 0xffffffff, 0x000e000d,
907 	0x9224, 0xffffffff, 0x00110010,
908 	0x9228, 0xffffffff, 0x000b000a,
909 	0x922c, 0xffffffff, 0x000d000c,
910 	0x9230, 0xffffffff, 0x00000010,
911 	0x9234, 0xffffffff, 0x000f000e,
912 	0x9238, 0xffffffff, 0x00120011,
913 	0x923c, 0xffffffff, 0x000c000b,
914 	0x9240, 0xffffffff, 0x000e000d,
915 	0x9244, 0xffffffff, 0x00000011,
916 	0x9248, 0xffffffff, 0x0010000f,
917 	0x924c, 0xffffffff, 0x00130012,
918 	0x9250, 0xffffffff, 0x000d000c,
919 	0x9254, 0xffffffff, 0x000f000e,
920 	0x9258, 0xffffffff, 0x00100013,
921 	0x925c, 0xffffffff, 0x00120011,
922 	0x9260, 0xffffffff, 0x00150014,
923 	0x9150, 0xffffffff, 0x96940200,
924 	0x8708, 0xffffffff, 0x00900100,
925 	0xc478, 0xffffffff, 0x00000080,
926 	0xc404, 0xffffffff, 0x0020003f,
927 	0x30, 0xffffffff, 0x0000001c,
928 	0x34, 0x000f0000, 0x000f0000,
929 	0x160c, 0xffffffff, 0x00000100,
930 	0x1024, 0xffffffff, 0x00000100,
931 	0x102c, 0x00000101, 0x00000000,
932 	0x20a8, 0xffffffff, 0x00000104,
933 	0x264c, 0x000c0000, 0x000c0000,
934 	0x2648, 0x000c0000, 0x000c0000,
935 	0x55e4, 0xff000fff, 0x00000100,
936 	0x55e8, 0x00000001, 0x00000001,
937 	0x2f50, 0x00000001, 0x00000001,
938 	0x30cc, 0xc0000fff, 0x00000104,
939 	0xc1e4, 0x00000001, 0x00000001,
940 	0xd0c0, 0xfffffff0, 0x00000100,
941 	0xd8c0, 0xfffffff0, 0x00000100
942 };
943 
/*
 * Oland register table handed to radeon_program_register_sequence() by
 * si_init_golden_registers() in the CHIP_OLAND case.
 *
 * The initializer is laid out three u32 values per row.
 * NOTE(review): rows are presumably { register offset, mask, value } and the
 * "mgcg_cgcg" name suggests clock-gating setup -- confirm against
 * radeon_program_register_sequence() and the SI register documentation.
 * Register 0x802c appears twice; keep the row order intact.
 */
944 static const u32 oland_mgcg_cgcg_init[] =
945 {
946 	0xc400, 0xffffffff, 0xfffffffc,
947 	0x802c, 0xffffffff, 0xe0000000,
948 	0x9a60, 0xffffffff, 0x00000100,
949 	0x92a4, 0xffffffff, 0x00000100,
950 	0xc164, 0xffffffff, 0x00000100,
951 	0x9774, 0xffffffff, 0x00000100,
952 	0x8984, 0xffffffff, 0x06000100,
953 	0x8a18, 0xffffffff, 0x00000100,
954 	0x92a0, 0xffffffff, 0x00000100,
955 	0xc380, 0xffffffff, 0x00000100,
956 	0x8b28, 0xffffffff, 0x00000100,
957 	0x9144, 0xffffffff, 0x00000100,
958 	0x8d88, 0xffffffff, 0x00000100,
959 	0x8d8c, 0xffffffff, 0x00000100,
960 	0x9030, 0xffffffff, 0x00000100,
961 	0x9034, 0xffffffff, 0x00000100,
962 	0x9038, 0xffffffff, 0x00000100,
963 	0x903c, 0xffffffff, 0x00000100,
964 	0xad80, 0xffffffff, 0x00000100,
965 	0xac54, 0xffffffff, 0x00000100,
966 	0x897c, 0xffffffff, 0x06000100,
967 	0x9868, 0xffffffff, 0x00000100,
968 	0x9510, 0xffffffff, 0x00000100,
969 	0xaf04, 0xffffffff, 0x00000100,
970 	0xae04, 0xffffffff, 0x00000100,
971 	0x949c, 0xffffffff, 0x00000100,
972 	0x802c, 0xffffffff, 0xe0000000,
973 	0x9160, 0xffffffff, 0x00010000,
974 	0x9164, 0xffffffff, 0x00030002,
975 	0x9168, 0xffffffff, 0x00040007,
976 	0x916c, 0xffffffff, 0x00060005,
977 	0x9170, 0xffffffff, 0x00090008,
978 	0x9174, 0xffffffff, 0x00020001,
979 	0x9178, 0xffffffff, 0x00040003,
980 	0x917c, 0xffffffff, 0x00000007,
981 	0x9180, 0xffffffff, 0x00060005,
982 	0x9184, 0xffffffff, 0x00090008,
983 	0x9188, 0xffffffff, 0x00030002,
984 	0x918c, 0xffffffff, 0x00050004,
985 	0x9190, 0xffffffff, 0x00000008,
986 	0x9194, 0xffffffff, 0x00070006,
987 	0x9198, 0xffffffff, 0x000a0009,
988 	0x919c, 0xffffffff, 0x00040003,
989 	0x91a0, 0xffffffff, 0x00060005,
990 	0x91a4, 0xffffffff, 0x00000009,
991 	0x91a8, 0xffffffff, 0x00080007,
992 	0x91ac, 0xffffffff, 0x000b000a,
993 	0x91b0, 0xffffffff, 0x00050004,
994 	0x91b4, 0xffffffff, 0x00070006,
995 	0x91b8, 0xffffffff, 0x0008000b,
996 	0x91bc, 0xffffffff, 0x000a0009,
997 	0x91c0, 0xffffffff, 0x000d000c,
998 	0x91c4, 0xffffffff, 0x00060005,
999 	0x91c8, 0xffffffff, 0x00080007,
1000 	0x91cc, 0xffffffff, 0x0000000b,
1001 	0x91d0, 0xffffffff, 0x000a0009,
1002 	0x91d4, 0xffffffff, 0x000d000c,
1003 	0x9150, 0xffffffff, 0x96940200,
1004 	0x8708, 0xffffffff, 0x00900100,
1005 	0xc478, 0xffffffff, 0x00000080,
1006 	0xc404, 0xffffffff, 0x0020003f,
1007 	0x30, 0xffffffff, 0x0000001c,
1008 	0x34, 0x000f0000, 0x000f0000,
1009 	0x160c, 0xffffffff, 0x00000100,
1010 	0x1024, 0xffffffff, 0x00000100,
1011 	0x102c, 0x00000101, 0x00000000,
1012 	0x20a8, 0xffffffff, 0x00000104,
1013 	0x264c, 0x000c0000, 0x000c0000,
1014 	0x2648, 0x000c0000, 0x000c0000,
1015 	0x55e4, 0xff000fff, 0x00000100,
1016 	0x55e8, 0x00000001, 0x00000001,
1017 	0x2f50, 0x00000001, 0x00000001,
1018 	0x30cc, 0xc0000fff, 0x00000104,
1019 	0xc1e4, 0x00000001, 0x00000001,
1020 	0xd0c0, 0xfffffff0, 0x00000100,
1021 	0xd8c0, 0xfffffff0, 0x00000100
1022 };
1023 
/*
 * Hainan register table handed to radeon_program_register_sequence() by
 * si_init_golden_registers() in the CHIP_HAINAN case.
 *
 * The initializer is laid out three u32 values per row.
 * NOTE(review): rows are presumably { register offset, mask, value } --
 * confirm against radeon_program_register_sequence().
 * Compared with oland_mgcg_cgcg_init this table appears to be the same
 * sequence minus the rows for 0x102c, 0x55e4 and 0x55e8.
 */
1024 static const u32 hainan_mgcg_cgcg_init[] =
1025 {
1026 	0xc400, 0xffffffff, 0xfffffffc,
1027 	0x802c, 0xffffffff, 0xe0000000,
1028 	0x9a60, 0xffffffff, 0x00000100,
1029 	0x92a4, 0xffffffff, 0x00000100,
1030 	0xc164, 0xffffffff, 0x00000100,
1031 	0x9774, 0xffffffff, 0x00000100,
1032 	0x8984, 0xffffffff, 0x06000100,
1033 	0x8a18, 0xffffffff, 0x00000100,
1034 	0x92a0, 0xffffffff, 0x00000100,
1035 	0xc380, 0xffffffff, 0x00000100,
1036 	0x8b28, 0xffffffff, 0x00000100,
1037 	0x9144, 0xffffffff, 0x00000100,
1038 	0x8d88, 0xffffffff, 0x00000100,
1039 	0x8d8c, 0xffffffff, 0x00000100,
1040 	0x9030, 0xffffffff, 0x00000100,
1041 	0x9034, 0xffffffff, 0x00000100,
1042 	0x9038, 0xffffffff, 0x00000100,
1043 	0x903c, 0xffffffff, 0x00000100,
1044 	0xad80, 0xffffffff, 0x00000100,
1045 	0xac54, 0xffffffff, 0x00000100,
1046 	0x897c, 0xffffffff, 0x06000100,
1047 	0x9868, 0xffffffff, 0x00000100,
1048 	0x9510, 0xffffffff, 0x00000100,
1049 	0xaf04, 0xffffffff, 0x00000100,
1050 	0xae04, 0xffffffff, 0x00000100,
1051 	0x949c, 0xffffffff, 0x00000100,
1052 	0x802c, 0xffffffff, 0xe0000000,
1053 	0x9160, 0xffffffff, 0x00010000,
1054 	0x9164, 0xffffffff, 0x00030002,
1055 	0x9168, 0xffffffff, 0x00040007,
1056 	0x916c, 0xffffffff, 0x00060005,
1057 	0x9170, 0xffffffff, 0x00090008,
1058 	0x9174, 0xffffffff, 0x00020001,
1059 	0x9178, 0xffffffff, 0x00040003,
1060 	0x917c, 0xffffffff, 0x00000007,
1061 	0x9180, 0xffffffff, 0x00060005,
1062 	0x9184, 0xffffffff, 0x00090008,
1063 	0x9188, 0xffffffff, 0x00030002,
1064 	0x918c, 0xffffffff, 0x00050004,
1065 	0x9190, 0xffffffff, 0x00000008,
1066 	0x9194, 0xffffffff, 0x00070006,
1067 	0x9198, 0xffffffff, 0x000a0009,
1068 	0x919c, 0xffffffff, 0x00040003,
1069 	0x91a0, 0xffffffff, 0x00060005,
1070 	0x91a4, 0xffffffff, 0x00000009,
1071 	0x91a8, 0xffffffff, 0x00080007,
1072 	0x91ac, 0xffffffff, 0x000b000a,
1073 	0x91b0, 0xffffffff, 0x00050004,
1074 	0x91b4, 0xffffffff, 0x00070006,
1075 	0x91b8, 0xffffffff, 0x0008000b,
1076 	0x91bc, 0xffffffff, 0x000a0009,
1077 	0x91c0, 0xffffffff, 0x000d000c,
1078 	0x91c4, 0xffffffff, 0x00060005,
1079 	0x91c8, 0xffffffff, 0x00080007,
1080 	0x91cc, 0xffffffff, 0x0000000b,
1081 	0x91d0, 0xffffffff, 0x000a0009,
1082 	0x91d4, 0xffffffff, 0x000d000c,
1083 	0x9150, 0xffffffff, 0x96940200,
1084 	0x8708, 0xffffffff, 0x00900100,
1085 	0xc478, 0xffffffff, 0x00000080,
1086 	0xc404, 0xffffffff, 0x0020003f,
1087 	0x30, 0xffffffff, 0x0000001c,
1088 	0x34, 0x000f0000, 0x000f0000,
1089 	0x160c, 0xffffffff, 0x00000100,
1090 	0x1024, 0xffffffff, 0x00000100,
1091 	0x20a8, 0xffffffff, 0x00000104,
1092 	0x264c, 0x000c0000, 0x000c0000,
1093 	0x2648, 0x000c0000, 0x000c0000,
1094 	0x2f50, 0x00000001, 0x00000001,
1095 	0x30cc, 0xc0000fff, 0x00000104,
1096 	0xc1e4, 0x00000001, 0x00000001,
1097 	0xd0c0, 0xfffffff0, 0x00000100,
1098 	0xd8c0, 0xfffffff0, 0x00000100
1099 };
1100 
1101 static u32 verde_pg_init[] =
1102 {
1103 	0x353c, 0xffffffff, 0x40000,
1104 	0x3538, 0xffffffff, 0x200010ff,
1105 	0x353c, 0xffffffff, 0x0,
1106 	0x353c, 0xffffffff, 0x0,
1107 	0x353c, 0xffffffff, 0x0,
1108 	0x353c, 0xffffffff, 0x0,
1109 	0x353c, 0xffffffff, 0x0,
1110 	0x353c, 0xffffffff, 0x7007,
1111 	0x3538, 0xffffffff, 0x300010ff,
1112 	0x353c, 0xffffffff, 0x0,
1113 	0x353c, 0xffffffff, 0x0,
1114 	0x353c, 0xffffffff, 0x0,
1115 	0x353c, 0xffffffff, 0x0,
1116 	0x353c, 0xffffffff, 0x0,
1117 	0x353c, 0xffffffff, 0x400000,
1118 	0x3538, 0xffffffff, 0x100010ff,
1119 	0x353c, 0xffffffff, 0x0,
1120 	0x353c, 0xffffffff, 0x0,
1121 	0x353c, 0xffffffff, 0x0,
1122 	0x353c, 0xffffffff, 0x0,
1123 	0x353c, 0xffffffff, 0x0,
1124 	0x353c, 0xffffffff, 0x120200,
1125 	0x3538, 0xffffffff, 0x500010ff,
1126 	0x353c, 0xffffffff, 0x0,
1127 	0x353c, 0xffffffff, 0x0,
1128 	0x353c, 0xffffffff, 0x0,
1129 	0x353c, 0xffffffff, 0x0,
1130 	0x353c, 0xffffffff, 0x0,
1131 	0x353c, 0xffffffff, 0x1e1e16,
1132 	0x3538, 0xffffffff, 0x600010ff,
1133 	0x353c, 0xffffffff, 0x0,
1134 	0x353c, 0xffffffff, 0x0,
1135 	0x353c, 0xffffffff, 0x0,
1136 	0x353c, 0xffffffff, 0x0,
1137 	0x353c, 0xffffffff, 0x0,
1138 	0x353c, 0xffffffff, 0x171f1e,
1139 	0x3538, 0xffffffff, 0x700010ff,
1140 	0x353c, 0xffffffff, 0x0,
1141 	0x353c, 0xffffffff, 0x0,
1142 	0x353c, 0xffffffff, 0x0,
1143 	0x353c, 0xffffffff, 0x0,
1144 	0x353c, 0xffffffff, 0x0,
1145 	0x353c, 0xffffffff, 0x0,
1146 	0x3538, 0xffffffff, 0x9ff,
1147 	0x3500, 0xffffffff, 0x0,
1148 	0x3504, 0xffffffff, 0x10000800,
1149 	0x3504, 0xffffffff, 0xf,
1150 	0x3504, 0xffffffff, 0xf,
1151 	0x3500, 0xffffffff, 0x4,
1152 	0x3504, 0xffffffff, 0x1000051e,
1153 	0x3504, 0xffffffff, 0xffff,
1154 	0x3504, 0xffffffff, 0xffff,
1155 	0x3500, 0xffffffff, 0x8,
1156 	0x3504, 0xffffffff, 0x80500,
1157 	0x3500, 0xffffffff, 0x12,
1158 	0x3504, 0xffffffff, 0x9050c,
1159 	0x3500, 0xffffffff, 0x1d,
1160 	0x3504, 0xffffffff, 0xb052c,
1161 	0x3500, 0xffffffff, 0x2a,
1162 	0x3504, 0xffffffff, 0x1053e,
1163 	0x3500, 0xffffffff, 0x2d,
1164 	0x3504, 0xffffffff, 0x10546,
1165 	0x3500, 0xffffffff, 0x30,
1166 	0x3504, 0xffffffff, 0xa054e,
1167 	0x3500, 0xffffffff, 0x3c,
1168 	0x3504, 0xffffffff, 0x1055f,
1169 	0x3500, 0xffffffff, 0x3f,
1170 	0x3504, 0xffffffff, 0x10567,
1171 	0x3500, 0xffffffff, 0x42,
1172 	0x3504, 0xffffffff, 0x1056f,
1173 	0x3500, 0xffffffff, 0x45,
1174 	0x3504, 0xffffffff, 0x10572,
1175 	0x3500, 0xffffffff, 0x48,
1176 	0x3504, 0xffffffff, 0x20575,
1177 	0x3500, 0xffffffff, 0x4c,
1178 	0x3504, 0xffffffff, 0x190801,
1179 	0x3500, 0xffffffff, 0x67,
1180 	0x3504, 0xffffffff, 0x1082a,
1181 	0x3500, 0xffffffff, 0x6a,
1182 	0x3504, 0xffffffff, 0x1b082d,
1183 	0x3500, 0xffffffff, 0x87,
1184 	0x3504, 0xffffffff, 0x310851,
1185 	0x3500, 0xffffffff, 0xba,
1186 	0x3504, 0xffffffff, 0x891,
1187 	0x3500, 0xffffffff, 0xbc,
1188 	0x3504, 0xffffffff, 0x893,
1189 	0x3500, 0xffffffff, 0xbe,
1190 	0x3504, 0xffffffff, 0x20895,
1191 	0x3500, 0xffffffff, 0xc2,
1192 	0x3504, 0xffffffff, 0x20899,
1193 	0x3500, 0xffffffff, 0xc6,
1194 	0x3504, 0xffffffff, 0x2089d,
1195 	0x3500, 0xffffffff, 0xca,
1196 	0x3504, 0xffffffff, 0x8a1,
1197 	0x3500, 0xffffffff, 0xcc,
1198 	0x3504, 0xffffffff, 0x8a3,
1199 	0x3500, 0xffffffff, 0xce,
1200 	0x3504, 0xffffffff, 0x308a5,
1201 	0x3500, 0xffffffff, 0xd3,
1202 	0x3504, 0xffffffff, 0x6d08cd,
1203 	0x3500, 0xffffffff, 0x142,
1204 	0x3504, 0xffffffff, 0x2000095a,
1205 	0x3504, 0xffffffff, 0x1,
1206 	0x3500, 0xffffffff, 0x144,
1207 	0x3504, 0xffffffff, 0x301f095b,
1208 	0x3500, 0xffffffff, 0x165,
1209 	0x3504, 0xffffffff, 0xc094d,
1210 	0x3500, 0xffffffff, 0x173,
1211 	0x3504, 0xffffffff, 0xf096d,
1212 	0x3500, 0xffffffff, 0x184,
1213 	0x3504, 0xffffffff, 0x15097f,
1214 	0x3500, 0xffffffff, 0x19b,
1215 	0x3504, 0xffffffff, 0xc0998,
1216 	0x3500, 0xffffffff, 0x1a9,
1217 	0x3504, 0xffffffff, 0x409a7,
1218 	0x3500, 0xffffffff, 0x1af,
1219 	0x3504, 0xffffffff, 0xcdc,
1220 	0x3500, 0xffffffff, 0x1b1,
1221 	0x3504, 0xffffffff, 0x800,
1222 	0x3508, 0xffffffff, 0x6c9b2000,
1223 	0x3510, 0xfc00, 0x2000,
1224 	0x3544, 0xffffffff, 0xfc0,
1225 	0x28d4, 0x00000100, 0x100
1226 };
1227 
1228 static void si_init_golden_registers(struct radeon_device *rdev)
1229 {
1230 	switch (rdev->family) {
1231 	case CHIP_TAHITI:
1232 		radeon_program_register_sequence(rdev,
1233 						 tahiti_golden_registers,
1234 						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
1235 		radeon_program_register_sequence(rdev,
1236 						 tahiti_golden_rlc_registers,
1237 						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1238 		radeon_program_register_sequence(rdev,
1239 						 tahiti_mgcg_cgcg_init,
1240 						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1241 		radeon_program_register_sequence(rdev,
1242 						 tahiti_golden_registers2,
1243 						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1244 		break;
1245 	case CHIP_PITCAIRN:
1246 		radeon_program_register_sequence(rdev,
1247 						 pitcairn_golden_registers,
1248 						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1249 		radeon_program_register_sequence(rdev,
1250 						 pitcairn_golden_rlc_registers,
1251 						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1252 		radeon_program_register_sequence(rdev,
1253 						 pitcairn_mgcg_cgcg_init,
1254 						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1255 		break;
1256 	case CHIP_VERDE:
1257 		radeon_program_register_sequence(rdev,
1258 						 verde_golden_registers,
1259 						 (const u32)ARRAY_SIZE(verde_golden_registers));
1260 		radeon_program_register_sequence(rdev,
1261 						 verde_golden_rlc_registers,
1262 						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1263 		radeon_program_register_sequence(rdev,
1264 						 verde_mgcg_cgcg_init,
1265 						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1266 		radeon_program_register_sequence(rdev,
1267 						 verde_pg_init,
1268 						 (const u32)ARRAY_SIZE(verde_pg_init));
1269 		break;
1270 	case CHIP_OLAND:
1271 		radeon_program_register_sequence(rdev,
1272 						 oland_golden_registers,
1273 						 (const u32)ARRAY_SIZE(oland_golden_registers));
1274 		radeon_program_register_sequence(rdev,
1275 						 oland_golden_rlc_registers,
1276 						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1277 		radeon_program_register_sequence(rdev,
1278 						 oland_mgcg_cgcg_init,
1279 						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1280 		break;
1281 	case CHIP_HAINAN:
1282 		radeon_program_register_sequence(rdev,
1283 						 hainan_golden_registers,
1284 						 (const u32)ARRAY_SIZE(hainan_golden_registers));
1285 		radeon_program_register_sequence(rdev,
1286 						 hainan_golden_registers2,
1287 						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
1288 		radeon_program_register_sequence(rdev,
1289 						 hainan_mgcg_cgcg_init,
1290 						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1291 		break;
1292 	default:
1293 		break;
1294 	}
1295 }
1296 
1297 /**
1298  * si_get_allowed_info_register - fetch the register for the info ioctl
1299  *
1300  * @rdev: radeon_device pointer
1301  * @reg: register offset in bytes
1302  * @val: register value
1303  *
1304  * Returns 0 for success or -EINVAL for an invalid register
1305  *
1306  */
1307 int si_get_allowed_info_register(struct radeon_device *rdev,
1308 				 u32 reg, u32 *val)
1309 {
1310 	switch (reg) {
1311 	case GRBM_STATUS:
1312 	case GRBM_STATUS2:
1313 	case GRBM_STATUS_SE0:
1314 	case GRBM_STATUS_SE1:
1315 	case SRBM_STATUS:
1316 	case SRBM_STATUS2:
1317 	case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
1318 	case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
1319 	case UVD_STATUS:
1320 		*val = RREG32(reg);
1321 		return 0;
1322 	default:
1323 		return -EINVAL;
1324 	}
1325 }
1326 
1327 #define PCIE_BUS_CLK                10000
1328 #define TCLK                        (PCIE_BUS_CLK / 10)
1329 
1330 /**
1331  * si_get_xclk - get the xclk
1332  *
1333  * @rdev: radeon_device pointer
1334  *
1335  * Returns the reference clock used by the gfx engine
1336  * (SI).
1337  */
1338 u32 si_get_xclk(struct radeon_device *rdev)
1339 {
1340 	u32 reference_clock = rdev->clock.spll.reference_freq;
1341 	u32 tmp;
1342 
1343 	tmp = RREG32(CG_CLKPIN_CNTL_2);
1344 	if (tmp & MUX_TCLK_TO_XCLK)
1345 		return TCLK;
1346 
1347 	tmp = RREG32(CG_CLKPIN_CNTL);
1348 	if (tmp & XTALIN_DIVIDE)
1349 		return reference_clock / 4;
1350 
1351 	return reference_clock;
1352 }
1353 
1354 /* get temperature in millidegrees */
1355 int si_get_temp(struct radeon_device *rdev)
1356 {
1357 	u32 temp;
1358 	int actual_temp = 0;
1359 
1360 	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1361 		CTF_TEMP_SHIFT;
1362 
1363 	if (temp & 0x200)
1364 		actual_temp = 255;
1365 	else
1366 		actual_temp = temp & 0x1ff;
1367 
1368 	actual_temp = (actual_temp * 1000);
1369 
1370 	return actual_temp;
1371 }
1372 
/*
 * Row count shared by all five per-ASIC *_io_mc_regs tables in this file;
 * si_mc_load_microcode() also uses it as the loop bound when writing them.
 */
1373 #define TAHITI_IO_MC_REGS_SIZE 36
1374 
/*
 * Tahiti MC IO table, used by si_mc_load_microcode() only when
 * rdev->new_fw is not set.  For each row, element [0] is written to
 * MC_SEQ_IO_DEBUG_INDEX and element [1] to MC_SEQ_IO_DEBUG_DATA.
 */
1375 static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1376 	{0x0000006f, 0x03044000},
1377 	{0x00000070, 0x0480c018},
1378 	{0x00000071, 0x00000040},
1379 	{0x00000072, 0x01000000},
1380 	{0x00000074, 0x000000ff},
1381 	{0x00000075, 0x00143400},
1382 	{0x00000076, 0x08ec0800},
1383 	{0x00000077, 0x040000cc},
1384 	{0x00000079, 0x00000000},
1385 	{0x0000007a, 0x21000409},
1386 	{0x0000007c, 0x00000000},
1387 	{0x0000007d, 0xe8000000},
1388 	{0x0000007e, 0x044408a8},
1389 	{0x0000007f, 0x00000003},
1390 	{0x00000080, 0x00000000},
1391 	{0x00000081, 0x01000000},
1392 	{0x00000082, 0x02000000},
1393 	{0x00000083, 0x00000000},
1394 	{0x00000084, 0xe3f3e4f4},
1395 	{0x00000085, 0x00052024},
1396 	{0x00000087, 0x00000000},
1397 	{0x00000088, 0x66036603},
1398 	{0x00000089, 0x01000000},
1399 	{0x0000008b, 0x1c0a0000},
1400 	{0x0000008c, 0xff010000},
1401 	{0x0000008e, 0xffffefff},
1402 	{0x0000008f, 0xfff3efff},
1403 	{0x00000090, 0xfff3efbf},
1404 	{0x00000094, 0x00101101},
1405 	{0x00000095, 0x00000fff},
1406 	{0x00000096, 0x00116fff},
1407 	{0x00000097, 0x60010000},
1408 	{0x00000098, 0x10010000},
1409 	{0x00000099, 0x00006000},
1410 	{0x0000009a, 0x00001000},
1411 	{0x0000009f, 0x00a77400}
1412 };
1413 
/*
 * Pitcairn MC IO table, used by si_mc_load_microcode() only when
 * rdev->new_fw is not set.  For each row, element [0] is written to
 * MC_SEQ_IO_DEBUG_INDEX and element [1] to MC_SEQ_IO_DEBUG_DATA.
 * Appears to differ from the other SI tables only in the final (0x9f) row.
 */
1414 static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1415 	{0x0000006f, 0x03044000},
1416 	{0x00000070, 0x0480c018},
1417 	{0x00000071, 0x00000040},
1418 	{0x00000072, 0x01000000},
1419 	{0x00000074, 0x000000ff},
1420 	{0x00000075, 0x00143400},
1421 	{0x00000076, 0x08ec0800},
1422 	{0x00000077, 0x040000cc},
1423 	{0x00000079, 0x00000000},
1424 	{0x0000007a, 0x21000409},
1425 	{0x0000007c, 0x00000000},
1426 	{0x0000007d, 0xe8000000},
1427 	{0x0000007e, 0x044408a8},
1428 	{0x0000007f, 0x00000003},
1429 	{0x00000080, 0x00000000},
1430 	{0x00000081, 0x01000000},
1431 	{0x00000082, 0x02000000},
1432 	{0x00000083, 0x00000000},
1433 	{0x00000084, 0xe3f3e4f4},
1434 	{0x00000085, 0x00052024},
1435 	{0x00000087, 0x00000000},
1436 	{0x00000088, 0x66036603},
1437 	{0x00000089, 0x01000000},
1438 	{0x0000008b, 0x1c0a0000},
1439 	{0x0000008c, 0xff010000},
1440 	{0x0000008e, 0xffffefff},
1441 	{0x0000008f, 0xfff3efff},
1442 	{0x00000090, 0xfff3efbf},
1443 	{0x00000094, 0x00101101},
1444 	{0x00000095, 0x00000fff},
1445 	{0x00000096, 0x00116fff},
1446 	{0x00000097, 0x60010000},
1447 	{0x00000098, 0x10010000},
1448 	{0x00000099, 0x00006000},
1449 	{0x0000009a, 0x00001000},
1450 	{0x0000009f, 0x00a47400}
1451 };
1452 
/*
 * Verde MC IO table, used by si_mc_load_microcode() only when
 * rdev->new_fw is not set.  It is also the table selected by that
 * function's default case, i.e. for any family without its own case.
 * For each row, element [0] is written to MC_SEQ_IO_DEBUG_INDEX and
 * element [1] to MC_SEQ_IO_DEBUG_DATA.
 * Appears to differ from the other SI tables only in the final (0x9f) row.
 */
1453 static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1454 	{0x0000006f, 0x03044000},
1455 	{0x00000070, 0x0480c018},
1456 	{0x00000071, 0x00000040},
1457 	{0x00000072, 0x01000000},
1458 	{0x00000074, 0x000000ff},
1459 	{0x00000075, 0x00143400},
1460 	{0x00000076, 0x08ec0800},
1461 	{0x00000077, 0x040000cc},
1462 	{0x00000079, 0x00000000},
1463 	{0x0000007a, 0x21000409},
1464 	{0x0000007c, 0x00000000},
1465 	{0x0000007d, 0xe8000000},
1466 	{0x0000007e, 0x044408a8},
1467 	{0x0000007f, 0x00000003},
1468 	{0x00000080, 0x00000000},
1469 	{0x00000081, 0x01000000},
1470 	{0x00000082, 0x02000000},
1471 	{0x00000083, 0x00000000},
1472 	{0x00000084, 0xe3f3e4f4},
1473 	{0x00000085, 0x00052024},
1474 	{0x00000087, 0x00000000},
1475 	{0x00000088, 0x66036603},
1476 	{0x00000089, 0x01000000},
1477 	{0x0000008b, 0x1c0a0000},
1478 	{0x0000008c, 0xff010000},
1479 	{0x0000008e, 0xffffefff},
1480 	{0x0000008f, 0xfff3efff},
1481 	{0x00000090, 0xfff3efbf},
1482 	{0x00000094, 0x00101101},
1483 	{0x00000095, 0x00000fff},
1484 	{0x00000096, 0x00116fff},
1485 	{0x00000097, 0x60010000},
1486 	{0x00000098, 0x10010000},
1487 	{0x00000099, 0x00006000},
1488 	{0x0000009a, 0x00001000},
1489 	{0x0000009f, 0x00a37400}
1490 };
1491 
/*
 * Oland MC IO table, used by si_mc_load_microcode() only when
 * rdev->new_fw is not set.  For each row, element [0] is written to
 * MC_SEQ_IO_DEBUG_INDEX and element [1] to MC_SEQ_IO_DEBUG_DATA.
 * Appears to differ from the other SI tables only in the final (0x9f) row.
 */
1492 static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1493 	{0x0000006f, 0x03044000},
1494 	{0x00000070, 0x0480c018},
1495 	{0x00000071, 0x00000040},
1496 	{0x00000072, 0x01000000},
1497 	{0x00000074, 0x000000ff},
1498 	{0x00000075, 0x00143400},
1499 	{0x00000076, 0x08ec0800},
1500 	{0x00000077, 0x040000cc},
1501 	{0x00000079, 0x00000000},
1502 	{0x0000007a, 0x21000409},
1503 	{0x0000007c, 0x00000000},
1504 	{0x0000007d, 0xe8000000},
1505 	{0x0000007e, 0x044408a8},
1506 	{0x0000007f, 0x00000003},
1507 	{0x00000080, 0x00000000},
1508 	{0x00000081, 0x01000000},
1509 	{0x00000082, 0x02000000},
1510 	{0x00000083, 0x00000000},
1511 	{0x00000084, 0xe3f3e4f4},
1512 	{0x00000085, 0x00052024},
1513 	{0x00000087, 0x00000000},
1514 	{0x00000088, 0x66036603},
1515 	{0x00000089, 0x01000000},
1516 	{0x0000008b, 0x1c0a0000},
1517 	{0x0000008c, 0xff010000},
1518 	{0x0000008e, 0xffffefff},
1519 	{0x0000008f, 0xfff3efff},
1520 	{0x00000090, 0xfff3efbf},
1521 	{0x00000094, 0x00101101},
1522 	{0x00000095, 0x00000fff},
1523 	{0x00000096, 0x00116fff},
1524 	{0x00000097, 0x60010000},
1525 	{0x00000098, 0x10010000},
1526 	{0x00000099, 0x00006000},
1527 	{0x0000009a, 0x00001000},
1528 	{0x0000009f, 0x00a17730}
1529 };
1530 
/*
 * Hainan MC IO table, used by si_mc_load_microcode() only when
 * rdev->new_fw is not set.  For each row, element [0] is written to
 * MC_SEQ_IO_DEBUG_INDEX and element [1] to MC_SEQ_IO_DEBUG_DATA.
 * Appears to differ from the other SI tables only in the final (0x9f) row.
 */
1531 static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1532 	{0x0000006f, 0x03044000},
1533 	{0x00000070, 0x0480c018},
1534 	{0x00000071, 0x00000040},
1535 	{0x00000072, 0x01000000},
1536 	{0x00000074, 0x000000ff},
1537 	{0x00000075, 0x00143400},
1538 	{0x00000076, 0x08ec0800},
1539 	{0x00000077, 0x040000cc},
1540 	{0x00000079, 0x00000000},
1541 	{0x0000007a, 0x21000409},
1542 	{0x0000007c, 0x00000000},
1543 	{0x0000007d, 0xe8000000},
1544 	{0x0000007e, 0x044408a8},
1545 	{0x0000007f, 0x00000003},
1546 	{0x00000080, 0x00000000},
1547 	{0x00000081, 0x01000000},
1548 	{0x00000082, 0x02000000},
1549 	{0x00000083, 0x00000000},
1550 	{0x00000084, 0xe3f3e4f4},
1551 	{0x00000085, 0x00052024},
1552 	{0x00000087, 0x00000000},
1553 	{0x00000088, 0x66036603},
1554 	{0x00000089, 0x01000000},
1555 	{0x0000008b, 0x1c0a0000},
1556 	{0x0000008c, 0xff010000},
1557 	{0x0000008e, 0xffffefff},
1558 	{0x0000008f, 0xfff3efff},
1559 	{0x00000090, 0xfff3efbf},
1560 	{0x00000094, 0x00101101},
1561 	{0x00000095, 0x00000fff},
1562 	{0x00000096, 0x00116fff},
1563 	{0x00000097, 0x60010000},
1564 	{0x00000098, 0x10010000},
1565 	{0x00000099, 0x00006000},
1566 	{0x0000009a, 0x00001000},
1567 	{0x0000009f, 0x00a07730}
1568 };
1569 
1570 /* ucode loading */
1571 int si_mc_load_microcode(struct radeon_device *rdev)
1572 {
1573 	const __be32 *fw_data = NULL;
1574 	const __le32 *new_fw_data = NULL;
1575 	u32 running;
1576 	u32 *io_mc_regs = NULL;
1577 	const __le32 *new_io_mc_regs = NULL;
1578 	int i, regs_size, ucode_size;
1579 
1580 	if (!rdev->mc_fw)
1581 		return -EINVAL;
1582 
1583 	if (rdev->new_fw) {
1584 		const struct mc_firmware_header_v1_0 *hdr =
1585 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1586 
1587 		radeon_ucode_print_mc_hdr(&hdr->header);
1588 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1589 		new_io_mc_regs = (const __le32 *)
1590 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1591 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1592 		new_fw_data = (const __le32 *)
1593 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1594 	} else {
1595 		ucode_size = rdev->mc_fw->size / 4;
1596 
1597 		switch (rdev->family) {
1598 		case CHIP_TAHITI:
1599 			io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1600 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1601 			break;
1602 		case CHIP_PITCAIRN:
1603 			io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1604 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1605 			break;
1606 		case CHIP_VERDE:
1607 		default:
1608 			io_mc_regs = (u32 *)&verde_io_mc_regs;
1609 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1610 			break;
1611 		case CHIP_OLAND:
1612 			io_mc_regs = (u32 *)&oland_io_mc_regs;
1613 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1614 			break;
1615 		case CHIP_HAINAN:
1616 			io_mc_regs = (u32 *)&hainan_io_mc_regs;
1617 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1618 			break;
1619 		}
1620 		fw_data = (const __be32 *)rdev->mc_fw->data;
1621 	}
1622 
1623 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1624 
1625 	if (running == 0) {
1626 		/* reset the engine and set to writable */
1627 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1628 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1629 
1630 		/* load mc io regs */
1631 		for (i = 0; i < regs_size; i++) {
1632 			if (rdev->new_fw) {
1633 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1634 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1635 			} else {
1636 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1637 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1638 			}
1639 		}
1640 		/* load the MC ucode */
1641 		for (i = 0; i < ucode_size; i++) {
1642 			if (rdev->new_fw)
1643 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1644 			else
1645 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1646 		}
1647 
1648 		/* put the engine back into the active state */
1649 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1650 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1651 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1652 
1653 		/* wait for training to complete */
1654 		for (i = 0; i < rdev->usec_timeout; i++) {
1655 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1656 				break;
1657 			udelay(1);
1658 		}
1659 		for (i = 0; i < rdev->usec_timeout; i++) {
1660 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1661 				break;
1662 			udelay(1);
1663 		}
1664 	}
1665 
1666 	return 0;
1667 }
1668 
1669 static int si_init_microcode(struct radeon_device *rdev)
1670 {
1671 	const char *chip_name;
1672 	const char *new_chip_name;
1673 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1674 	size_t smc_req_size, mc2_req_size;
1675 	char fw_name[30];
1676 	int err;
1677 	int new_fw = 0;
1678 	bool new_smc = false;
1679 	bool si58_fw = false;
1680 	bool banks2_fw = false;
1681 
1682 	DRM_DEBUG("\n");
1683 
1684 	switch (rdev->family) {
1685 	case CHIP_TAHITI:
1686 		chip_name = "TAHITI";
1687 		new_chip_name = "tahiti";
1688 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1689 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1690 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1691 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1692 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1693 		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1694 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1695 		break;
1696 	case CHIP_PITCAIRN:
1697 		chip_name = "PITCAIRN";
1698 		if ((rdev->pdev->revision == 0x81) &&
1699 		    ((rdev->pdev->device == 0x6810) ||
1700 		     (rdev->pdev->device == 0x6811)))
1701 			new_smc = true;
1702 		new_chip_name = "pitcairn";
1703 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1704 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1705 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1706 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1707 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1708 		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1709 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1710 		break;
1711 	case CHIP_VERDE:
1712 		chip_name = "VERDE";
1713 		if (((rdev->pdev->device == 0x6820) &&
1714 		     ((rdev->pdev->revision == 0x81) ||
1715 		      (rdev->pdev->revision == 0x83))) ||
1716 		    ((rdev->pdev->device == 0x6821) &&
1717 		     ((rdev->pdev->revision == 0x83) ||
1718 		      (rdev->pdev->revision == 0x87))) ||
1719 		    ((rdev->pdev->revision == 0x87) &&
1720 		     ((rdev->pdev->device == 0x6823) ||
1721 		      (rdev->pdev->device == 0x682b))))
1722 			new_smc = true;
1723 		new_chip_name = "verde";
1724 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1725 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1726 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1727 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1728 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1729 		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1730 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1731 		break;
1732 	case CHIP_OLAND:
1733 		chip_name = "OLAND";
1734 		if (((rdev->pdev->revision == 0x81) &&
1735 		     ((rdev->pdev->device == 0x6600) ||
1736 		      (rdev->pdev->device == 0x6604) ||
1737 		      (rdev->pdev->device == 0x6605) ||
1738 		      (rdev->pdev->device == 0x6610))) ||
1739 		    ((rdev->pdev->revision == 0x83) &&
1740 		     (rdev->pdev->device == 0x6610)))
1741 			new_smc = true;
1742 		new_chip_name = "oland";
1743 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1744 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1745 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1746 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1747 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1748 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1749 		break;
1750 	case CHIP_HAINAN:
1751 		chip_name = "HAINAN";
1752 		if (((rdev->pdev->revision == 0x81) &&
1753 		     (rdev->pdev->device == 0x6660)) ||
1754 		    ((rdev->pdev->revision == 0x83) &&
1755 		     ((rdev->pdev->device == 0x6660) ||
1756 		      (rdev->pdev->device == 0x6663) ||
1757 		      (rdev->pdev->device == 0x6665) ||
1758 		      (rdev->pdev->device == 0x6667))))
1759 			new_smc = true;
1760 		else if ((rdev->pdev->revision == 0xc3) &&
1761 			 (rdev->pdev->device == 0x6665))
1762 			banks2_fw = true;
1763 		new_chip_name = "hainan";
1764 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1765 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1766 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1767 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1768 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1769 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1770 		break;
1771 	default: BUG();
1772 	}
1773 
1774 	/* this memory configuration requires special firmware */
1775 	if (((RREG32(MC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58)
1776 		si58_fw = true;
1777 
1778 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
1779 
1780 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1781 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1782 	if (err) {
1783 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1784 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1785 		if (err)
1786 			goto out;
1787 		if (rdev->pfp_fw->size != pfp_req_size) {
1788 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1789 			       rdev->pfp_fw->size, fw_name);
1790 			err = -EINVAL;
1791 			goto out;
1792 		}
1793 	} else {
1794 		err = radeon_ucode_validate(rdev->pfp_fw);
1795 		if (err) {
1796 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1797 			       fw_name);
1798 			goto out;
1799 		} else {
1800 			new_fw++;
1801 		}
1802 	}
1803 
1804 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
1805 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1806 	if (err) {
1807 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1808 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1809 		if (err)
1810 			goto out;
1811 		if (rdev->me_fw->size != me_req_size) {
1812 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1813 			       rdev->me_fw->size, fw_name);
1814 			err = -EINVAL;
1815 		}
1816 	} else {
1817 		err = radeon_ucode_validate(rdev->me_fw);
1818 		if (err) {
1819 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1820 			       fw_name);
1821 			goto out;
1822 		} else {
1823 			new_fw++;
1824 		}
1825 	}
1826 
1827 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
1828 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1829 	if (err) {
1830 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1831 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1832 		if (err)
1833 			goto out;
1834 		if (rdev->ce_fw->size != ce_req_size) {
1835 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1836 			       rdev->ce_fw->size, fw_name);
1837 			err = -EINVAL;
1838 		}
1839 	} else {
1840 		err = radeon_ucode_validate(rdev->ce_fw);
1841 		if (err) {
1842 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1843 			       fw_name);
1844 			goto out;
1845 		} else {
1846 			new_fw++;
1847 		}
1848 	}
1849 
1850 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
1851 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1852 	if (err) {
1853 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1854 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1855 		if (err)
1856 			goto out;
1857 		if (rdev->rlc_fw->size != rlc_req_size) {
1858 			pr_err("si_rlc: Bogus length %zu in firmware \"%s\"\n",
1859 			       rdev->rlc_fw->size, fw_name);
1860 			err = -EINVAL;
1861 		}
1862 	} else {
1863 		err = radeon_ucode_validate(rdev->rlc_fw);
1864 		if (err) {
1865 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1866 			       fw_name);
1867 			goto out;
1868 		} else {
1869 			new_fw++;
1870 		}
1871 	}
1872 
1873 	if (si58_fw)
1874 		snprintf(fw_name, sizeof(fw_name), "radeon/si58_mc.bin");
1875 	else
1876 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
1877 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1878 	if (err) {
1879 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1880 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1881 		if (err) {
1882 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1883 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1884 			if (err)
1885 				goto out;
1886 		}
1887 		if ((rdev->mc_fw->size != mc_req_size) &&
1888 		    (rdev->mc_fw->size != mc2_req_size)) {
1889 			pr_err("si_mc: Bogus length %zu in firmware \"%s\"\n",
1890 			       rdev->mc_fw->size, fw_name);
1891 			err = -EINVAL;
1892 		}
1893 		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1894 	} else {
1895 		err = radeon_ucode_validate(rdev->mc_fw);
1896 		if (err) {
1897 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1898 			       fw_name);
1899 			goto out;
1900 		} else {
1901 			new_fw++;
1902 		}
1903 	}
1904 
1905 	if (banks2_fw)
1906 		snprintf(fw_name, sizeof(fw_name), "radeon/banks_k_2_smc.bin");
1907 	else if (new_smc)
1908 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
1909 	else
1910 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
1911 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1912 	if (err) {
1913 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1914 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1915 		if (err) {
1916 			pr_err("smc: error loading firmware \"%s\"\n", fw_name);
1917 			release_firmware(rdev->smc_fw);
1918 			rdev->smc_fw = NULL;
1919 			err = 0;
1920 		} else if (rdev->smc_fw->size != smc_req_size) {
1921 			pr_err("si_smc: Bogus length %zu in firmware \"%s\"\n",
1922 			       rdev->smc_fw->size, fw_name);
1923 			err = -EINVAL;
1924 		}
1925 	} else {
1926 		err = radeon_ucode_validate(rdev->smc_fw);
1927 		if (err) {
1928 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1929 			       fw_name);
1930 			goto out;
1931 		} else {
1932 			new_fw++;
1933 		}
1934 	}
1935 
1936 	if (new_fw == 0) {
1937 		rdev->new_fw = false;
1938 	} else if (new_fw < 6) {
1939 		pr_err("si_fw: mixing new and old firmware!\n");
1940 		err = -EINVAL;
1941 	} else {
1942 		rdev->new_fw = true;
1943 	}
1944 out:
1945 	if (err) {
1946 		if (err != -EINVAL)
1947 			pr_err("si_cp: Failed to load firmware \"%s\"\n",
1948 			       fw_name);
1949 		release_firmware(rdev->pfp_fw);
1950 		rdev->pfp_fw = NULL;
1951 		release_firmware(rdev->me_fw);
1952 		rdev->me_fw = NULL;
1953 		release_firmware(rdev->ce_fw);
1954 		rdev->ce_fw = NULL;
1955 		release_firmware(rdev->rlc_fw);
1956 		rdev->rlc_fw = NULL;
1957 		release_firmware(rdev->mc_fw);
1958 		rdev->mc_fw = NULL;
1959 		release_firmware(rdev->smc_fw);
1960 		rdev->smc_fw = NULL;
1961 	}
1962 	return err;
1963 }
1964 
1965 /* watermark setup */
1966 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1967 				   struct radeon_crtc *radeon_crtc,
1968 				   struct drm_display_mode *mode,
1969 				   struct drm_display_mode *other_mode)
1970 {
1971 	u32 tmp, buffer_alloc, i;
1972 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1973 	/*
1974 	 * Line Buffer Setup
1975 	 * There are 3 line buffers, each one shared by 2 display controllers.
1976 	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1977 	 * the display controllers.  The paritioning is done via one of four
1978 	 * preset allocations specified in bits 21:20:
1979 	 *  0 - half lb
1980 	 *  2 - whole lb, other crtc must be disabled
1981 	 */
1982 	/* this can get tricky if we have two large displays on a paired group
1983 	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1984 	 * non-linked crtcs for maximum line buffer allocation.
1985 	 */
1986 	if (radeon_crtc->base.enabled && mode) {
1987 		if (other_mode) {
1988 			tmp = 0; /* 1/2 */
1989 			buffer_alloc = 1;
1990 		} else {
1991 			tmp = 2; /* whole */
1992 			buffer_alloc = 2;
1993 		}
1994 	} else {
1995 		tmp = 0;
1996 		buffer_alloc = 0;
1997 	}
1998 
1999 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
2000 	       DC_LB_MEMORY_CONFIG(tmp));
2001 
2002 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
2003 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
2004 	for (i = 0; i < rdev->usec_timeout; i++) {
2005 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
2006 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
2007 			break;
2008 		udelay(1);
2009 	}
2010 
2011 	if (radeon_crtc->base.enabled && mode) {
2012 		switch (tmp) {
2013 		case 0:
2014 		default:
2015 			return 4096 * 2;
2016 		case 2:
2017 			return 8192 * 2;
2018 		}
2019 	}
2020 
2021 	/* controller not enabled, so no lb used */
2022 	return 0;
2023 }
2024 
2025 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
2026 {
2027 	u32 tmp = RREG32(MC_SHARED_CHMAP);
2028 
2029 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2030 	case 0:
2031 	default:
2032 		return 1;
2033 	case 1:
2034 		return 2;
2035 	case 2:
2036 		return 4;
2037 	case 3:
2038 		return 8;
2039 	case 4:
2040 		return 3;
2041 	case 5:
2042 		return 6;
2043 	case 6:
2044 		return 10;
2045 	case 7:
2046 		return 12;
2047 	case 8:
2048 		return 16;
2049 	}
2050 }
2051 
2052 struct dce6_wm_params {
2053 	u32 dram_channels; /* number of dram channels */
2054 	u32 yclk;          /* bandwidth per dram data pin in kHz */
2055 	u32 sclk;          /* engine clock in kHz */
2056 	u32 disp_clk;      /* display clock in kHz */
2057 	u32 src_width;     /* viewport width */
2058 	u32 active_time;   /* active display time in ns */
2059 	u32 blank_time;    /* blank time in ns */
2060 	bool interlaced;    /* mode is interlaced */
2061 	fixed20_12 vsc;    /* vertical scale ratio */
2062 	u32 num_heads;     /* number of active crtcs */
2063 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
2064 	u32 lb_size;       /* line buffer allocated to pipe */
2065 	u32 vtaps;         /* vertical scaler taps */
2066 };
2067 
2068 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
2069 {
2070 	/* Calculate raw DRAM Bandwidth */
2071 	fixed20_12 dram_efficiency; /* 0.7 */
2072 	fixed20_12 yclk, dram_channels, bandwidth;
2073 	fixed20_12 a;
2074 
2075 	a.full = dfixed_const(1000);
2076 	yclk.full = dfixed_const(wm->yclk);
2077 	yclk.full = dfixed_div(yclk, a);
2078 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2079 	a.full = dfixed_const(10);
2080 	dram_efficiency.full = dfixed_const(7);
2081 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
2082 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2083 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2084 
2085 	return dfixed_trunc(bandwidth);
2086 }
2087 
2088 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2089 {
2090 	/* Calculate DRAM Bandwidth and the part allocated to display. */
2091 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2092 	fixed20_12 yclk, dram_channels, bandwidth;
2093 	fixed20_12 a;
2094 
2095 	a.full = dfixed_const(1000);
2096 	yclk.full = dfixed_const(wm->yclk);
2097 	yclk.full = dfixed_div(yclk, a);
2098 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2099 	a.full = dfixed_const(10);
2100 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
2101 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2102 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2103 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2104 
2105 	return dfixed_trunc(bandwidth);
2106 }
2107 
2108 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2109 {
2110 	/* Calculate the display Data return Bandwidth */
2111 	fixed20_12 return_efficiency; /* 0.8 */
2112 	fixed20_12 sclk, bandwidth;
2113 	fixed20_12 a;
2114 
2115 	a.full = dfixed_const(1000);
2116 	sclk.full = dfixed_const(wm->sclk);
2117 	sclk.full = dfixed_div(sclk, a);
2118 	a.full = dfixed_const(10);
2119 	return_efficiency.full = dfixed_const(8);
2120 	return_efficiency.full = dfixed_div(return_efficiency, a);
2121 	a.full = dfixed_const(32);
2122 	bandwidth.full = dfixed_mul(a, sclk);
2123 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2124 
2125 	return dfixed_trunc(bandwidth);
2126 }
2127 
2128 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2129 {
2130 	return 32;
2131 }
2132 
2133 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2134 {
2135 	/* Calculate the DMIF Request Bandwidth */
2136 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2137 	fixed20_12 disp_clk, sclk, bandwidth;
2138 	fixed20_12 a, b1, b2;
2139 	u32 min_bandwidth;
2140 
2141 	a.full = dfixed_const(1000);
2142 	disp_clk.full = dfixed_const(wm->disp_clk);
2143 	disp_clk.full = dfixed_div(disp_clk, a);
2144 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2145 	b1.full = dfixed_mul(a, disp_clk);
2146 
2147 	a.full = dfixed_const(1000);
2148 	sclk.full = dfixed_const(wm->sclk);
2149 	sclk.full = dfixed_div(sclk, a);
2150 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2151 	b2.full = dfixed_mul(a, sclk);
2152 
2153 	a.full = dfixed_const(10);
2154 	disp_clk_request_efficiency.full = dfixed_const(8);
2155 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2156 
2157 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2158 
2159 	a.full = dfixed_const(min_bandwidth);
2160 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2161 
2162 	return dfixed_trunc(bandwidth);
2163 }
2164 
2165 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2166 {
2167 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
2168 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2169 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2170 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2171 
2172 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2173 }
2174 
2175 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2176 {
2177 	/* Calculate the display mode Average Bandwidth
2178 	 * DisplayMode should contain the source and destination dimensions,
2179 	 * timing, etc.
2180 	 */
2181 	fixed20_12 bpp;
2182 	fixed20_12 line_time;
2183 	fixed20_12 src_width;
2184 	fixed20_12 bandwidth;
2185 	fixed20_12 a;
2186 
2187 	a.full = dfixed_const(1000);
2188 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2189 	line_time.full = dfixed_div(line_time, a);
2190 	bpp.full = dfixed_const(wm->bytes_per_pixel);
2191 	src_width.full = dfixed_const(wm->src_width);
2192 	bandwidth.full = dfixed_mul(src_width, bpp);
2193 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2194 	bandwidth.full = dfixed_div(bandwidth, line_time);
2195 
2196 	return dfixed_trunc(bandwidth);
2197 }
2198 
2199 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
2200 {
2201 	/* First calcualte the latency in ns */
2202 	u32 mc_latency = 2000; /* 2000 ns. */
2203 	u32 available_bandwidth = dce6_available_bandwidth(wm);
2204 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
2205 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
2206 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
2207 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
2208 		(wm->num_heads * cursor_line_pair_return_time);
2209 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
2210 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
2211 	u32 tmp, dmif_size = 12288;
2212 	fixed20_12 a, b, c;
2213 
2214 	if (wm->num_heads == 0)
2215 		return 0;
2216 
2217 	a.full = dfixed_const(2);
2218 	b.full = dfixed_const(1);
2219 	if ((wm->vsc.full > a.full) ||
2220 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
2221 	    (wm->vtaps >= 5) ||
2222 	    ((wm->vsc.full >= a.full) && wm->interlaced))
2223 		max_src_lines_per_dst_line = 4;
2224 	else
2225 		max_src_lines_per_dst_line = 2;
2226 
2227 	a.full = dfixed_const(available_bandwidth);
2228 	b.full = dfixed_const(wm->num_heads);
2229 	a.full = dfixed_div(a, b);
2230 	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
2231 	tmp = min(dfixed_trunc(a), tmp);
2232 
2233 	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
2234 
2235 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2236 	b.full = dfixed_const(1000);
2237 	c.full = dfixed_const(lb_fill_bw);
2238 	b.full = dfixed_div(c, b);
2239 	a.full = dfixed_div(a, b);
2240 	line_fill_time = dfixed_trunc(a);
2241 
2242 	if (line_fill_time < wm->active_time)
2243 		return latency;
2244 	else
2245 		return latency + (line_fill_time - wm->active_time);
2246 
2247 }
2248 
2249 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2250 {
2251 	if (dce6_average_bandwidth(wm) <=
2252 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2253 		return true;
2254 	else
2255 		return false;
2256 };
2257 
2258 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2259 {
2260 	if (dce6_average_bandwidth(wm) <=
2261 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2262 		return true;
2263 	else
2264 		return false;
2265 };
2266 
2267 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2268 {
2269 	u32 lb_partitions = wm->lb_size / wm->src_width;
2270 	u32 line_time = wm->active_time + wm->blank_time;
2271 	u32 latency_tolerant_lines;
2272 	u32 latency_hiding;
2273 	fixed20_12 a;
2274 
2275 	a.full = dfixed_const(1);
2276 	if (wm->vsc.full > a.full)
2277 		latency_tolerant_lines = 1;
2278 	else {
2279 		if (lb_partitions <= (wm->vtaps + 1))
2280 			latency_tolerant_lines = 1;
2281 		else
2282 			latency_tolerant_lines = 2;
2283 	}
2284 
2285 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2286 
2287 	if (dce6_latency_watermark(wm) <= latency_hiding)
2288 		return true;
2289 	else
2290 		return false;
2291 }
2292 
2293 static void dce6_program_watermarks(struct radeon_device *rdev,
2294 					 struct radeon_crtc *radeon_crtc,
2295 					 u32 lb_size, u32 num_heads)
2296 {
2297 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
2298 	struct dce6_wm_params wm_low, wm_high;
2299 	u32 dram_channels;
2300 	u32 active_time;
2301 	u32 line_time = 0;
2302 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
2303 	u32 priority_a_mark = 0, priority_b_mark = 0;
2304 	u32 priority_a_cnt = PRIORITY_OFF;
2305 	u32 priority_b_cnt = PRIORITY_OFF;
2306 	u32 tmp, arb_control3;
2307 	fixed20_12 a, b, c;
2308 
2309 	if (radeon_crtc->base.enabled && num_heads && mode) {
2310 		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
2311 					    (u32)mode->clock);
2312 		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
2313 					  (u32)mode->clock);
2314 		line_time = min(line_time, (u32)65535);
2315 		priority_a_cnt = 0;
2316 		priority_b_cnt = 0;
2317 
2318 		if (rdev->family == CHIP_ARUBA)
2319 			dram_channels = evergreen_get_number_of_dram_channels(rdev);
2320 		else
2321 			dram_channels = si_get_number_of_dram_channels(rdev);
2322 
2323 		/* watermark for high clocks */
2324 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2325 			wm_high.yclk =
2326 				radeon_dpm_get_mclk(rdev, false) * 10;
2327 			wm_high.sclk =
2328 				radeon_dpm_get_sclk(rdev, false) * 10;
2329 		} else {
2330 			wm_high.yclk = rdev->pm.current_mclk * 10;
2331 			wm_high.sclk = rdev->pm.current_sclk * 10;
2332 		}
2333 
2334 		wm_high.disp_clk = mode->clock;
2335 		wm_high.src_width = mode->crtc_hdisplay;
2336 		wm_high.active_time = active_time;
2337 		wm_high.blank_time = line_time - wm_high.active_time;
2338 		wm_high.interlaced = false;
2339 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2340 			wm_high.interlaced = true;
2341 		wm_high.vsc = radeon_crtc->vsc;
2342 		wm_high.vtaps = 1;
2343 		if (radeon_crtc->rmx_type != RMX_OFF)
2344 			wm_high.vtaps = 2;
2345 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2346 		wm_high.lb_size = lb_size;
2347 		wm_high.dram_channels = dram_channels;
2348 		wm_high.num_heads = num_heads;
2349 
2350 		/* watermark for low clocks */
2351 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2352 			wm_low.yclk =
2353 				radeon_dpm_get_mclk(rdev, true) * 10;
2354 			wm_low.sclk =
2355 				radeon_dpm_get_sclk(rdev, true) * 10;
2356 		} else {
2357 			wm_low.yclk = rdev->pm.current_mclk * 10;
2358 			wm_low.sclk = rdev->pm.current_sclk * 10;
2359 		}
2360 
2361 		wm_low.disp_clk = mode->clock;
2362 		wm_low.src_width = mode->crtc_hdisplay;
2363 		wm_low.active_time = active_time;
2364 		wm_low.blank_time = line_time - wm_low.active_time;
2365 		wm_low.interlaced = false;
2366 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2367 			wm_low.interlaced = true;
2368 		wm_low.vsc = radeon_crtc->vsc;
2369 		wm_low.vtaps = 1;
2370 		if (radeon_crtc->rmx_type != RMX_OFF)
2371 			wm_low.vtaps = 2;
2372 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2373 		wm_low.lb_size = lb_size;
2374 		wm_low.dram_channels = dram_channels;
2375 		wm_low.num_heads = num_heads;
2376 
2377 		/* set for high clocks */
2378 		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2379 		/* set for low clocks */
2380 		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2381 
2382 		/* possibly force display priority to high */
2383 		/* should really do this at mode validation time... */
2384 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2385 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2386 		    !dce6_check_latency_hiding(&wm_high) ||
2387 		    (rdev->disp_priority == 2)) {
2388 			DRM_DEBUG_KMS("force priority to high\n");
2389 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2390 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2391 		}
2392 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2393 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2394 		    !dce6_check_latency_hiding(&wm_low) ||
2395 		    (rdev->disp_priority == 2)) {
2396 			DRM_DEBUG_KMS("force priority to high\n");
2397 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2398 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2399 		}
2400 
2401 		a.full = dfixed_const(1000);
2402 		b.full = dfixed_const(mode->clock);
2403 		b.full = dfixed_div(b, a);
2404 		c.full = dfixed_const(latency_watermark_a);
2405 		c.full = dfixed_mul(c, b);
2406 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2407 		c.full = dfixed_div(c, a);
2408 		a.full = dfixed_const(16);
2409 		c.full = dfixed_div(c, a);
2410 		priority_a_mark = dfixed_trunc(c);
2411 		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2412 
2413 		a.full = dfixed_const(1000);
2414 		b.full = dfixed_const(mode->clock);
2415 		b.full = dfixed_div(b, a);
2416 		c.full = dfixed_const(latency_watermark_b);
2417 		c.full = dfixed_mul(c, b);
2418 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2419 		c.full = dfixed_div(c, a);
2420 		a.full = dfixed_const(16);
2421 		c.full = dfixed_div(c, a);
2422 		priority_b_mark = dfixed_trunc(c);
2423 		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2424 
2425 		/* Save number of lines the linebuffer leads before the scanout */
2426 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
2427 	}
2428 
2429 	/* select wm A */
2430 	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2431 	tmp = arb_control3;
2432 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2433 	tmp |= LATENCY_WATERMARK_MASK(1);
2434 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2435 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2436 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2437 		LATENCY_HIGH_WATERMARK(line_time)));
2438 	/* select wm B */
2439 	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2440 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2441 	tmp |= LATENCY_WATERMARK_MASK(2);
2442 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2443 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2444 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2445 		LATENCY_HIGH_WATERMARK(line_time)));
2446 	/* restore original selection */
2447 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2448 
2449 	/* write the priority marks */
2450 	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2451 	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2452 
2453 	/* save values for DPM */
2454 	radeon_crtc->line_time = line_time;
2455 	radeon_crtc->wm_high = latency_watermark_a;
2456 	radeon_crtc->wm_low = latency_watermark_b;
2457 }
2458 
2459 void dce6_bandwidth_update(struct radeon_device *rdev)
2460 {
2461 	struct drm_display_mode *mode0 = NULL;
2462 	struct drm_display_mode *mode1 = NULL;
2463 	u32 num_heads = 0, lb_size;
2464 	int i;
2465 
2466 	if (!rdev->mode_info.mode_config_initialized)
2467 		return;
2468 
2469 	radeon_update_display_priority(rdev);
2470 
2471 	for (i = 0; i < rdev->num_crtc; i++) {
2472 		if (rdev->mode_info.crtcs[i]->base.enabled)
2473 			num_heads++;
2474 	}
2475 	for (i = 0; i < rdev->num_crtc; i += 2) {
2476 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2477 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2478 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2479 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2480 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2481 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2482 	}
2483 }
2484 
2485 /*
2486  * Core functions
2487  */
2488 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2489 {
2490 	u32 *tile = rdev->config.si.tile_mode_array;
2491 	const u32 num_tile_mode_states =
2492 			ARRAY_SIZE(rdev->config.si.tile_mode_array);
2493 	u32 reg_offset, split_equal_to_row_size;
2494 
2495 	switch (rdev->config.si.mem_row_size_in_kb) {
2496 	case 1:
2497 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2498 		break;
2499 	case 2:
2500 	default:
2501 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2502 		break;
2503 	case 4:
2504 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2505 		break;
2506 	}
2507 
2508 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2509 		tile[reg_offset] = 0;
2510 
2511 	switch(rdev->family) {
2512 	case CHIP_TAHITI:
2513 	case CHIP_PITCAIRN:
2514 		/* non-AA compressed depth or any compressed stencil */
2515 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2516 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2517 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2518 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2519 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2520 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2522 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2523 		/* 2xAA/4xAA compressed depth only */
2524 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2526 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2527 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2528 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2529 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2530 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2531 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2532 		/* 8xAA compressed depth only */
2533 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2534 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2535 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2536 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2537 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2538 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2540 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2541 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2542 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2543 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2544 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2545 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2546 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2547 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2548 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2549 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2550 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2551 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2552 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2553 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2554 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2555 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2556 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2558 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2559 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2560 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2561 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2562 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2563 			   TILE_SPLIT(split_equal_to_row_size) |
2564 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2565 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2566 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2567 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2568 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2569 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2570 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2571 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2572 			   TILE_SPLIT(split_equal_to_row_size) |
2573 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2574 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2575 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2576 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2577 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2578 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2579 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2580 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2581 			   TILE_SPLIT(split_equal_to_row_size) |
2582 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2583 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2584 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2585 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2586 		/* 1D and 1D Array Surfaces */
2587 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2588 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2589 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2590 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2591 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2592 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2593 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2594 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2595 		/* Displayable maps. */
2596 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2597 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2598 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2599 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2600 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2601 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2602 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2603 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2604 		/* Display 8bpp. */
2605 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2607 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2608 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2609 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2610 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2611 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2612 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2613 		/* Display 16bpp. */
2614 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2615 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2616 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2617 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2618 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2619 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2621 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2622 		/* Display 32bpp. */
2623 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2624 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2625 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2626 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2627 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2628 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2630 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2631 		/* Thin. */
2632 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2633 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2634 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2635 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2636 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2637 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2638 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2639 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2640 		/* Thin 8 bpp. */
2641 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2642 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2643 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2644 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2645 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2646 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2648 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2649 		/* Thin 16 bpp. */
2650 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2651 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2652 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2653 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2654 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2655 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2657 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2658 		/* Thin 32 bpp. */
2659 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2660 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2661 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2662 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2663 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2664 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2665 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2666 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2667 		/* Thin 64 bpp. */
2668 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2669 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2670 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2671 			   TILE_SPLIT(split_equal_to_row_size) |
2672 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2673 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2674 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2675 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2676 		/* 8 bpp PRT. */
2677 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2678 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2679 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2680 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2681 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2682 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2683 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2684 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2685 		/* 16 bpp PRT */
2686 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2687 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2688 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2689 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2690 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2691 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2692 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2693 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2694 		/* 32 bpp PRT */
2695 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2696 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2697 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2698 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2699 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2700 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2701 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2702 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2703 		/* 64 bpp PRT */
2704 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2705 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2706 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2707 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2708 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2709 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2710 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2711 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2712 		/* 128 bpp PRT */
2713 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2714 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2715 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2716 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2717 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2718 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2719 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2720 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2721 
2722 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2723 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2724 		break;
2725 
2726 	case CHIP_VERDE:
2727 	case CHIP_OLAND:
2728 	case CHIP_HAINAN:
2729 		/* non-AA compressed depth or any compressed stencil */
2730 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2731 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2732 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2733 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2734 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2735 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2736 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2737 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2738 		/* 2xAA/4xAA compressed depth only */
2739 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2740 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2741 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2742 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2743 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2744 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2746 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2747 		/* 8xAA compressed depth only */
2748 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2749 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2750 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2751 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2752 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2753 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2754 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2755 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2756 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2757 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2758 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2759 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2760 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2761 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2762 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2764 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2765 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2766 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2767 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2768 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2769 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2770 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2771 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2772 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2773 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2774 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2775 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2776 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2777 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778 			   TILE_SPLIT(split_equal_to_row_size) |
2779 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2780 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2782 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2783 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2784 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2786 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787 			   TILE_SPLIT(split_equal_to_row_size) |
2788 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2789 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2790 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2791 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2792 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2793 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2794 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2795 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2796 			   TILE_SPLIT(split_equal_to_row_size) |
2797 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2798 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2799 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2800 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2801 		/* 1D and 1D Array Surfaces */
2802 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2803 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2804 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2805 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2806 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2807 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2808 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2809 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2810 		/* Displayable maps. */
2811 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2812 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2813 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2815 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2816 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2817 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2818 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2819 		/* Display 8bpp. */
2820 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2821 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2822 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2823 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2824 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2825 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2826 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2827 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2828 		/* Display 16bpp. */
2829 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2830 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2831 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2832 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2833 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2834 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2835 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2836 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2837 		/* Display 32bpp. */
2838 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2839 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2840 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2841 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2842 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2843 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2844 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2845 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2846 		/* Thin. */
2847 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2848 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2849 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2850 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2851 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2852 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2854 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2855 		/* Thin 8 bpp. */
2856 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2857 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2858 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2859 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2860 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2861 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2863 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2864 		/* Thin 16 bpp. */
2865 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2866 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2867 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2868 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2869 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2870 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2871 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2872 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2873 		/* Thin 32 bpp. */
2874 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2875 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2876 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2877 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2878 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2879 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2880 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2881 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2882 		/* Thin 64 bpp. */
2883 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2884 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2885 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2886 			   TILE_SPLIT(split_equal_to_row_size) |
2887 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2888 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2889 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2890 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2891 		/* 8 bpp PRT. */
2892 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2893 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2894 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2895 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2896 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2897 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2898 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2899 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2900 		/* 16 bpp PRT */
2901 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2902 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2903 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2904 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2905 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2906 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2907 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2908 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2909 		/* 32 bpp PRT */
2910 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2911 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2912 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2913 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2914 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2915 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2916 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2917 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2918 		/* 64 bpp PRT */
2919 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2920 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2921 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2922 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2923 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2924 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2925 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2926 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2927 		/* 128 bpp PRT */
2928 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2929 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2930 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2931 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2932 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2933 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2934 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2935 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2936 
2937 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2938 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2939 		break;
2940 
2941 	default:
2942 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2943 	}
2944 }
2945 
2946 static void si_select_se_sh(struct radeon_device *rdev,
2947 			    u32 se_num, u32 sh_num)
2948 {
2949 	u32 data = INSTANCE_BROADCAST_WRITES;
2950 
2951 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2952 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2953 	else if (se_num == 0xffffffff)
2954 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2955 	else if (sh_num == 0xffffffff)
2956 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2957 	else
2958 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2959 	WREG32(GRBM_GFX_INDEX, data);
2960 }
2961 
/**
 * si_create_bitmask - build a mask with the low @bit_width bits set
 * @bit_width: number of low-order bits to set
 *
 * Returns 0 for a width of 0 and saturates at 0xffffffff for widths of
 * 32 or more.
 */
static u32 si_create_bitmask(u32 bit_width)
{
	/* a shift by 32 or more is undefined, so saturate explicitly */
	if (bit_width >= 32)
		return 0xffffffff;

	return ((u32)1 << bit_width) - 1;
}
2972 
2973 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2974 {
2975 	u32 data, mask;
2976 
2977 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2978 	if (data & 1)
2979 		data &= INACTIVE_CUS_MASK;
2980 	else
2981 		data = 0;
2982 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2983 
2984 	data >>= INACTIVE_CUS_SHIFT;
2985 
2986 	mask = si_create_bitmask(cu_per_sh);
2987 
2988 	return ~data & mask;
2989 }
2990 
2991 static void si_setup_spi(struct radeon_device *rdev,
2992 			 u32 se_num, u32 sh_per_se,
2993 			 u32 cu_per_sh)
2994 {
2995 	int i, j, k;
2996 	u32 data, mask, active_cu;
2997 
2998 	for (i = 0; i < se_num; i++) {
2999 		for (j = 0; j < sh_per_se; j++) {
3000 			si_select_se_sh(rdev, i, j);
3001 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
3002 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
3003 
3004 			mask = 1;
3005 			for (k = 0; k < 16; k++) {
3006 				mask <<= k;
3007 				if (active_cu & mask) {
3008 					data &= ~mask;
3009 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
3010 					break;
3011 				}
3012 			}
3013 		}
3014 	}
3015 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3016 }
3017 
3018 static u32 si_get_rb_disabled(struct radeon_device *rdev,
3019 			      u32 max_rb_num_per_se,
3020 			      u32 sh_per_se)
3021 {
3022 	u32 data, mask;
3023 
3024 	data = RREG32(CC_RB_BACKEND_DISABLE);
3025 	if (data & 1)
3026 		data &= BACKEND_DISABLE_MASK;
3027 	else
3028 		data = 0;
3029 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3030 
3031 	data >>= BACKEND_DISABLE_SHIFT;
3032 
3033 	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3034 
3035 	return data & mask;
3036 }
3037 
3038 static void si_setup_rb(struct radeon_device *rdev,
3039 			u32 se_num, u32 sh_per_se,
3040 			u32 max_rb_num_per_se)
3041 {
3042 	int i, j;
3043 	u32 data, mask;
3044 	u32 disabled_rbs = 0;
3045 	u32 enabled_rbs = 0;
3046 
3047 	for (i = 0; i < se_num; i++) {
3048 		for (j = 0; j < sh_per_se; j++) {
3049 			si_select_se_sh(rdev, i, j);
3050 			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3051 			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
3052 		}
3053 	}
3054 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3055 
3056 	mask = 1;
3057 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3058 		if (!(disabled_rbs & mask))
3059 			enabled_rbs |= mask;
3060 		mask <<= 1;
3061 	}
3062 
3063 	rdev->config.si.backend_enable_mask = enabled_rbs;
3064 
3065 	for (i = 0; i < se_num; i++) {
3066 		si_select_se_sh(rdev, i, 0xffffffff);
3067 		data = 0;
3068 		for (j = 0; j < sh_per_se; j++) {
3069 			switch (enabled_rbs & 3) {
3070 			case 1:
3071 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3072 				break;
3073 			case 2:
3074 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3075 				break;
3076 			case 3:
3077 			default:
3078 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3079 				break;
3080 			}
3081 			enabled_rbs >>= 2;
3082 		}
3083 		WREG32(PA_SC_RASTER_CONFIG, data);
3084 	}
3085 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3086 }
3087 
3088 static void si_gpu_init(struct radeon_device *rdev)
3089 {
3090 	u32 gb_addr_config = 0;
3091 	u32 mc_arb_ramcfg;
3092 	u32 sx_debug_1;
3093 	u32 hdp_host_path_cntl;
3094 	u32 tmp;
3095 	int i, j;
3096 
3097 	switch (rdev->family) {
3098 	case CHIP_TAHITI:
3099 		rdev->config.si.max_shader_engines = 2;
3100 		rdev->config.si.max_tile_pipes = 12;
3101 		rdev->config.si.max_cu_per_sh = 8;
3102 		rdev->config.si.max_sh_per_se = 2;
3103 		rdev->config.si.max_backends_per_se = 4;
3104 		rdev->config.si.max_texture_channel_caches = 12;
3105 		rdev->config.si.max_gprs = 256;
3106 		rdev->config.si.max_gs_threads = 32;
3107 		rdev->config.si.max_hw_contexts = 8;
3108 
3109 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3110 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3111 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3112 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3113 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3114 		break;
3115 	case CHIP_PITCAIRN:
3116 		rdev->config.si.max_shader_engines = 2;
3117 		rdev->config.si.max_tile_pipes = 8;
3118 		rdev->config.si.max_cu_per_sh = 5;
3119 		rdev->config.si.max_sh_per_se = 2;
3120 		rdev->config.si.max_backends_per_se = 4;
3121 		rdev->config.si.max_texture_channel_caches = 8;
3122 		rdev->config.si.max_gprs = 256;
3123 		rdev->config.si.max_gs_threads = 32;
3124 		rdev->config.si.max_hw_contexts = 8;
3125 
3126 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3127 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3128 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3129 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3130 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3131 		break;
3132 	case CHIP_VERDE:
3133 	default:
3134 		rdev->config.si.max_shader_engines = 1;
3135 		rdev->config.si.max_tile_pipes = 4;
3136 		rdev->config.si.max_cu_per_sh = 5;
3137 		rdev->config.si.max_sh_per_se = 2;
3138 		rdev->config.si.max_backends_per_se = 4;
3139 		rdev->config.si.max_texture_channel_caches = 4;
3140 		rdev->config.si.max_gprs = 256;
3141 		rdev->config.si.max_gs_threads = 32;
3142 		rdev->config.si.max_hw_contexts = 8;
3143 
3144 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3145 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3146 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3147 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3148 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3149 		break;
3150 	case CHIP_OLAND:
3151 		rdev->config.si.max_shader_engines = 1;
3152 		rdev->config.si.max_tile_pipes = 4;
3153 		rdev->config.si.max_cu_per_sh = 6;
3154 		rdev->config.si.max_sh_per_se = 1;
3155 		rdev->config.si.max_backends_per_se = 2;
3156 		rdev->config.si.max_texture_channel_caches = 4;
3157 		rdev->config.si.max_gprs = 256;
3158 		rdev->config.si.max_gs_threads = 16;
3159 		rdev->config.si.max_hw_contexts = 8;
3160 
3161 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3162 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3163 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3164 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3165 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3166 		break;
3167 	case CHIP_HAINAN:
3168 		rdev->config.si.max_shader_engines = 1;
3169 		rdev->config.si.max_tile_pipes = 4;
3170 		rdev->config.si.max_cu_per_sh = 5;
3171 		rdev->config.si.max_sh_per_se = 1;
3172 		rdev->config.si.max_backends_per_se = 1;
3173 		rdev->config.si.max_texture_channel_caches = 2;
3174 		rdev->config.si.max_gprs = 256;
3175 		rdev->config.si.max_gs_threads = 16;
3176 		rdev->config.si.max_hw_contexts = 8;
3177 
3178 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3179 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3180 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3181 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3182 		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
3183 		break;
3184 	}
3185 
3186 	/* Initialize HDP */
3187 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3188 		WREG32((0x2c14 + j), 0x00000000);
3189 		WREG32((0x2c18 + j), 0x00000000);
3190 		WREG32((0x2c1c + j), 0x00000000);
3191 		WREG32((0x2c20 + j), 0x00000000);
3192 		WREG32((0x2c24 + j), 0x00000000);
3193 	}
3194 
3195 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3196 	WREG32(SRBM_INT_CNTL, 1);
3197 	WREG32(SRBM_INT_ACK, 1);
3198 
3199 	evergreen_fix_pci_max_read_req_size(rdev);
3200 
3201 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3202 
3203 	RREG32(MC_SHARED_CHMAP);
3204 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3205 
3206 	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3207 	rdev->config.si.mem_max_burst_length_bytes = 256;
3208 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3209 	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3210 	if (rdev->config.si.mem_row_size_in_kb > 4)
3211 		rdev->config.si.mem_row_size_in_kb = 4;
3212 	/* XXX use MC settings? */
3213 	rdev->config.si.shader_engine_tile_size = 32;
3214 	rdev->config.si.num_gpus = 1;
3215 	rdev->config.si.multi_gpu_tile_size = 64;
3216 
3217 	/* fix up row size */
3218 	gb_addr_config &= ~ROW_SIZE_MASK;
3219 	switch (rdev->config.si.mem_row_size_in_kb) {
3220 	case 1:
3221 	default:
3222 		gb_addr_config |= ROW_SIZE(0);
3223 		break;
3224 	case 2:
3225 		gb_addr_config |= ROW_SIZE(1);
3226 		break;
3227 	case 4:
3228 		gb_addr_config |= ROW_SIZE(2);
3229 		break;
3230 	}
3231 
3232 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3233 	 * not have bank info, so create a custom tiling dword.
3234 	 * bits 3:0   num_pipes
3235 	 * bits 7:4   num_banks
3236 	 * bits 11:8  group_size
3237 	 * bits 15:12 row_size
3238 	 */
3239 	rdev->config.si.tile_config = 0;
3240 	switch (rdev->config.si.num_tile_pipes) {
3241 	case 1:
3242 		rdev->config.si.tile_config |= (0 << 0);
3243 		break;
3244 	case 2:
3245 		rdev->config.si.tile_config |= (1 << 0);
3246 		break;
3247 	case 4:
3248 		rdev->config.si.tile_config |= (2 << 0);
3249 		break;
3250 	case 8:
3251 	default:
3252 		/* XXX what about 12? */
3253 		rdev->config.si.tile_config |= (3 << 0);
3254 		break;
3255 	}
3256 	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3257 	case 0: /* four banks */
3258 		rdev->config.si.tile_config |= 0 << 4;
3259 		break;
3260 	case 1: /* eight banks */
3261 		rdev->config.si.tile_config |= 1 << 4;
3262 		break;
3263 	case 2: /* sixteen banks */
3264 	default:
3265 		rdev->config.si.tile_config |= 2 << 4;
3266 		break;
3267 	}
3268 	rdev->config.si.tile_config |=
3269 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3270 	rdev->config.si.tile_config |=
3271 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3272 
3273 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3274 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3275 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3276 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3277 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3278 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3279 	if (rdev->has_uvd) {
3280 		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3281 		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3282 		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3283 	}
3284 
3285 	si_tiling_mode_table_init(rdev);
3286 
3287 	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3288 		    rdev->config.si.max_sh_per_se,
3289 		    rdev->config.si.max_backends_per_se);
3290 
3291 	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3292 		     rdev->config.si.max_sh_per_se,
3293 		     rdev->config.si.max_cu_per_sh);
3294 
3295 	rdev->config.si.active_cus = 0;
3296 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
3297 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
3298 			rdev->config.si.active_cus +=
3299 				hweight32(si_get_cu_active_bitmap(rdev, i, j));
3300 		}
3301 	}
3302 
3303 	/* set HW defaults for 3D engine */
3304 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3305 				     ROQ_IB2_START(0x2b)));
3306 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3307 
3308 	sx_debug_1 = RREG32(SX_DEBUG_1);
3309 	WREG32(SX_DEBUG_1, sx_debug_1);
3310 
3311 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3312 
3313 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3314 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3315 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3316 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3317 
3318 	WREG32(VGT_NUM_INSTANCES, 1);
3319 
3320 	WREG32(CP_PERFMON_CNTL, 0);
3321 
3322 	WREG32(SQ_CONFIG, 0);
3323 
3324 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3325 					  FORCE_EOV_MAX_REZ_CNT(255)));
3326 
3327 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3328 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3329 
3330 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3331 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3332 
3333 	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3334 	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3335 	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3336 	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3337 	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3338 	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3339 	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3340 	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3341 
3342 	tmp = RREG32(HDP_MISC_CNTL);
3343 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3344 	WREG32(HDP_MISC_CNTL, tmp);
3345 
3346 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3347 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3348 
3349 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3350 
3351 	udelay(50);
3352 }
3353 
3354 /*
3355  * GPU scratch registers helpers function.
3356  */
3357 static void si_scratch_init(struct radeon_device *rdev)
3358 {
3359 	int i;
3360 
3361 	rdev->scratch.num_reg = 7;
3362 	rdev->scratch.reg_base = SCRATCH_REG0;
3363 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3364 		rdev->scratch.free[i] = true;
3365 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3366 	}
3367 }
3368 
3369 void si_fence_ring_emit(struct radeon_device *rdev,
3370 			struct radeon_fence *fence)
3371 {
3372 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3373 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3374 
3375 	/* flush read cache over gart */
3376 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3377 	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3378 	radeon_ring_write(ring, 0);
3379 	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3380 	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3381 			  PACKET3_TC_ACTION_ENA |
3382 			  PACKET3_SH_KCACHE_ACTION_ENA |
3383 			  PACKET3_SH_ICACHE_ACTION_ENA);
3384 	radeon_ring_write(ring, 0xFFFFFFFF);
3385 	radeon_ring_write(ring, 0);
3386 	radeon_ring_write(ring, 10); /* poll interval */
3387 	/* EVENT_WRITE_EOP - flush caches, send int */
3388 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3389 	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3390 	radeon_ring_write(ring, lower_32_bits(addr));
3391 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3392 	radeon_ring_write(ring, fence->seq);
3393 	radeon_ring_write(ring, 0);
3394 }
3395 
3396 /*
3397  * IB stuff
3398  */
3399 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3400 {
3401 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3402 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3403 	u32 header;
3404 
3405 	if (ib->is_const_ib) {
3406 		/* set switch buffer packet before const IB */
3407 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3408 		radeon_ring_write(ring, 0);
3409 
3410 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3411 	} else {
3412 		u32 next_rptr;
3413 		if (ring->rptr_save_reg) {
3414 			next_rptr = ring->wptr + 3 + 4 + 8;
3415 			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3416 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3417 						  PACKET3_SET_CONFIG_REG_START) >> 2));
3418 			radeon_ring_write(ring, next_rptr);
3419 		} else if (rdev->wb.enabled) {
3420 			next_rptr = ring->wptr + 5 + 4 + 8;
3421 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3422 			radeon_ring_write(ring, (1 << 8));
3423 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3424 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3425 			radeon_ring_write(ring, next_rptr);
3426 		}
3427 
3428 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3429 	}
3430 
3431 	radeon_ring_write(ring, header);
3432 	radeon_ring_write(ring,
3433 #ifdef __BIG_ENDIAN
3434 			  (2 << 0) |
3435 #endif
3436 			  (ib->gpu_addr & 0xFFFFFFFC));
3437 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3438 	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
3439 
3440 	if (!ib->is_const_ib) {
3441 		/* flush read cache over gart for this vmid */
3442 		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3443 		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3444 		radeon_ring_write(ring, vm_id);
3445 		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3446 		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3447 				  PACKET3_TC_ACTION_ENA |
3448 				  PACKET3_SH_KCACHE_ACTION_ENA |
3449 				  PACKET3_SH_ICACHE_ACTION_ENA);
3450 		radeon_ring_write(ring, 0xFFFFFFFF);
3451 		radeon_ring_write(ring, 0);
3452 		radeon_ring_write(ring, 10); /* poll interval */
3453 	}
3454 }
3455 
3456 /*
3457  * CP.
3458  */
3459 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3460 {
3461 	if (enable)
3462 		WREG32(CP_ME_CNTL, 0);
3463 	else {
3464 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3465 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3466 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3467 		WREG32(SCRATCH_UMSK, 0);
3468 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3469 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3470 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3471 	}
3472 	udelay(50);
3473 }
3474 
3475 static int si_cp_load_microcode(struct radeon_device *rdev)
3476 {
3477 	int i;
3478 
3479 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3480 		return -EINVAL;
3481 
3482 	si_cp_enable(rdev, false);
3483 
3484 	if (rdev->new_fw) {
3485 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3486 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3487 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3488 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3489 		const struct gfx_firmware_header_v1_0 *me_hdr =
3490 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3491 		const __le32 *fw_data;
3492 		u32 fw_size;
3493 
3494 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3495 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3496 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3497 
3498 		/* PFP */
3499 		fw_data = (const __le32 *)
3500 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3501 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3502 		WREG32(CP_PFP_UCODE_ADDR, 0);
3503 		for (i = 0; i < fw_size; i++)
3504 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3505 		WREG32(CP_PFP_UCODE_ADDR, 0);
3506 
3507 		/* CE */
3508 		fw_data = (const __le32 *)
3509 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3510 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3511 		WREG32(CP_CE_UCODE_ADDR, 0);
3512 		for (i = 0; i < fw_size; i++)
3513 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3514 		WREG32(CP_CE_UCODE_ADDR, 0);
3515 
3516 		/* ME */
3517 		fw_data = (const __be32 *)
3518 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3519 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3520 		WREG32(CP_ME_RAM_WADDR, 0);
3521 		for (i = 0; i < fw_size; i++)
3522 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3523 		WREG32(CP_ME_RAM_WADDR, 0);
3524 	} else {
3525 		const __be32 *fw_data;
3526 
3527 		/* PFP */
3528 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3529 		WREG32(CP_PFP_UCODE_ADDR, 0);
3530 		for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3531 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3532 		WREG32(CP_PFP_UCODE_ADDR, 0);
3533 
3534 		/* CE */
3535 		fw_data = (const __be32 *)rdev->ce_fw->data;
3536 		WREG32(CP_CE_UCODE_ADDR, 0);
3537 		for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3538 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3539 		WREG32(CP_CE_UCODE_ADDR, 0);
3540 
3541 		/* ME */
3542 		fw_data = (const __be32 *)rdev->me_fw->data;
3543 		WREG32(CP_ME_RAM_WADDR, 0);
3544 		for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3545 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3546 		WREG32(CP_ME_RAM_WADDR, 0);
3547 	}
3548 
3549 	WREG32(CP_PFP_UCODE_ADDR, 0);
3550 	WREG32(CP_CE_UCODE_ADDR, 0);
3551 	WREG32(CP_ME_RAM_WADDR, 0);
3552 	WREG32(CP_ME_RAM_RADDR, 0);
3553 	return 0;
3554 }
3555 
3556 static int si_cp_start(struct radeon_device *rdev)
3557 {
3558 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3559 	int r, i;
3560 
3561 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3562 	if (r) {
3563 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3564 		return r;
3565 	}
3566 	/* init the CP */
3567 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3568 	radeon_ring_write(ring, 0x1);
3569 	radeon_ring_write(ring, 0x0);
3570 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3571 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3572 	radeon_ring_write(ring, 0);
3573 	radeon_ring_write(ring, 0);
3574 
3575 	/* init the CE partitions */
3576 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3577 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3578 	radeon_ring_write(ring, 0xc000);
3579 	radeon_ring_write(ring, 0xe000);
3580 	radeon_ring_unlock_commit(rdev, ring, false);
3581 
3582 	si_cp_enable(rdev, true);
3583 
3584 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3585 	if (r) {
3586 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3587 		return r;
3588 	}
3589 
3590 	/* setup clear context state */
3591 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3592 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3593 
3594 	for (i = 0; i < si_default_size; i++)
3595 		radeon_ring_write(ring, si_default_state[i]);
3596 
3597 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3598 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3599 
3600 	/* set clear context state */
3601 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3602 	radeon_ring_write(ring, 0);
3603 
3604 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3605 	radeon_ring_write(ring, 0x00000316);
3606 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3607 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3608 
3609 	radeon_ring_unlock_commit(rdev, ring, false);
3610 
3611 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3612 		ring = &rdev->ring[i];
3613 		r = radeon_ring_lock(rdev, ring, 2);
3614 		if (r) {
3615 			DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3616 			return r;
3617 		}
3618 
3619 		/* clear the compute context state */
3620 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3621 		radeon_ring_write(ring, 0);
3622 
3623 		radeon_ring_unlock_commit(rdev, ring, false);
3624 	}
3625 
3626 	return 0;
3627 }
3628 
3629 static void si_cp_fini(struct radeon_device *rdev)
3630 {
3631 	struct radeon_ring *ring;
3632 	si_cp_enable(rdev, false);
3633 
3634 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3635 	radeon_ring_fini(rdev, ring);
3636 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3637 
3638 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3639 	radeon_ring_fini(rdev, ring);
3640 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3641 
3642 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3643 	radeon_ring_fini(rdev, ring);
3644 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3645 }
3646 
3647 static int si_cp_resume(struct radeon_device *rdev)
3648 {
3649 	struct radeon_ring *ring;
3650 	u32 tmp;
3651 	u32 rb_bufsz;
3652 	int r;
3653 
3654 	si_enable_gui_idle_interrupt(rdev, false);
3655 
3656 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3657 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3658 
3659 	/* Set the write pointer delay */
3660 	WREG32(CP_RB_WPTR_DELAY, 0);
3661 
3662 	WREG32(CP_DEBUG, 0);
3663 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3664 
3665 	/* ring 0 - compute and gfx */
3666 	/* Set ring buffer size */
3667 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3668 	rb_bufsz = order_base_2(ring->ring_size / 8);
3669 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3670 #ifdef __BIG_ENDIAN
3671 	tmp |= BUF_SWAP_32BIT;
3672 #endif
3673 	WREG32(CP_RB0_CNTL, tmp);
3674 
3675 	/* Initialize the ring buffer's read and write pointers */
3676 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3677 	ring->wptr = 0;
3678 	WREG32(CP_RB0_WPTR, ring->wptr);
3679 
3680 	/* set the wb address whether it's enabled or not */
3681 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3682 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3683 
3684 	if (rdev->wb.enabled)
3685 		WREG32(SCRATCH_UMSK, 0xff);
3686 	else {
3687 		tmp |= RB_NO_UPDATE;
3688 		WREG32(SCRATCH_UMSK, 0);
3689 	}
3690 
3691 	mdelay(1);
3692 	WREG32(CP_RB0_CNTL, tmp);
3693 
3694 	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3695 
3696 	/* ring1  - compute only */
3697 	/* Set ring buffer size */
3698 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3699 	rb_bufsz = order_base_2(ring->ring_size / 8);
3700 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3701 #ifdef __BIG_ENDIAN
3702 	tmp |= BUF_SWAP_32BIT;
3703 #endif
3704 	WREG32(CP_RB1_CNTL, tmp);
3705 
3706 	/* Initialize the ring buffer's read and write pointers */
3707 	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3708 	ring->wptr = 0;
3709 	WREG32(CP_RB1_WPTR, ring->wptr);
3710 
3711 	/* set the wb address whether it's enabled or not */
3712 	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3713 	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3714 
3715 	mdelay(1);
3716 	WREG32(CP_RB1_CNTL, tmp);
3717 
3718 	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3719 
3720 	/* ring2 - compute only */
3721 	/* Set ring buffer size */
3722 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3723 	rb_bufsz = order_base_2(ring->ring_size / 8);
3724 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3725 #ifdef __BIG_ENDIAN
3726 	tmp |= BUF_SWAP_32BIT;
3727 #endif
3728 	WREG32(CP_RB2_CNTL, tmp);
3729 
3730 	/* Initialize the ring buffer's read and write pointers */
3731 	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3732 	ring->wptr = 0;
3733 	WREG32(CP_RB2_WPTR, ring->wptr);
3734 
3735 	/* set the wb address whether it's enabled or not */
3736 	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3737 	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3738 
3739 	mdelay(1);
3740 	WREG32(CP_RB2_CNTL, tmp);
3741 
3742 	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3743 
3744 	/* start the rings */
3745 	si_cp_start(rdev);
3746 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3747 	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3748 	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3749 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3750 	if (r) {
3751 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3752 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3753 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3754 		return r;
3755 	}
3756 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3757 	if (r) {
3758 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3759 	}
3760 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3761 	if (r) {
3762 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3763 	}
3764 
3765 	si_enable_gui_idle_interrupt(rdev, true);
3766 
3767 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3768 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3769 
3770 	return 0;
3771 }
3772 
3773 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3774 {
3775 	u32 reset_mask = 0;
3776 	u32 tmp;
3777 
3778 	/* GRBM_STATUS */
3779 	tmp = RREG32(GRBM_STATUS);
3780 	if (tmp & (PA_BUSY | SC_BUSY |
3781 		   BCI_BUSY | SX_BUSY |
3782 		   TA_BUSY | VGT_BUSY |
3783 		   DB_BUSY | CB_BUSY |
3784 		   GDS_BUSY | SPI_BUSY |
3785 		   IA_BUSY | IA_BUSY_NO_DMA))
3786 		reset_mask |= RADEON_RESET_GFX;
3787 
3788 	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3789 		   CP_BUSY | CP_COHERENCY_BUSY))
3790 		reset_mask |= RADEON_RESET_CP;
3791 
3792 	if (tmp & GRBM_EE_BUSY)
3793 		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3794 
3795 	/* GRBM_STATUS2 */
3796 	tmp = RREG32(GRBM_STATUS2);
3797 	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3798 		reset_mask |= RADEON_RESET_RLC;
3799 
3800 	/* DMA_STATUS_REG 0 */
3801 	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3802 	if (!(tmp & DMA_IDLE))
3803 		reset_mask |= RADEON_RESET_DMA;
3804 
3805 	/* DMA_STATUS_REG 1 */
3806 	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3807 	if (!(tmp & DMA_IDLE))
3808 		reset_mask |= RADEON_RESET_DMA1;
3809 
3810 	/* SRBM_STATUS2 */
3811 	tmp = RREG32(SRBM_STATUS2);
3812 	if (tmp & DMA_BUSY)
3813 		reset_mask |= RADEON_RESET_DMA;
3814 
3815 	if (tmp & DMA1_BUSY)
3816 		reset_mask |= RADEON_RESET_DMA1;
3817 
3818 	/* SRBM_STATUS */
3819 	tmp = RREG32(SRBM_STATUS);
3820 
3821 	if (tmp & IH_BUSY)
3822 		reset_mask |= RADEON_RESET_IH;
3823 
3824 	if (tmp & SEM_BUSY)
3825 		reset_mask |= RADEON_RESET_SEM;
3826 
3827 	if (tmp & GRBM_RQ_PENDING)
3828 		reset_mask |= RADEON_RESET_GRBM;
3829 
3830 	if (tmp & VMC_BUSY)
3831 		reset_mask |= RADEON_RESET_VMC;
3832 
3833 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3834 		   MCC_BUSY | MCD_BUSY))
3835 		reset_mask |= RADEON_RESET_MC;
3836 
3837 	if (evergreen_is_display_hung(rdev))
3838 		reset_mask |= RADEON_RESET_DISPLAY;
3839 
3840 	/* VM_L2_STATUS */
3841 	tmp = RREG32(VM_L2_STATUS);
3842 	if (tmp & L2_BUSY)
3843 		reset_mask |= RADEON_RESET_VMC;
3844 
3845 	/* Skip MC reset as it's mostly likely not hung, just busy */
3846 	if (reset_mask & RADEON_RESET_MC) {
3847 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3848 		reset_mask &= ~RADEON_RESET_MC;
3849 	}
3850 
3851 	return reset_mask;
3852 }
3853 
3854 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3855 {
3856 	struct evergreen_mc_save save;
3857 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3858 	u32 tmp;
3859 
3860 	if (reset_mask == 0)
3861 		return;
3862 
3863 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3864 
3865 	evergreen_print_gpu_status_regs(rdev);
3866 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3867 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3868 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3869 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3870 
3871 	/* disable PG/CG */
3872 	si_fini_pg(rdev);
3873 	si_fini_cg(rdev);
3874 
3875 	/* stop the rlc */
3876 	si_rlc_stop(rdev);
3877 
3878 	/* Disable CP parsing/prefetching */
3879 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3880 
3881 	if (reset_mask & RADEON_RESET_DMA) {
3882 		/* dma0 */
3883 		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3884 		tmp &= ~DMA_RB_ENABLE;
3885 		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3886 	}
3887 	if (reset_mask & RADEON_RESET_DMA1) {
3888 		/* dma1 */
3889 		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3890 		tmp &= ~DMA_RB_ENABLE;
3891 		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3892 	}
3893 
3894 	udelay(50);
3895 
3896 	evergreen_mc_stop(rdev, &save);
3897 	if (evergreen_mc_wait_for_idle(rdev)) {
3898 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3899 	}
3900 
3901 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3902 		grbm_soft_reset = SOFT_RESET_CB |
3903 			SOFT_RESET_DB |
3904 			SOFT_RESET_GDS |
3905 			SOFT_RESET_PA |
3906 			SOFT_RESET_SC |
3907 			SOFT_RESET_BCI |
3908 			SOFT_RESET_SPI |
3909 			SOFT_RESET_SX |
3910 			SOFT_RESET_TC |
3911 			SOFT_RESET_TA |
3912 			SOFT_RESET_VGT |
3913 			SOFT_RESET_IA;
3914 	}
3915 
3916 	if (reset_mask & RADEON_RESET_CP) {
3917 		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3918 
3919 		srbm_soft_reset |= SOFT_RESET_GRBM;
3920 	}
3921 
3922 	if (reset_mask & RADEON_RESET_DMA)
3923 		srbm_soft_reset |= SOFT_RESET_DMA;
3924 
3925 	if (reset_mask & RADEON_RESET_DMA1)
3926 		srbm_soft_reset |= SOFT_RESET_DMA1;
3927 
3928 	if (reset_mask & RADEON_RESET_DISPLAY)
3929 		srbm_soft_reset |= SOFT_RESET_DC;
3930 
3931 	if (reset_mask & RADEON_RESET_RLC)
3932 		grbm_soft_reset |= SOFT_RESET_RLC;
3933 
3934 	if (reset_mask & RADEON_RESET_SEM)
3935 		srbm_soft_reset |= SOFT_RESET_SEM;
3936 
3937 	if (reset_mask & RADEON_RESET_IH)
3938 		srbm_soft_reset |= SOFT_RESET_IH;
3939 
3940 	if (reset_mask & RADEON_RESET_GRBM)
3941 		srbm_soft_reset |= SOFT_RESET_GRBM;
3942 
3943 	if (reset_mask & RADEON_RESET_VMC)
3944 		srbm_soft_reset |= SOFT_RESET_VMC;
3945 
3946 	if (reset_mask & RADEON_RESET_MC)
3947 		srbm_soft_reset |= SOFT_RESET_MC;
3948 
3949 	if (grbm_soft_reset) {
3950 		tmp = RREG32(GRBM_SOFT_RESET);
3951 		tmp |= grbm_soft_reset;
3952 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3953 		WREG32(GRBM_SOFT_RESET, tmp);
3954 		tmp = RREG32(GRBM_SOFT_RESET);
3955 
3956 		udelay(50);
3957 
3958 		tmp &= ~grbm_soft_reset;
3959 		WREG32(GRBM_SOFT_RESET, tmp);
3960 		tmp = RREG32(GRBM_SOFT_RESET);
3961 	}
3962 
3963 	if (srbm_soft_reset) {
3964 		tmp = RREG32(SRBM_SOFT_RESET);
3965 		tmp |= srbm_soft_reset;
3966 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3967 		WREG32(SRBM_SOFT_RESET, tmp);
3968 		tmp = RREG32(SRBM_SOFT_RESET);
3969 
3970 		udelay(50);
3971 
3972 		tmp &= ~srbm_soft_reset;
3973 		WREG32(SRBM_SOFT_RESET, tmp);
3974 		tmp = RREG32(SRBM_SOFT_RESET);
3975 	}
3976 
3977 	/* Wait a little for things to settle down */
3978 	udelay(50);
3979 
3980 	evergreen_mc_resume(rdev, &save);
3981 	udelay(50);
3982 
3983 	evergreen_print_gpu_status_regs(rdev);
3984 }
3985 
3986 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3987 {
3988 	u32 tmp, i;
3989 
3990 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3991 	tmp |= SPLL_BYPASS_EN;
3992 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3993 
3994 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3995 	tmp |= SPLL_CTLREQ_CHG;
3996 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3997 
3998 	for (i = 0; i < rdev->usec_timeout; i++) {
3999 		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
4000 			break;
4001 		udelay(1);
4002 	}
4003 
4004 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
4005 	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
4006 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
4007 
4008 	tmp = RREG32(MPLL_CNTL_MODE);
4009 	tmp &= ~MPLL_MCLK_SEL;
4010 	WREG32(MPLL_CNTL_MODE, tmp);
4011 }
4012 
4013 static void si_spll_powerdown(struct radeon_device *rdev)
4014 {
4015 	u32 tmp;
4016 
4017 	tmp = RREG32(SPLL_CNTL_MODE);
4018 	tmp |= SPLL_SW_DIR_CONTROL;
4019 	WREG32(SPLL_CNTL_MODE, tmp);
4020 
4021 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4022 	tmp |= SPLL_RESET;
4023 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4024 
4025 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4026 	tmp |= SPLL_SLEEP;
4027 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4028 
4029 	tmp = RREG32(SPLL_CNTL_MODE);
4030 	tmp &= ~SPLL_SW_DIR_CONTROL;
4031 	WREG32(SPLL_CNTL_MODE, tmp);
4032 }
4033 
4034 static void si_gpu_pci_config_reset(struct radeon_device *rdev)
4035 {
4036 	struct evergreen_mc_save save;
4037 	u32 tmp, i;
4038 
4039 	dev_info(rdev->dev, "GPU pci config reset\n");
4040 
4041 	/* disable dpm? */
4042 
4043 	/* disable cg/pg */
4044 	si_fini_pg(rdev);
4045 	si_fini_cg(rdev);
4046 
4047 	/* Disable CP parsing/prefetching */
4048 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4049 	/* dma0 */
4050 	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
4051 	tmp &= ~DMA_RB_ENABLE;
4052 	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
4053 	/* dma1 */
4054 	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
4055 	tmp &= ~DMA_RB_ENABLE;
4056 	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
4057 	/* XXX other engines? */
4058 
4059 	/* halt the rlc, disable cp internal ints */
4060 	si_rlc_stop(rdev);
4061 
4062 	udelay(50);
4063 
4064 	/* disable mem access */
4065 	evergreen_mc_stop(rdev, &save);
4066 	if (evergreen_mc_wait_for_idle(rdev)) {
4067 		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
4068 	}
4069 
4070 	/* set mclk/sclk to bypass */
4071 	si_set_clk_bypass_mode(rdev);
4072 	/* powerdown spll */
4073 	si_spll_powerdown(rdev);
4074 	/* disable BM */
4075 	pci_clear_master(rdev->pdev);
4076 	/* reset */
4077 	radeon_pci_config_reset(rdev);
4078 	/* wait for asic to come out of reset */
4079 	for (i = 0; i < rdev->usec_timeout; i++) {
4080 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
4081 			break;
4082 		udelay(1);
4083 	}
4084 }
4085 
4086 int si_asic_reset(struct radeon_device *rdev, bool hard)
4087 {
4088 	u32 reset_mask;
4089 
4090 	if (hard) {
4091 		si_gpu_pci_config_reset(rdev);
4092 		return 0;
4093 	}
4094 
4095 	reset_mask = si_gpu_check_soft_reset(rdev);
4096 
4097 	if (reset_mask)
4098 		r600_set_bios_scratch_engine_hung(rdev, true);
4099 
4100 	/* try soft reset */
4101 	si_gpu_soft_reset(rdev, reset_mask);
4102 
4103 	reset_mask = si_gpu_check_soft_reset(rdev);
4104 
4105 	/* try pci config reset */
4106 	if (reset_mask && radeon_hard_reset)
4107 		si_gpu_pci_config_reset(rdev);
4108 
4109 	reset_mask = si_gpu_check_soft_reset(rdev);
4110 
4111 	if (!reset_mask)
4112 		r600_set_bios_scratch_engine_hung(rdev, false);
4113 
4114 	return 0;
4115 }
4116 
4117 /**
4118  * si_gfx_is_lockup - Check if the GFX engine is locked up
4119  *
4120  * @rdev: radeon_device pointer
4121  * @ring: radeon_ring structure holding ring information
4122  *
4123  * Check if the GFX engine is locked up.
4124  * Returns true if the engine appears to be locked up, false if not.
4125  */
4126 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4127 {
4128 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
4129 
4130 	if (!(reset_mask & (RADEON_RESET_GFX |
4131 			    RADEON_RESET_COMPUTE |
4132 			    RADEON_RESET_CP))) {
4133 		radeon_ring_lockup_update(rdev, ring);
4134 		return false;
4135 	}
4136 	return radeon_ring_test_lockup(rdev, ring);
4137 }
4138 
4139 /* MC */
4140 static void si_mc_program(struct radeon_device *rdev)
4141 {
4142 	struct evergreen_mc_save save;
4143 	u32 tmp;
4144 	int i, j;
4145 
4146 	/* Initialize HDP */
4147 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4148 		WREG32((0x2c14 + j), 0x00000000);
4149 		WREG32((0x2c18 + j), 0x00000000);
4150 		WREG32((0x2c1c + j), 0x00000000);
4151 		WREG32((0x2c20 + j), 0x00000000);
4152 		WREG32((0x2c24 + j), 0x00000000);
4153 	}
4154 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4155 
4156 	evergreen_mc_stop(rdev, &save);
4157 	if (radeon_mc_wait_for_idle(rdev)) {
4158 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4159 	}
4160 	if (!ASIC_IS_NODCE(rdev))
4161 		/* Lockout access through VGA aperture*/
4162 		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4163 	/* Update configuration */
4164 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4165 	       rdev->mc.vram_start >> 12);
4166 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4167 	       rdev->mc.vram_end >> 12);
4168 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4169 	       rdev->vram_scratch.gpu_addr >> 12);
4170 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4171 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4172 	WREG32(MC_VM_FB_LOCATION, tmp);
4173 	/* XXX double check these! */
4174 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4175 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4176 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4177 	WREG32(MC_VM_AGP_BASE, 0);
4178 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4179 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4180 	if (radeon_mc_wait_for_idle(rdev)) {
4181 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4182 	}
4183 	evergreen_mc_resume(rdev, &save);
4184 	if (!ASIC_IS_NODCE(rdev)) {
4185 		/* we need to own VRAM, so turn off the VGA renderer here
4186 		 * to stop it overwriting our objects */
4187 		rv515_vga_render_disable(rdev);
4188 	}
4189 }
4190 
4191 void si_vram_gtt_location(struct radeon_device *rdev,
4192 			  struct radeon_mc *mc)
4193 {
4194 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
4195 		/* leave room for at least 1024M GTT */
4196 		dev_warn(rdev->dev, "limiting VRAM\n");
4197 		mc->real_vram_size = 0xFFC0000000ULL;
4198 		mc->mc_vram_size = 0xFFC0000000ULL;
4199 	}
4200 	radeon_vram_location(rdev, &rdev->mc, 0);
4201 	rdev->mc.gtt_base_align = 0;
4202 	radeon_gtt_location(rdev, mc);
4203 }
4204 
4205 static int si_mc_init(struct radeon_device *rdev)
4206 {
4207 	u32 tmp;
4208 	int chansize, numchan;
4209 
4210 	/* Get VRAM informations */
4211 	rdev->mc.vram_is_ddr = true;
4212 	tmp = RREG32(MC_ARB_RAMCFG);
4213 	if (tmp & CHANSIZE_OVERRIDE) {
4214 		chansize = 16;
4215 	} else if (tmp & CHANSIZE_MASK) {
4216 		chansize = 64;
4217 	} else {
4218 		chansize = 32;
4219 	}
4220 	tmp = RREG32(MC_SHARED_CHMAP);
4221 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4222 	case 0:
4223 	default:
4224 		numchan = 1;
4225 		break;
4226 	case 1:
4227 		numchan = 2;
4228 		break;
4229 	case 2:
4230 		numchan = 4;
4231 		break;
4232 	case 3:
4233 		numchan = 8;
4234 		break;
4235 	case 4:
4236 		numchan = 3;
4237 		break;
4238 	case 5:
4239 		numchan = 6;
4240 		break;
4241 	case 6:
4242 		numchan = 10;
4243 		break;
4244 	case 7:
4245 		numchan = 12;
4246 		break;
4247 	case 8:
4248 		numchan = 16;
4249 		break;
4250 	}
4251 	rdev->mc.vram_width = numchan * chansize;
4252 	/* Could aper size report 0 ? */
4253 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4254 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4255 	/* size in MB on si */
4256 	tmp = RREG32(CONFIG_MEMSIZE);
4257 	/* some boards may have garbage in the upper 16 bits */
4258 	if (tmp & 0xffff0000) {
4259 		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4260 		if (tmp & 0xffff)
4261 			tmp &= 0xffff;
4262 	}
4263 	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4264 	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4265 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4266 	si_vram_gtt_location(rdev, &rdev->mc);
4267 	radeon_update_bandwidth_info(rdev);
4268 
4269 	return 0;
4270 }
4271 
4272 /*
4273  * GART
4274  */
4275 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4276 {
4277 	/* flush hdp cache */
4278 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4279 
4280 	/* bits 0-15 are the VM contexts0-15 */
4281 	WREG32(VM_INVALIDATE_REQUEST, 1);
4282 }
4283 
4284 static int si_pcie_gart_enable(struct radeon_device *rdev)
4285 {
4286 	int r, i;
4287 
4288 	if (rdev->gart.robj == NULL) {
4289 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4290 		return -EINVAL;
4291 	}
4292 	r = radeon_gart_table_vram_pin(rdev);
4293 	if (r)
4294 		return r;
4295 	/* Setup TLB control */
4296 	WREG32(MC_VM_MX_L1_TLB_CNTL,
4297 	       (0xA << 7) |
4298 	       ENABLE_L1_TLB |
4299 	       ENABLE_L1_FRAGMENT_PROCESSING |
4300 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4301 	       ENABLE_ADVANCED_DRIVER_MODEL |
4302 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4303 	/* Setup L2 cache */
4304 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4305 	       ENABLE_L2_FRAGMENT_PROCESSING |
4306 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4307 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4308 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4309 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4310 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4311 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4312 	       BANK_SELECT(4) |
4313 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4314 	/* setup context0 */
4315 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4316 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4317 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4318 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4319 			(u32)(rdev->dummy_page.addr >> 12));
4320 	WREG32(VM_CONTEXT0_CNTL2, 0);
4321 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4322 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4323 
4324 	WREG32(0x15D4, 0);
4325 	WREG32(0x15D8, 0);
4326 	WREG32(0x15DC, 0);
4327 
4328 	/* empty context1-15 */
4329 	/* set vm size, must be a multiple of 4 */
4330 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4331 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
4332 	/* Assign the pt base to something valid for now; the pts used for
4333 	 * the VMs are determined by the application and setup and assigned
4334 	 * on the fly in the vm part of radeon_gart.c
4335 	 */
4336 	for (i = 1; i < 16; i++) {
4337 		if (i < 8)
4338 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4339 			       rdev->vm_manager.saved_table_addr[i]);
4340 		else
4341 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4342 			       rdev->vm_manager.saved_table_addr[i]);
4343 	}
4344 
4345 	/* enable context1-15 */
4346 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4347 	       (u32)(rdev->dummy_page.addr >> 12));
4348 	WREG32(VM_CONTEXT1_CNTL2, 4);
4349 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4350 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
4351 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4352 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4353 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4354 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4355 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4356 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4357 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4358 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4359 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4360 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4361 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4362 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4363 
4364 	si_pcie_gart_tlb_flush(rdev);
4365 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4366 		 (unsigned)(rdev->mc.gtt_size >> 20),
4367 		 (unsigned long long)rdev->gart.table_addr);
4368 	rdev->gart.ready = true;
4369 	return 0;
4370 }
4371 
4372 static void si_pcie_gart_disable(struct radeon_device *rdev)
4373 {
4374 	unsigned i;
4375 
4376 	for (i = 1; i < 16; ++i) {
4377 		uint32_t reg;
4378 		if (i < 8)
4379 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4380 		else
4381 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4382 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4383 	}
4384 
4385 	/* Disable all tables */
4386 	WREG32(VM_CONTEXT0_CNTL, 0);
4387 	WREG32(VM_CONTEXT1_CNTL, 0);
4388 	/* Setup TLB control */
4389 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4390 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4391 	/* Setup L2 cache */
4392 	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4393 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4394 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4395 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4396 	WREG32(VM_L2_CNTL2, 0);
4397 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4398 	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4399 	radeon_gart_table_vram_unpin(rdev);
4400 }
4401 
/*
 * GART teardown: si_pcie_gart_disable() first, then
 * radeon_gart_table_vram_free() and radeon_gart_fini().
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);

	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4408 
4409 /* vm parser */
4410 static bool si_vm_reg_valid(u32 reg)
4411 {
4412 	/* context regs are fine */
4413 	if (reg >= 0x28000)
4414 		return true;
4415 
4416 	/* shader regs are also fine */
4417 	if (reg >= 0xB000 && reg < 0xC000)
4418 		return true;
4419 
4420 	/* check config regs */
4421 	switch (reg) {
4422 	case GRBM_GFX_INDEX:
4423 	case CP_STRMOUT_CNTL:
4424 	case VGT_VTX_VECT_EJECT_REG:
4425 	case VGT_CACHE_INVALIDATION:
4426 	case VGT_ESGS_RING_SIZE:
4427 	case VGT_GSVS_RING_SIZE:
4428 	case VGT_GS_VERTEX_REUSE:
4429 	case VGT_PRIMITIVE_TYPE:
4430 	case VGT_INDEX_TYPE:
4431 	case VGT_NUM_INDICES:
4432 	case VGT_NUM_INSTANCES:
4433 	case VGT_TF_RING_SIZE:
4434 	case VGT_HS_OFFCHIP_PARAM:
4435 	case VGT_TF_MEMORY_BASE:
4436 	case PA_CL_ENHANCE:
4437 	case PA_SU_LINE_STIPPLE_VALUE:
4438 	case PA_SC_LINE_STIPPLE_STATE:
4439 	case PA_SC_ENHANCE:
4440 	case SQC_CACHES:
4441 	case SPI_STATIC_THREAD_MGMT_1:
4442 	case SPI_STATIC_THREAD_MGMT_2:
4443 	case SPI_STATIC_THREAD_MGMT_3:
4444 	case SPI_PS_MAX_WAVE_ID:
4445 	case SPI_CONFIG_CNTL:
4446 	case SPI_CONFIG_CNTL_1:
4447 	case TA_CNTL_AUX:
4448 	case TA_CS_BC_BASE_ADDR:
4449 		return true;
4450 	default:
4451 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4452 		return false;
4453 	}
4454 }
4455 
4456 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4457 				  u32 *ib, struct radeon_cs_packet *pkt)
4458 {
4459 	switch (pkt->opcode) {
4460 	case PACKET3_NOP:
4461 	case PACKET3_SET_BASE:
4462 	case PACKET3_SET_CE_DE_COUNTERS:
4463 	case PACKET3_LOAD_CONST_RAM:
4464 	case PACKET3_WRITE_CONST_RAM:
4465 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4466 	case PACKET3_DUMP_CONST_RAM:
4467 	case PACKET3_INCREMENT_CE_COUNTER:
4468 	case PACKET3_WAIT_ON_DE_COUNTER:
4469 	case PACKET3_CE_WRITE:
4470 		break;
4471 	default:
4472 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4473 		return -EINVAL;
4474 	}
4475 	return 0;
4476 }
4477 
4478 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4479 {
4480 	u32 start_reg, reg, i;
4481 	u32 command = ib[idx + 4];
4482 	u32 info = ib[idx + 1];
4483 	u32 idx_value = ib[idx];
4484 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4485 		/* src address space is register */
4486 		if (((info & 0x60000000) >> 29) == 0) {
4487 			start_reg = idx_value << 2;
4488 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4489 				reg = start_reg;
4490 				if (!si_vm_reg_valid(reg)) {
4491 					DRM_ERROR("CP DMA Bad SRC register\n");
4492 					return -EINVAL;
4493 				}
4494 			} else {
4495 				for (i = 0; i < (command & 0x1fffff); i++) {
4496 					reg = start_reg + (4 * i);
4497 					if (!si_vm_reg_valid(reg)) {
4498 						DRM_ERROR("CP DMA Bad SRC register\n");
4499 						return -EINVAL;
4500 					}
4501 				}
4502 			}
4503 		}
4504 	}
4505 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4506 		/* dst address space is register */
4507 		if (((info & 0x00300000) >> 20) == 0) {
4508 			start_reg = ib[idx + 2];
4509 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4510 				reg = start_reg;
4511 				if (!si_vm_reg_valid(reg)) {
4512 					DRM_ERROR("CP DMA Bad DST register\n");
4513 					return -EINVAL;
4514 				}
4515 			} else {
4516 				for (i = 0; i < (command & 0x1fffff); i++) {
4517 					reg = start_reg + (4 * i);
4518 					if (!si_vm_reg_valid(reg)) {
4519 						DRM_ERROR("CP DMA Bad DST register\n");
4520 						return -EINVAL;
4521 					}
4522 				}
4523 			}
4524 		}
4525 	}
4526 	return 0;
4527 }
4528 
4529 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4530 				   u32 *ib, struct radeon_cs_packet *pkt)
4531 {
4532 	int r;
4533 	u32 idx = pkt->idx + 1;
4534 	u32 idx_value = ib[idx];
4535 	u32 start_reg, end_reg, reg, i;
4536 
4537 	switch (pkt->opcode) {
4538 	case PACKET3_NOP:
4539 	case PACKET3_SET_BASE:
4540 	case PACKET3_CLEAR_STATE:
4541 	case PACKET3_INDEX_BUFFER_SIZE:
4542 	case PACKET3_DISPATCH_DIRECT:
4543 	case PACKET3_DISPATCH_INDIRECT:
4544 	case PACKET3_ALLOC_GDS:
4545 	case PACKET3_WRITE_GDS_RAM:
4546 	case PACKET3_ATOMIC_GDS:
4547 	case PACKET3_ATOMIC:
4548 	case PACKET3_OCCLUSION_QUERY:
4549 	case PACKET3_SET_PREDICATION:
4550 	case PACKET3_COND_EXEC:
4551 	case PACKET3_PRED_EXEC:
4552 	case PACKET3_DRAW_INDIRECT:
4553 	case PACKET3_DRAW_INDEX_INDIRECT:
4554 	case PACKET3_INDEX_BASE:
4555 	case PACKET3_DRAW_INDEX_2:
4556 	case PACKET3_CONTEXT_CONTROL:
4557 	case PACKET3_INDEX_TYPE:
4558 	case PACKET3_DRAW_INDIRECT_MULTI:
4559 	case PACKET3_DRAW_INDEX_AUTO:
4560 	case PACKET3_DRAW_INDEX_IMMD:
4561 	case PACKET3_NUM_INSTANCES:
4562 	case PACKET3_DRAW_INDEX_MULTI_AUTO:
4563 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4564 	case PACKET3_DRAW_INDEX_OFFSET_2:
4565 	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4566 	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4567 	case PACKET3_MPEG_INDEX:
4568 	case PACKET3_WAIT_REG_MEM:
4569 	case PACKET3_MEM_WRITE:
4570 	case PACKET3_PFP_SYNC_ME:
4571 	case PACKET3_SURFACE_SYNC:
4572 	case PACKET3_EVENT_WRITE:
4573 	case PACKET3_EVENT_WRITE_EOP:
4574 	case PACKET3_EVENT_WRITE_EOS:
4575 	case PACKET3_SET_CONTEXT_REG:
4576 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4577 	case PACKET3_SET_SH_REG:
4578 	case PACKET3_SET_SH_REG_OFFSET:
4579 	case PACKET3_INCREMENT_DE_COUNTER:
4580 	case PACKET3_WAIT_ON_CE_COUNTER:
4581 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4582 	case PACKET3_ME_WRITE:
4583 		break;
4584 	case PACKET3_COPY_DATA:
4585 		if ((idx_value & 0xf00) == 0) {
4586 			reg = ib[idx + 3] * 4;
4587 			if (!si_vm_reg_valid(reg))
4588 				return -EINVAL;
4589 		}
4590 		break;
4591 	case PACKET3_WRITE_DATA:
4592 		if ((idx_value & 0xf00) == 0) {
4593 			start_reg = ib[idx + 1] * 4;
4594 			if (idx_value & 0x10000) {
4595 				if (!si_vm_reg_valid(start_reg))
4596 					return -EINVAL;
4597 			} else {
4598 				for (i = 0; i < (pkt->count - 2); i++) {
4599 					reg = start_reg + (4 * i);
4600 					if (!si_vm_reg_valid(reg))
4601 						return -EINVAL;
4602 				}
4603 			}
4604 		}
4605 		break;
4606 	case PACKET3_COND_WRITE:
4607 		if (idx_value & 0x100) {
4608 			reg = ib[idx + 5] * 4;
4609 			if (!si_vm_reg_valid(reg))
4610 				return -EINVAL;
4611 		}
4612 		break;
4613 	case PACKET3_COPY_DW:
4614 		if (idx_value & 0x2) {
4615 			reg = ib[idx + 3] * 4;
4616 			if (!si_vm_reg_valid(reg))
4617 				return -EINVAL;
4618 		}
4619 		break;
4620 	case PACKET3_SET_CONFIG_REG:
4621 		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4622 		end_reg = 4 * pkt->count + start_reg - 4;
4623 		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4624 		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4625 		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4626 			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4627 			return -EINVAL;
4628 		}
4629 		for (i = 0; i < pkt->count; i++) {
4630 			reg = start_reg + (4 * i);
4631 			if (!si_vm_reg_valid(reg))
4632 				return -EINVAL;
4633 		}
4634 		break;
4635 	case PACKET3_CP_DMA:
4636 		r = si_vm_packet3_cp_dma_check(ib, idx);
4637 		if (r)
4638 			return r;
4639 		break;
4640 	default:
4641 		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4642 		return -EINVAL;
4643 	}
4644 	return 0;
4645 }
4646 
4647 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4648 				       u32 *ib, struct radeon_cs_packet *pkt)
4649 {
4650 	int r;
4651 	u32 idx = pkt->idx + 1;
4652 	u32 idx_value = ib[idx];
4653 	u32 start_reg, reg, i;
4654 
4655 	switch (pkt->opcode) {
4656 	case PACKET3_NOP:
4657 	case PACKET3_SET_BASE:
4658 	case PACKET3_CLEAR_STATE:
4659 	case PACKET3_DISPATCH_DIRECT:
4660 	case PACKET3_DISPATCH_INDIRECT:
4661 	case PACKET3_ALLOC_GDS:
4662 	case PACKET3_WRITE_GDS_RAM:
4663 	case PACKET3_ATOMIC_GDS:
4664 	case PACKET3_ATOMIC:
4665 	case PACKET3_OCCLUSION_QUERY:
4666 	case PACKET3_SET_PREDICATION:
4667 	case PACKET3_COND_EXEC:
4668 	case PACKET3_PRED_EXEC:
4669 	case PACKET3_CONTEXT_CONTROL:
4670 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4671 	case PACKET3_WAIT_REG_MEM:
4672 	case PACKET3_MEM_WRITE:
4673 	case PACKET3_PFP_SYNC_ME:
4674 	case PACKET3_SURFACE_SYNC:
4675 	case PACKET3_EVENT_WRITE:
4676 	case PACKET3_EVENT_WRITE_EOP:
4677 	case PACKET3_EVENT_WRITE_EOS:
4678 	case PACKET3_SET_CONTEXT_REG:
4679 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4680 	case PACKET3_SET_SH_REG:
4681 	case PACKET3_SET_SH_REG_OFFSET:
4682 	case PACKET3_INCREMENT_DE_COUNTER:
4683 	case PACKET3_WAIT_ON_CE_COUNTER:
4684 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4685 	case PACKET3_ME_WRITE:
4686 		break;
4687 	case PACKET3_COPY_DATA:
4688 		if ((idx_value & 0xf00) == 0) {
4689 			reg = ib[idx + 3] * 4;
4690 			if (!si_vm_reg_valid(reg))
4691 				return -EINVAL;
4692 		}
4693 		break;
4694 	case PACKET3_WRITE_DATA:
4695 		if ((idx_value & 0xf00) == 0) {
4696 			start_reg = ib[idx + 1] * 4;
4697 			if (idx_value & 0x10000) {
4698 				if (!si_vm_reg_valid(start_reg))
4699 					return -EINVAL;
4700 			} else {
4701 				for (i = 0; i < (pkt->count - 2); i++) {
4702 					reg = start_reg + (4 * i);
4703 					if (!si_vm_reg_valid(reg))
4704 						return -EINVAL;
4705 				}
4706 			}
4707 		}
4708 		break;
4709 	case PACKET3_COND_WRITE:
4710 		if (idx_value & 0x100) {
4711 			reg = ib[idx + 5] * 4;
4712 			if (!si_vm_reg_valid(reg))
4713 				return -EINVAL;
4714 		}
4715 		break;
4716 	case PACKET3_COPY_DW:
4717 		if (idx_value & 0x2) {
4718 			reg = ib[idx + 3] * 4;
4719 			if (!si_vm_reg_valid(reg))
4720 				return -EINVAL;
4721 		}
4722 		break;
4723 	case PACKET3_CP_DMA:
4724 		r = si_vm_packet3_cp_dma_check(ib, idx);
4725 		if (r)
4726 			return r;
4727 		break;
4728 	default:
4729 		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4730 		return -EINVAL;
4731 	}
4732 	return 0;
4733 }
4734 
4735 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4736 {
4737 	int ret = 0;
4738 	u32 idx = 0, i;
4739 	struct radeon_cs_packet pkt;
4740 
4741 	do {
4742 		pkt.idx = idx;
4743 		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4744 		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4745 		pkt.one_reg_wr = 0;
4746 		switch (pkt.type) {
4747 		case RADEON_PACKET_TYPE0:
4748 			dev_err(rdev->dev, "Packet0 not allowed!\n");
4749 			ret = -EINVAL;
4750 			break;
4751 		case RADEON_PACKET_TYPE2:
4752 			idx += 1;
4753 			break;
4754 		case RADEON_PACKET_TYPE3:
4755 			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4756 			if (ib->is_const_ib)
4757 				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4758 			else {
4759 				switch (ib->ring) {
4760 				case RADEON_RING_TYPE_GFX_INDEX:
4761 					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4762 					break;
4763 				case CAYMAN_RING_TYPE_CP1_INDEX:
4764 				case CAYMAN_RING_TYPE_CP2_INDEX:
4765 					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4766 					break;
4767 				default:
4768 					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4769 					ret = -EINVAL;
4770 					break;
4771 				}
4772 			}
4773 			idx += pkt.count + 2;
4774 			break;
4775 		default:
4776 			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4777 			ret = -EINVAL;
4778 			break;
4779 		}
4780 		if (ret) {
4781 			for (i = 0; i < ib->length_dw; i++) {
4782 				if (i == idx)
4783 					printk("\t0x%08x <---\n", ib->ptr[i]);
4784 				else
4785 					printk("\t0x%08x\n", ib->ptr[i]);
4786 			}
4787 			break;
4788 		}
4789 	} while (idx < ib->length_dw);
4790 
4791 	return ret;
4792 }
4793 
4794 /*
4795  * vm
4796  */
4797 int si_vm_init(struct radeon_device *rdev)
4798 {
4799 	/* number of VMs */
4800 	rdev->vm_manager.nvm = 16;
4801 	/* base offset of vram pages */
4802 	rdev->vm_manager.vram_base_offset = 0;
4803 
4804 	return 0;
4805 }
4806 
/**
 * si_vm_fini - VM teardown hook (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Intentionally empty: si_vm_init() only stores two values in
 * rdev->vm_manager, so there is nothing to undo here.
 */
void si_vm_fini(struct radeon_device *rdev)
{
}
4810 
4811 /**
4812  * si_vm_decode_fault - print human readable fault info
4813  *
4814  * @rdev: radeon_device pointer
4815  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4816  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4817  *
4818  * Print human readable fault information (SI).
4819  */
4820 static void si_vm_decode_fault(struct radeon_device *rdev,
4821 			       u32 status, u32 addr)
4822 {
4823 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4824 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4825 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4826 	char *block;
4827 
4828 	if (rdev->family == CHIP_TAHITI) {
4829 		switch (mc_id) {
4830 		case 160:
4831 		case 144:
4832 		case 96:
4833 		case 80:
4834 		case 224:
4835 		case 208:
4836 		case 32:
4837 		case 16:
4838 			block = "CB";
4839 			break;
4840 		case 161:
4841 		case 145:
4842 		case 97:
4843 		case 81:
4844 		case 225:
4845 		case 209:
4846 		case 33:
4847 		case 17:
4848 			block = "CB_FMASK";
4849 			break;
4850 		case 162:
4851 		case 146:
4852 		case 98:
4853 		case 82:
4854 		case 226:
4855 		case 210:
4856 		case 34:
4857 		case 18:
4858 			block = "CB_CMASK";
4859 			break;
4860 		case 163:
4861 		case 147:
4862 		case 99:
4863 		case 83:
4864 		case 227:
4865 		case 211:
4866 		case 35:
4867 		case 19:
4868 			block = "CB_IMMED";
4869 			break;
4870 		case 164:
4871 		case 148:
4872 		case 100:
4873 		case 84:
4874 		case 228:
4875 		case 212:
4876 		case 36:
4877 		case 20:
4878 			block = "DB";
4879 			break;
4880 		case 165:
4881 		case 149:
4882 		case 101:
4883 		case 85:
4884 		case 229:
4885 		case 213:
4886 		case 37:
4887 		case 21:
4888 			block = "DB_HTILE";
4889 			break;
4890 		case 167:
4891 		case 151:
4892 		case 103:
4893 		case 87:
4894 		case 231:
4895 		case 215:
4896 		case 39:
4897 		case 23:
4898 			block = "DB_STEN";
4899 			break;
4900 		case 72:
4901 		case 68:
4902 		case 64:
4903 		case 8:
4904 		case 4:
4905 		case 0:
4906 		case 136:
4907 		case 132:
4908 		case 128:
4909 		case 200:
4910 		case 196:
4911 		case 192:
4912 			block = "TC";
4913 			break;
4914 		case 112:
4915 		case 48:
4916 			block = "CP";
4917 			break;
4918 		case 49:
4919 		case 177:
4920 		case 50:
4921 		case 178:
4922 			block = "SH";
4923 			break;
4924 		case 53:
4925 		case 190:
4926 			block = "VGT";
4927 			break;
4928 		case 117:
4929 			block = "IH";
4930 			break;
4931 		case 51:
4932 		case 115:
4933 			block = "RLC";
4934 			break;
4935 		case 119:
4936 		case 183:
4937 			block = "DMA0";
4938 			break;
4939 		case 61:
4940 			block = "DMA1";
4941 			break;
4942 		case 248:
4943 		case 120:
4944 			block = "HDP";
4945 			break;
4946 		default:
4947 			block = "unknown";
4948 			break;
4949 		}
4950 	} else {
4951 		switch (mc_id) {
4952 		case 32:
4953 		case 16:
4954 		case 96:
4955 		case 80:
4956 		case 160:
4957 		case 144:
4958 		case 224:
4959 		case 208:
4960 			block = "CB";
4961 			break;
4962 		case 33:
4963 		case 17:
4964 		case 97:
4965 		case 81:
4966 		case 161:
4967 		case 145:
4968 		case 225:
4969 		case 209:
4970 			block = "CB_FMASK";
4971 			break;
4972 		case 34:
4973 		case 18:
4974 		case 98:
4975 		case 82:
4976 		case 162:
4977 		case 146:
4978 		case 226:
4979 		case 210:
4980 			block = "CB_CMASK";
4981 			break;
4982 		case 35:
4983 		case 19:
4984 		case 99:
4985 		case 83:
4986 		case 163:
4987 		case 147:
4988 		case 227:
4989 		case 211:
4990 			block = "CB_IMMED";
4991 			break;
4992 		case 36:
4993 		case 20:
4994 		case 100:
4995 		case 84:
4996 		case 164:
4997 		case 148:
4998 		case 228:
4999 		case 212:
5000 			block = "DB";
5001 			break;
5002 		case 37:
5003 		case 21:
5004 		case 101:
5005 		case 85:
5006 		case 165:
5007 		case 149:
5008 		case 229:
5009 		case 213:
5010 			block = "DB_HTILE";
5011 			break;
5012 		case 39:
5013 		case 23:
5014 		case 103:
5015 		case 87:
5016 		case 167:
5017 		case 151:
5018 		case 231:
5019 		case 215:
5020 			block = "DB_STEN";
5021 			break;
5022 		case 72:
5023 		case 68:
5024 		case 8:
5025 		case 4:
5026 		case 136:
5027 		case 132:
5028 		case 200:
5029 		case 196:
5030 			block = "TC";
5031 			break;
5032 		case 112:
5033 		case 48:
5034 			block = "CP";
5035 			break;
5036 		case 49:
5037 		case 177:
5038 		case 50:
5039 		case 178:
5040 			block = "SH";
5041 			break;
5042 		case 53:
5043 			block = "VGT";
5044 			break;
5045 		case 117:
5046 			block = "IH";
5047 			break;
5048 		case 51:
5049 		case 115:
5050 			block = "RLC";
5051 			break;
5052 		case 119:
5053 		case 183:
5054 			block = "DMA0";
5055 			break;
5056 		case 61:
5057 			block = "DMA1";
5058 			break;
5059 		case 248:
5060 		case 120:
5061 			block = "HDP";
5062 			break;
5063 		default:
5064 			block = "unknown";
5065 			break;
5066 		}
5067 	}
5068 
5069 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5070 	       protections, vmid, addr,
5071 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5072 	       block, mc_id);
5073 }
5074 
5075 void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5076 		 unsigned vm_id, uint64_t pd_addr)
5077 {
5078 	/* write new base address */
5079 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5080 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5081 				 WRITE_DATA_DST_SEL(0)));
5082 
5083 	if (vm_id < 8) {
5084 		radeon_ring_write(ring,
5085 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5086 	} else {
5087 		radeon_ring_write(ring,
5088 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5089 	}
5090 	radeon_ring_write(ring, 0);
5091 	radeon_ring_write(ring, pd_addr >> 12);
5092 
5093 	/* flush hdp cache */
5094 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5095 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5096 				 WRITE_DATA_DST_SEL(0)));
5097 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5098 	radeon_ring_write(ring, 0);
5099 	radeon_ring_write(ring, 0x1);
5100 
5101 	/* bits 0-15 are the VM contexts0-15 */
5102 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5103 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5104 				 WRITE_DATA_DST_SEL(0)));
5105 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5106 	radeon_ring_write(ring, 0);
5107 	radeon_ring_write(ring, 1 << vm_id);
5108 
5109 	/* wait for the invalidate to complete */
5110 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5111 	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
5112 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5113 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5114 	radeon_ring_write(ring, 0);
5115 	radeon_ring_write(ring, 0); /* ref */
5116 	radeon_ring_write(ring, 0); /* mask */
5117 	radeon_ring_write(ring, 0x20); /* poll interval */
5118 
5119 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
5120 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5121 	radeon_ring_write(ring, 0x0);
5122 }
5123 
5124 /*
5125  *  Power and clock gating
5126  */
5127 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5128 {
5129 	int i;
5130 
5131 	for (i = 0; i < rdev->usec_timeout; i++) {
5132 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5133 			break;
5134 		udelay(1);
5135 	}
5136 
5137 	for (i = 0; i < rdev->usec_timeout; i++) {
5138 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5139 			break;
5140 		udelay(1);
5141 	}
5142 }
5143 
5144 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5145 					 bool enable)
5146 {
5147 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5148 	u32 mask;
5149 	int i;
5150 
5151 	if (enable)
5152 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5153 	else
5154 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5155 	WREG32(CP_INT_CNTL_RING0, tmp);
5156 
5157 	if (!enable) {
5158 		/* read a gfx register */
5159 		tmp = RREG32(DB_DEPTH_INFO);
5160 
5161 		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5162 		for (i = 0; i < rdev->usec_timeout; i++) {
5163 			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5164 				break;
5165 			udelay(1);
5166 		}
5167 	}
5168 }
5169 
5170 static void si_set_uvd_dcm(struct radeon_device *rdev,
5171 			   bool sw_mode)
5172 {
5173 	u32 tmp, tmp2;
5174 
5175 	tmp = RREG32(UVD_CGC_CTRL);
5176 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5177 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
5178 
5179 	if (sw_mode) {
5180 		tmp &= ~0x7ffff800;
5181 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5182 	} else {
5183 		tmp |= 0x7ffff800;
5184 		tmp2 = 0;
5185 	}
5186 
5187 	WREG32(UVD_CGC_CTRL, tmp);
5188 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5189 }
5190 
5191 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5192 {
5193 	bool hw_mode = true;
5194 
5195 	if (hw_mode) {
5196 		si_set_uvd_dcm(rdev, false);
5197 	} else {
5198 		u32 tmp = RREG32(UVD_CGC_CTRL);
5199 		tmp &= ~DCM;
5200 		WREG32(UVD_CGC_CTRL, tmp);
5201 	}
5202 }
5203 
5204 static u32 si_halt_rlc(struct radeon_device *rdev)
5205 {
5206 	u32 data, orig;
5207 
5208 	orig = data = RREG32(RLC_CNTL);
5209 
5210 	if (data & RLC_ENABLE) {
5211 		data &= ~RLC_ENABLE;
5212 		WREG32(RLC_CNTL, data);
5213 
5214 		si_wait_for_rlc_serdes(rdev);
5215 	}
5216 
5217 	return orig;
5218 }
5219 
5220 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5221 {
5222 	u32 tmp;
5223 
5224 	tmp = RREG32(RLC_CNTL);
5225 	if (tmp != rlc)
5226 		WREG32(RLC_CNTL, rlc);
5227 }
5228 
5229 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5230 {
5231 	u32 data, orig;
5232 
5233 	orig = data = RREG32(DMA_PG);
5234 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5235 		data |= PG_CNTL_ENABLE;
5236 	else
5237 		data &= ~PG_CNTL_ENABLE;
5238 	if (orig != data)
5239 		WREG32(DMA_PG, data);
5240 }
5241 
5242 static void si_init_dma_pg(struct radeon_device *rdev)
5243 {
5244 	u32 tmp;
5245 
5246 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
5247 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5248 
5249 	for (tmp = 0; tmp < 5; tmp++)
5250 		WREG32(DMA_PGFSM_WRITE, 0);
5251 }
5252 
5253 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5254 			       bool enable)
5255 {
5256 	u32 tmp;
5257 
5258 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5259 		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5260 		WREG32(RLC_TTOP_D, tmp);
5261 
5262 		tmp = RREG32(RLC_PG_CNTL);
5263 		tmp |= GFX_PG_ENABLE;
5264 		WREG32(RLC_PG_CNTL, tmp);
5265 
5266 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5267 		tmp |= AUTO_PG_EN;
5268 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5269 	} else {
5270 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5271 		tmp &= ~AUTO_PG_EN;
5272 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5273 
5274 		tmp = RREG32(DB_RENDER_CONTROL);
5275 	}
5276 }
5277 
5278 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5279 {
5280 	u32 tmp;
5281 
5282 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5283 
5284 	tmp = RREG32(RLC_PG_CNTL);
5285 	tmp |= GFX_PG_SRC;
5286 	WREG32(RLC_PG_CNTL, tmp);
5287 
5288 	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5289 
5290 	tmp = RREG32(RLC_AUTO_PG_CTRL);
5291 
5292 	tmp &= ~GRBM_REG_SGIT_MASK;
5293 	tmp |= GRBM_REG_SGIT(0x700);
5294 	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5295 	WREG32(RLC_AUTO_PG_CTRL, tmp);
5296 }
5297 
5298 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5299 {
5300 	u32 mask = 0, tmp, tmp1;
5301 	int i;
5302 
5303 	si_select_se_sh(rdev, se, sh);
5304 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5305 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5306 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5307 
5308 	tmp &= 0xffff0000;
5309 
5310 	tmp |= tmp1;
5311 	tmp >>= 16;
5312 
5313 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5314 		mask <<= 1;
5315 		mask |= 1;
5316 	}
5317 
5318 	return (~tmp) & mask;
5319 }
5320 
5321 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5322 {
5323 	u32 i, j, k, active_cu_number = 0;
5324 	u32 mask, counter, cu_bitmap;
5325 	u32 tmp = 0;
5326 
5327 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5328 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5329 			mask = 1;
5330 			cu_bitmap = 0;
5331 			counter  = 0;
5332 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5333 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5334 					if (counter < 2)
5335 						cu_bitmap |= mask;
5336 					counter++;
5337 				}
5338 				mask <<= 1;
5339 			}
5340 
5341 			active_cu_number += counter;
5342 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5343 		}
5344 	}
5345 
5346 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5347 
5348 	tmp = RREG32(RLC_MAX_PG_CU);
5349 	tmp &= ~MAX_PU_CU_MASK;
5350 	tmp |= MAX_PU_CU(active_cu_number);
5351 	WREG32(RLC_MAX_PG_CU, tmp);
5352 }
5353 
5354 static void si_enable_cgcg(struct radeon_device *rdev,
5355 			   bool enable)
5356 {
5357 	u32 data, orig, tmp;
5358 
5359 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5360 
5361 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5362 		si_enable_gui_idle_interrupt(rdev, true);
5363 
5364 		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5365 
5366 		tmp = si_halt_rlc(rdev);
5367 
5368 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5369 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5370 		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5371 
5372 		si_wait_for_rlc_serdes(rdev);
5373 
5374 		si_update_rlc(rdev, tmp);
5375 
5376 		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5377 
5378 		data |= CGCG_EN | CGLS_EN;
5379 	} else {
5380 		si_enable_gui_idle_interrupt(rdev, false);
5381 
5382 		RREG32(CB_CGTT_SCLK_CTRL);
5383 		RREG32(CB_CGTT_SCLK_CTRL);
5384 		RREG32(CB_CGTT_SCLK_CTRL);
5385 		RREG32(CB_CGTT_SCLK_CTRL);
5386 
5387 		data &= ~(CGCG_EN | CGLS_EN);
5388 	}
5389 
5390 	if (orig != data)
5391 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5392 }
5393 
5394 static void si_enable_mgcg(struct radeon_device *rdev,
5395 			   bool enable)
5396 {
5397 	u32 data, orig, tmp = 0;
5398 
5399 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5400 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5401 		data = 0x96940200;
5402 		if (orig != data)
5403 			WREG32(CGTS_SM_CTRL_REG, data);
5404 
5405 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5406 			orig = data = RREG32(CP_MEM_SLP_CNTL);
5407 			data |= CP_MEM_LS_EN;
5408 			if (orig != data)
5409 				WREG32(CP_MEM_SLP_CNTL, data);
5410 		}
5411 
5412 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5413 		data &= 0xffffffc0;
5414 		if (orig != data)
5415 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5416 
5417 		tmp = si_halt_rlc(rdev);
5418 
5419 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5420 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5421 		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5422 
5423 		si_update_rlc(rdev, tmp);
5424 	} else {
5425 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5426 		data |= 0x00000003;
5427 		if (orig != data)
5428 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5429 
5430 		data = RREG32(CP_MEM_SLP_CNTL);
5431 		if (data & CP_MEM_LS_EN) {
5432 			data &= ~CP_MEM_LS_EN;
5433 			WREG32(CP_MEM_SLP_CNTL, data);
5434 		}
5435 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5436 		data |= LS_OVERRIDE | OVERRIDE;
5437 		if (orig != data)
5438 			WREG32(CGTS_SM_CTRL_REG, data);
5439 
5440 		tmp = si_halt_rlc(rdev);
5441 
5442 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5443 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5444 		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5445 
5446 		si_update_rlc(rdev, tmp);
5447 	}
5448 }
5449 
5450 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5451 			       bool enable)
5452 {
5453 	u32 orig, data, tmp;
5454 
5455 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5456 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5457 		tmp |= 0x3fff;
5458 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5459 
5460 		orig = data = RREG32(UVD_CGC_CTRL);
5461 		data |= DCM;
5462 		if (orig != data)
5463 			WREG32(UVD_CGC_CTRL, data);
5464 
5465 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5466 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5467 	} else {
5468 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5469 		tmp &= ~0x3fff;
5470 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5471 
5472 		orig = data = RREG32(UVD_CGC_CTRL);
5473 		data &= ~DCM;
5474 		if (orig != data)
5475 			WREG32(UVD_CGC_CTRL, data);
5476 
5477 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5478 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5479 	}
5480 }
5481 
5482 static const u32 mc_cg_registers[] =
5483 {
5484 	MC_HUB_MISC_HUB_CG,
5485 	MC_HUB_MISC_SIP_CG,
5486 	MC_HUB_MISC_VM_CG,
5487 	MC_XPB_CLK_GAT,
5488 	ATC_MISC_CG,
5489 	MC_CITF_MISC_WR_CG,
5490 	MC_CITF_MISC_RD_CG,
5491 	MC_CITF_MISC_VM_CG,
5492 	VM_L2_CG,
5493 };
5494 
5495 static void si_enable_mc_ls(struct radeon_device *rdev,
5496 			    bool enable)
5497 {
5498 	int i;
5499 	u32 orig, data;
5500 
5501 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5502 		orig = data = RREG32(mc_cg_registers[i]);
5503 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5504 			data |= MC_LS_ENABLE;
5505 		else
5506 			data &= ~MC_LS_ENABLE;
5507 		if (data != orig)
5508 			WREG32(mc_cg_registers[i], data);
5509 	}
5510 }
5511 
5512 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5513 			       bool enable)
5514 {
5515 	int i;
5516 	u32 orig, data;
5517 
5518 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5519 		orig = data = RREG32(mc_cg_registers[i]);
5520 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5521 			data |= MC_CG_ENABLE;
5522 		else
5523 			data &= ~MC_CG_ENABLE;
5524 		if (data != orig)
5525 			WREG32(mc_cg_registers[i], data);
5526 	}
5527 }
5528 
5529 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5530 			       bool enable)
5531 {
5532 	u32 orig, data, offset;
5533 	int i;
5534 
5535 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5536 		for (i = 0; i < 2; i++) {
5537 			if (i == 0)
5538 				offset = DMA0_REGISTER_OFFSET;
5539 			else
5540 				offset = DMA1_REGISTER_OFFSET;
5541 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5542 			data &= ~MEM_POWER_OVERRIDE;
5543 			if (data != orig)
5544 				WREG32(DMA_POWER_CNTL + offset, data);
5545 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5546 		}
5547 	} else {
5548 		for (i = 0; i < 2; i++) {
5549 			if (i == 0)
5550 				offset = DMA0_REGISTER_OFFSET;
5551 			else
5552 				offset = DMA1_REGISTER_OFFSET;
5553 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5554 			data |= MEM_POWER_OVERRIDE;
5555 			if (data != orig)
5556 				WREG32(DMA_POWER_CNTL + offset, data);
5557 
5558 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5559 			data = 0xff000000;
5560 			if (data != orig)
5561 				WREG32(DMA_CLK_CTRL + offset, data);
5562 		}
5563 	}
5564 }
5565 
5566 static void si_enable_bif_mgls(struct radeon_device *rdev,
5567 			       bool enable)
5568 {
5569 	u32 orig, data;
5570 
5571 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5572 
5573 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5574 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5575 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5576 	else
5577 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5578 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5579 
5580 	if (orig != data)
5581 		WREG32_PCIE(PCIE_CNTL2, data);
5582 }
5583 
5584 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5585 			       bool enable)
5586 {
5587 	u32 orig, data;
5588 
5589 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5590 
5591 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5592 		data &= ~CLOCK_GATING_DIS;
5593 	else
5594 		data |= CLOCK_GATING_DIS;
5595 
5596 	if (orig != data)
5597 		WREG32(HDP_HOST_PATH_CNTL, data);
5598 }
5599 
5600 static void si_enable_hdp_ls(struct radeon_device *rdev,
5601 			     bool enable)
5602 {
5603 	u32 orig, data;
5604 
5605 	orig = data = RREG32(HDP_MEM_POWER_LS);
5606 
5607 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5608 		data |= HDP_LS_ENABLE;
5609 	else
5610 		data &= ~HDP_LS_ENABLE;
5611 
5612 	if (orig != data)
5613 		WREG32(HDP_MEM_POWER_LS, data);
5614 }
5615 
5616 static void si_update_cg(struct radeon_device *rdev,
5617 			 u32 block, bool enable)
5618 {
5619 	if (block & RADEON_CG_BLOCK_GFX) {
5620 		si_enable_gui_idle_interrupt(rdev, false);
5621 		/* order matters! */
5622 		if (enable) {
5623 			si_enable_mgcg(rdev, true);
5624 			si_enable_cgcg(rdev, true);
5625 		} else {
5626 			si_enable_cgcg(rdev, false);
5627 			si_enable_mgcg(rdev, false);
5628 		}
5629 		si_enable_gui_idle_interrupt(rdev, true);
5630 	}
5631 
5632 	if (block & RADEON_CG_BLOCK_MC) {
5633 		si_enable_mc_mgcg(rdev, enable);
5634 		si_enable_mc_ls(rdev, enable);
5635 	}
5636 
5637 	if (block & RADEON_CG_BLOCK_SDMA) {
5638 		si_enable_dma_mgcg(rdev, enable);
5639 	}
5640 
5641 	if (block & RADEON_CG_BLOCK_BIF) {
5642 		si_enable_bif_mgls(rdev, enable);
5643 	}
5644 
5645 	if (block & RADEON_CG_BLOCK_UVD) {
5646 		if (rdev->has_uvd) {
5647 			si_enable_uvd_mgcg(rdev, enable);
5648 		}
5649 	}
5650 
5651 	if (block & RADEON_CG_BLOCK_HDP) {
5652 		si_enable_hdp_mgcg(rdev, enable);
5653 		si_enable_hdp_ls(rdev, enable);
5654 	}
5655 }
5656 
5657 static void si_init_cg(struct radeon_device *rdev)
5658 {
5659 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5660 			    RADEON_CG_BLOCK_MC |
5661 			    RADEON_CG_BLOCK_SDMA |
5662 			    RADEON_CG_BLOCK_BIF |
5663 			    RADEON_CG_BLOCK_HDP), true);
5664 	if (rdev->has_uvd) {
5665 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5666 		si_init_uvd_internal_cg(rdev);
5667 	}
5668 }
5669 
5670 static void si_fini_cg(struct radeon_device *rdev)
5671 {
5672 	if (rdev->has_uvd) {
5673 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5674 	}
5675 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5676 			    RADEON_CG_BLOCK_MC |
5677 			    RADEON_CG_BLOCK_SDMA |
5678 			    RADEON_CG_BLOCK_BIF |
5679 			    RADEON_CG_BLOCK_HDP), false);
5680 }
5681 
5682 u32 si_get_csb_size(struct radeon_device *rdev)
5683 {
5684 	u32 count = 0;
5685 	const struct cs_section_def *sect = NULL;
5686 	const struct cs_extent_def *ext = NULL;
5687 
5688 	if (rdev->rlc.cs_data == NULL)
5689 		return 0;
5690 
5691 	/* begin clear state */
5692 	count += 2;
5693 	/* context control state */
5694 	count += 3;
5695 
5696 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5697 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5698 			if (sect->id == SECT_CONTEXT)
5699 				count += 2 + ext->reg_count;
5700 			else
5701 				return 0;
5702 		}
5703 	}
5704 	/* pa_sc_raster_config */
5705 	count += 3;
5706 	/* end clear state */
5707 	count += 2;
5708 	/* clear state */
5709 	count += 2;
5710 
5711 	return count;
5712 }
5713 
5714 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5715 {
5716 	u32 count = 0, i;
5717 	const struct cs_section_def *sect = NULL;
5718 	const struct cs_extent_def *ext = NULL;
5719 
5720 	if (rdev->rlc.cs_data == NULL)
5721 		return;
5722 	if (buffer == NULL)
5723 		return;
5724 
5725 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5726 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5727 
5728 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5729 	buffer[count++] = cpu_to_le32(0x80000000);
5730 	buffer[count++] = cpu_to_le32(0x80000000);
5731 
5732 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5733 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5734 			if (sect->id == SECT_CONTEXT) {
5735 				buffer[count++] =
5736 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5737 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5738 				for (i = 0; i < ext->reg_count; i++)
5739 					buffer[count++] = cpu_to_le32(ext->extent[i]);
5740 			} else {
5741 				return;
5742 			}
5743 		}
5744 	}
5745 
5746 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5747 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5748 	switch (rdev->family) {
5749 	case CHIP_TAHITI:
5750 	case CHIP_PITCAIRN:
5751 		buffer[count++] = cpu_to_le32(0x2a00126a);
5752 		break;
5753 	case CHIP_VERDE:
5754 		buffer[count++] = cpu_to_le32(0x0000124a);
5755 		break;
5756 	case CHIP_OLAND:
5757 		buffer[count++] = cpu_to_le32(0x00000082);
5758 		break;
5759 	case CHIP_HAINAN:
5760 		buffer[count++] = cpu_to_le32(0x00000000);
5761 		break;
5762 	default:
5763 		buffer[count++] = cpu_to_le32(0x00000000);
5764 		break;
5765 	}
5766 
5767 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5768 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5769 
5770 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5771 	buffer[count++] = cpu_to_le32(0);
5772 }
5773 
5774 static void si_init_pg(struct radeon_device *rdev)
5775 {
5776 	if (rdev->pg_flags) {
5777 		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5778 			si_init_dma_pg(rdev);
5779 		}
5780 		si_init_ao_cu_mask(rdev);
5781 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5782 			si_init_gfx_cgpg(rdev);
5783 		} else {
5784 			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5785 			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5786 		}
5787 		si_enable_dma_pg(rdev, true);
5788 		si_enable_gfx_cgpg(rdev, true);
5789 	} else {
5790 		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5791 		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5792 	}
5793 }
5794 
5795 static void si_fini_pg(struct radeon_device *rdev)
5796 {
5797 	if (rdev->pg_flags) {
5798 		si_enable_dma_pg(rdev, false);
5799 		si_enable_gfx_cgpg(rdev, false);
5800 	}
5801 }
5802 
5803 /*
5804  * RLC
5805  */
5806 void si_rlc_reset(struct radeon_device *rdev)
5807 {
5808 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5809 
5810 	tmp |= SOFT_RESET_RLC;
5811 	WREG32(GRBM_SOFT_RESET, tmp);
5812 	udelay(50);
5813 	tmp &= ~SOFT_RESET_RLC;
5814 	WREG32(GRBM_SOFT_RESET, tmp);
5815 	udelay(50);
5816 }
5817 
5818 static void si_rlc_stop(struct radeon_device *rdev)
5819 {
5820 	WREG32(RLC_CNTL, 0);
5821 
5822 	si_enable_gui_idle_interrupt(rdev, false);
5823 
5824 	si_wait_for_rlc_serdes(rdev);
5825 }
5826 
5827 static void si_rlc_start(struct radeon_device *rdev)
5828 {
5829 	WREG32(RLC_CNTL, RLC_ENABLE);
5830 
5831 	si_enable_gui_idle_interrupt(rdev, true);
5832 
5833 	udelay(50);
5834 }
5835 
5836 static bool si_lbpw_supported(struct radeon_device *rdev)
5837 {
5838 	u32 tmp;
5839 
5840 	/* Enable LBPW only for DDR3 */
5841 	tmp = RREG32(MC_SEQ_MISC0);
5842 	if ((tmp & 0xF0000000) == 0xB0000000)
5843 		return true;
5844 	return false;
5845 }
5846 
5847 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5848 {
5849 	u32 tmp;
5850 
5851 	tmp = RREG32(RLC_LB_CNTL);
5852 	if (enable)
5853 		tmp |= LOAD_BALANCE_ENABLE;
5854 	else
5855 		tmp &= ~LOAD_BALANCE_ENABLE;
5856 	WREG32(RLC_LB_CNTL, tmp);
5857 
5858 	if (!enable) {
5859 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5860 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5861 	}
5862 }
5863 
5864 static int si_rlc_resume(struct radeon_device *rdev)
5865 {
5866 	u32 i;
5867 
5868 	if (!rdev->rlc_fw)
5869 		return -EINVAL;
5870 
5871 	si_rlc_stop(rdev);
5872 
5873 	si_rlc_reset(rdev);
5874 
5875 	si_init_pg(rdev);
5876 
5877 	si_init_cg(rdev);
5878 
5879 	WREG32(RLC_RL_BASE, 0);
5880 	WREG32(RLC_RL_SIZE, 0);
5881 	WREG32(RLC_LB_CNTL, 0);
5882 	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5883 	WREG32(RLC_LB_CNTR_INIT, 0);
5884 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5885 
5886 	WREG32(RLC_MC_CNTL, 0);
5887 	WREG32(RLC_UCODE_CNTL, 0);
5888 
5889 	if (rdev->new_fw) {
5890 		const struct rlc_firmware_header_v1_0 *hdr =
5891 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5892 		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5893 		const __le32 *fw_data = (const __le32 *)
5894 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5895 
5896 		radeon_ucode_print_rlc_hdr(&hdr->header);
5897 
5898 		for (i = 0; i < fw_size; i++) {
5899 			WREG32(RLC_UCODE_ADDR, i);
5900 			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
5901 		}
5902 	} else {
5903 		const __be32 *fw_data =
5904 			(const __be32 *)rdev->rlc_fw->data;
5905 		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5906 			WREG32(RLC_UCODE_ADDR, i);
5907 			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5908 		}
5909 	}
5910 	WREG32(RLC_UCODE_ADDR, 0);
5911 
5912 	si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5913 
5914 	si_rlc_start(rdev);
5915 
5916 	return 0;
5917 }
5918 
5919 static void si_enable_interrupts(struct radeon_device *rdev)
5920 {
5921 	u32 ih_cntl = RREG32(IH_CNTL);
5922 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5923 
5924 	ih_cntl |= ENABLE_INTR;
5925 	ih_rb_cntl |= IH_RB_ENABLE;
5926 	WREG32(IH_CNTL, ih_cntl);
5927 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5928 	rdev->ih.enabled = true;
5929 }
5930 
5931 static void si_disable_interrupts(struct radeon_device *rdev)
5932 {
5933 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5934 	u32 ih_cntl = RREG32(IH_CNTL);
5935 
5936 	ih_rb_cntl &= ~IH_RB_ENABLE;
5937 	ih_cntl &= ~ENABLE_INTR;
5938 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5939 	WREG32(IH_CNTL, ih_cntl);
5940 	/* set rptr, wptr to 0 */
5941 	WREG32(IH_RB_RPTR, 0);
5942 	WREG32(IH_RB_WPTR, 0);
5943 	rdev->ih.enabled = false;
5944 	rdev->ih.rptr = 0;
5945 }
5946 
5947 static void si_disable_interrupt_state(struct radeon_device *rdev)
5948 {
5949 	int i;
5950 	u32 tmp;
5951 
5952 	tmp = RREG32(CP_INT_CNTL_RING0) &
5953 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5954 	WREG32(CP_INT_CNTL_RING0, tmp);
5955 	WREG32(CP_INT_CNTL_RING1, 0);
5956 	WREG32(CP_INT_CNTL_RING2, 0);
5957 	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5958 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5959 	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5960 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5961 	WREG32(GRBM_INT_CNTL, 0);
5962 	WREG32(SRBM_INT_CNTL, 0);
5963 	for (i = 0; i < rdev->num_crtc; i++)
5964 		WREG32(INT_MASK + crtc_offsets[i], 0);
5965 	for (i = 0; i < rdev->num_crtc; i++)
5966 		WREG32(GRPH_INT_CONTROL + crtc_offsets[i], 0);
5967 
5968 	if (!ASIC_IS_NODCE(rdev)) {
5969 		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5970 
5971 		for (i = 0; i < 6; i++)
5972 			WREG32_AND(DC_HPDx_INT_CONTROL(i),
5973 				   DC_HPDx_INT_POLARITY);
5974 	}
5975 }
5976 
5977 static int si_irq_init(struct radeon_device *rdev)
5978 {
5979 	int ret = 0;
5980 	int rb_bufsz;
5981 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5982 
5983 	/* allocate ring */
5984 	ret = r600_ih_ring_alloc(rdev);
5985 	if (ret)
5986 		return ret;
5987 
5988 	/* disable irqs */
5989 	si_disable_interrupts(rdev);
5990 
5991 	/* init rlc */
5992 	ret = si_rlc_resume(rdev);
5993 	if (ret) {
5994 		r600_ih_ring_fini(rdev);
5995 		return ret;
5996 	}
5997 
5998 	/* setup interrupt control */
5999 	/* set dummy read address to dummy page address */
6000 	WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
6001 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6002 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6003 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6004 	 */
6005 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6006 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6007 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6008 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6009 
6010 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6011 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6012 
6013 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6014 		      IH_WPTR_OVERFLOW_CLEAR |
6015 		      (rb_bufsz << 1));
6016 
6017 	if (rdev->wb.enabled)
6018 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6019 
6020 	/* set the writeback address whether it's enabled or not */
6021 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6022 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6023 
6024 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6025 
6026 	/* set rptr, wptr to 0 */
6027 	WREG32(IH_RB_RPTR, 0);
6028 	WREG32(IH_RB_WPTR, 0);
6029 
6030 	/* Default settings for IH_CNTL (disabled at first) */
6031 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6032 	/* RPTR_REARM only works if msi's are enabled */
6033 	if (rdev->msi_enabled)
6034 		ih_cntl |= RPTR_REARM;
6035 	WREG32(IH_CNTL, ih_cntl);
6036 
6037 	/* force the active interrupt state to all disabled */
6038 	si_disable_interrupt_state(rdev);
6039 
6040 	pci_set_master(rdev->pdev);
6041 
6042 	/* enable irqs */
6043 	si_enable_interrupts(rdev);
6044 
6045 	return ret;
6046 }
6047 
/*
 * si_irq_set - program the interrupt enables from the driver state
 *
 * Builds the enable masks for the CP rings, both DMA engines, the
 * thermal interrupt, the crtc vblank interrupts and the HPD pins from
 * rdev->irq and writes them to the hardware.
 *
 * Returns -EINVAL (after a WARN) when no irq handler is installed.
 * Returns 0 otherwise; when the IH is not enabled everything is
 * disabled instead of being programmed.
 *
 * The order we write back each register here is important
 */
int si_irq_set(struct radeon_device *rdev)
{
	int i;
	u32 cp_int_cntl;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;
	u32 thermal_int = 0;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* ring 0 keeps only its current CNTX_BUSY/CNTX_EMPTY enable bits */
	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);

	/* start from the current DMA control values with the trap masked */
	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	/* likewise for the thermal interrupt: both masks cleared for now */
	thermal_int = RREG32(CG_THERMAL_INT) &
		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	/* enable the trap interrupt of each DMA engine that is in use */
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}

	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	/* grbm_int_cntl is never modified above, so this always writes 0 */
	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	if (rdev->irq.dpm_thermal) {
		DRM_DEBUG("dpm thermal\n");
		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
	}

	/* vblank is wanted when either a vblank or a page flip is pending */
	for (i = 0; i < rdev->num_crtc; i++) {
		radeon_irq_kms_set_irq_n_enabled(
		    rdev, INT_MASK + crtc_offsets[i], VBLANK_INT_MASK,
		    rdev->irq.crtc_vblank_int[i] ||
		    atomic_read(&rdev->irq.pflip[i]), "vblank", i);
	}

	/* GRPH_PFLIP_INT_MASK is written unconditionally for every crtc */
	for (i = 0; i < rdev->num_crtc; i++)
		WREG32(GRPH_INT_CONTROL + crtc_offsets[i], GRPH_PFLIP_INT_MASK);

	/* HPD pins are only programmed on asics with a display engine */
	if (!ASIC_IS_NODCE(rdev)) {
		for (i = 0; i < 6; i++) {
			radeon_irq_kms_set_irq_n_enabled(
			    rdev, DC_HPDx_INT_CONTROL(i),
			    DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN,
			    rdev->irq.hpd[i], "HPD", i);
		}
	}

	WREG32(CG_THERMAL_INT, thermal_int);

	/* posting read */
	RREG32(SRBM_STATUS);

	return 0;
}
6142 
/*
 * si_irq_ack - latch and acknowledge the display interrupts
 *
 * Reads the six display interrupt status registers (and, for each
 * existing crtc, the graphics interrupt status) into
 * rdev->irq.stat_regs.evergreen, then acknowledges every page flip,
 * vblank, vline, HPD and HPD RX interrupt found pending in those
 * snapshots.  Does nothing on asics without a display engine.
 *
 * The order we write back each register here is important
 */
static inline void si_irq_ack(struct radeon_device *rdev)
{
	int i, j;
	u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
	u32 *grph_int = rdev->irq.stat_regs.evergreen.grph_int;

	if (ASIC_IS_NODCE(rdev))
		return;

	/* snapshot the status registers; grph_int only for existing crtcs */
	for (i = 0; i < 6; i++) {
		disp_int[i] = RREG32(si_disp_int_status[i]);
		if (i < rdev->num_crtc)
			grph_int[i] = RREG32(GRPH_INT_STATUS + crtc_offsets[i]);
	}

	/* We write back each interrupt register in pairs of two */
	for (i = 0; i < rdev->num_crtc; i += 2) {
		/* page flips of the pair first ... */
		for (j = i; j < (i + 2); j++) {
			if (grph_int[j] & GRPH_PFLIP_INT_OCCURRED)
				WREG32(GRPH_INT_STATUS + crtc_offsets[j],
				       GRPH_PFLIP_INT_CLEAR);
		}

		/* ... then vblank and vline of the same pair */
		for (j = i; j < (i + 2); j++) {
			if (disp_int[j] & LB_D1_VBLANK_INTERRUPT)
				WREG32(VBLANK_STATUS + crtc_offsets[j],
				       VBLANK_ACK);
			if (disp_int[j] & LB_D1_VLINE_INTERRUPT)
				WREG32(VLINE_STATUS + crtc_offsets[j],
				       VLINE_ACK);
		}
	}

	/* hotplug detect: the DC_HPD1_INTERRUPT bit is tested in every status word */
	for (i = 0; i < 6; i++) {
		if (disp_int[i] & DC_HPD1_INTERRUPT)
			WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_INT_ACK);
	}

	/* HPD RX interrupts are acked in a separate pass */
	for (i = 0; i < 6; i++) {
		if (disp_int[i] & DC_HPD1_RX_INTERRUPT)
			WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_RX_INT_ACK);
	}
}
6187 
/*
 * si_irq_disable - disable interrupts and flush what is pending
 *
 * Turns the IH off, waits 1ms, acknowledges the display interrupts and
 * then masks every interrupt source.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);

	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);

	si_disable_interrupt_state(rdev);
}
6196 
/*
 * si_irq_suspend - quiesce interrupts for suspend
 *
 * Disables interrupts first and stops the RLC afterwards.
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);	/* IH off, pending irqs acked, sources masked */
	si_rlc_stop(rdev);
}
6202 
/*
 * si_irq_fini - tear down interrupt handling
 *
 * Suspends interrupt handling and then frees the IH ring.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);		/* must happen before the ring goes away */
	r600_ih_ring_fini(rdev);
}
6208 
6209 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6210 {
6211 	u32 wptr, tmp;
6212 
6213 	if (rdev->wb.enabled)
6214 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6215 	else
6216 		wptr = RREG32(IH_RB_WPTR);
6217 
6218 	if (wptr & RB_OVERFLOW) {
6219 		wptr &= ~RB_OVERFLOW;
6220 		/* When a ring buffer overflow happen start parsing interrupt
6221 		 * from the last not overwritten vector (wptr + 16). Hopefully
6222 		 * this should allow us to catchup.
6223 		 */
6224 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6225 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6226 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6227 		tmp = RREG32(IH_RB_CNTL);
6228 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6229 		WREG32(IH_RB_CNTL, tmp);
6230 	}
6231 	return (wptr & rdev->ih.ptr_mask);
6232 }
6233 
/*        SI IV Ring
 * Each IV ring entry is 128 bits:
 * [7:0]    - interrupt source id
 * [31:8]   - reserved
 * [59:32]  - interrupt source data
 * [63:60]  - reserved
 * [71:64]  - RINGID
 * [79:72]  - VMID
 * [127:80] - reserved
 *
 * si_irq_process - drain the IH ring and dispatch every entry
 *
 * Walks the ring from the driver read pointer up to the hardware write
 * pointer, decodes source id, source data and ring id of each entry and
 * handles it: vblank/vline, page flip, hotplug, SRBM read error, UVD,
 * VM protection fault, CP and DMA fence processing, thermal events.
 * Hotplug, DP and thermal work is only flagged inside the loop and
 * scheduled once the ring has been drained.
 *
 * Returns IRQ_NONE when the IH is disabled, the device is shutting down
 * or the ring is already being processed (rdev->ih.lock already set);
 * IRQ_HANDLED otherwise.
 */
int si_irq_process(struct radeon_device *rdev)
{
	u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
	u32 crtc_idx, hpd_idx;
	u32 mask;
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_dp = false;
	bool queue_thermal = false;
	u32 status, addr;
	const char *event_name;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = si_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	si_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;
		/* dword 0: source id, dword 1: source data, dword 2: ring id */
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
		case 2: /* D2 vblank/vline */
		case 3: /* D3 vblank/vline */
		case 4: /* D4 vblank/vline */
		case 5: /* D5 vblank/vline */
		case 6: /* D6 vblank/vline */
			crtc_idx = src_id - 1;

			if (src_data == 0) { /* vblank */
				mask = LB_D1_VBLANK_INTERRUPT;
				event_name = "vblank";

				if (rdev->irq.crtc_vblank_int[crtc_idx]) {
					drm_handle_vblank(rdev->ddev, crtc_idx);
					rdev->pm.vblank_sync = true;
					wake_up(&rdev->irq.vblank_queue);
				}
				if (atomic_read(&rdev->irq.pflip[crtc_idx])) {
					radeon_crtc_handle_vblank(rdev,
								  crtc_idx);
				}

			} else if (src_data == 1) { /* vline */
				mask = LB_D1_VLINE_INTERRUPT;
				event_name = "vline";
			} else {
				DRM_DEBUG("Unhandled interrupt: %d %d\n",
					  src_id, src_data);
				break;
			}

			/* the status bit was latched by si_irq_ack() above */
			if (!(disp_int[crtc_idx] & mask)) {
				DRM_DEBUG("IH: D%d %s - IH event w/o asserted irq bit?\n",
					  crtc_idx + 1, event_name);
			}

			disp_int[crtc_idx] &= ~mask;
			DRM_DEBUG("IH: D%d %s\n", crtc_idx + 1, event_name);

			break;
		case 8: /* D1 page flip */
		case 10: /* D2 page flip */
		case 12: /* D3 page flip */
		case 14: /* D4 page flip */
		case 16: /* D5 page flip */
		case 18: /* D6 page flip */
			/* source ids step by two per crtc, hence the shift */
			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
			if (radeon_use_pflipirq > 0)
				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
			break;
		case 42: /* HPD hotplug */
			/* source data 0-5 selects an HPD pin, 6-11 an HPD RX pin */
			if (src_data <= 5) {
				hpd_idx = src_data;
				mask = DC_HPD1_INTERRUPT;
				queue_hotplug = true;
				event_name = "HPD";

			} else if (src_data <= 11) {
				hpd_idx = src_data - 6;
				mask = DC_HPD1_RX_INTERRUPT;
				queue_dp = true;
				event_name = "HPD_RX";

			} else {
				DRM_DEBUG("Unhandled interrupt: %d %d\n",
					  src_id, src_data);
				break;
			}

			if (!(disp_int[hpd_idx] & mask))
				DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

			disp_int[hpd_idx] &= ~mask;
			DRM_DEBUG("IH: %s%d\n", event_name, hpd_idx + 1);
			break;
		case 96:
			/* report the SRBM read error register and ack the interrupt */
			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
			WREG32(SRBM_INT_ACK, 0x1);
			break;
		case 124: /* UVD */
			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
			break;
		case 146:
		case 147:
			/* VM context1 protection fault: read, reset, then report */
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			/* nothing to report when both registers read back as 0 */
			if (addr == 0x0 && status == 0x0)
				break;
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				addr);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				status);
			si_vm_decode_fault(rdev, status, addr);
			break;
		case 176: /* RINGID0 CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 177: /* RINGID1 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			break;
		case 178: /* RINGID2 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			/* ring ids other than 0-2 are silently ignored */
			switch (ring_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				break;
			case 2:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 224: /* DMA trap event */
			DRM_DEBUG("IH: DMA trap\n");
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
			break;
		case 230: /* thermal low to high */
			DRM_DEBUG("IH: thermal low to high\n");
			rdev->pm.dpm.thermal.high_to_low = false;
			queue_thermal = true;
			break;
		case 231: /* thermal high to low */
			DRM_DEBUG("IH: thermal high to low\n");
			rdev->pm.dpm.thermal.high_to_low = true;
			queue_thermal = true;
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 244: /* DMA trap event */
			DRM_DEBUG("IH: DMA1 trap\n");
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		/* advance by one 16 byte entry and publish the new read pointer */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
		WREG32(IH_RB_RPTR, rptr);
	}
	/* deferred work flagged while draining the ring */
	if (queue_dp)
		schedule_work(&rdev->dp_work);
	if (queue_hotplug)
		schedule_delayed_work(&rdev->hotplug_work, 0);
	if (queue_thermal && rdev->pm.dpm_enabled)
		schedule_work(&rdev->pm.dpm.thermal.work);
	rdev->ih.rptr = rptr;
	/* release the processing lock taken at restart_ih */
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = si_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
6454 
6455 /*
6456  * startup/shutdown callbacks
6457  */
6458 static void si_uvd_init(struct radeon_device *rdev)
6459 {
6460 	int r;
6461 
6462 	if (!rdev->has_uvd)
6463 		return;
6464 
6465 	r = radeon_uvd_init(rdev);
6466 	if (r) {
6467 		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
6468 		/*
6469 		 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
6470 		 * to early fails uvd_v2_2_resume() and thus nothing happens
6471 		 * there. So it is pointless to try to go through that code
6472 		 * hence why we disable uvd here.
6473 		 */
6474 		rdev->has_uvd = false;
6475 		return;
6476 	}
6477 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
6478 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
6479 }
6480 
6481 static void si_uvd_start(struct radeon_device *rdev)
6482 {
6483 	int r;
6484 
6485 	if (!rdev->has_uvd)
6486 		return;
6487 
6488 	r = uvd_v2_2_resume(rdev);
6489 	if (r) {
6490 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
6491 		goto error;
6492 	}
6493 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
6494 	if (r) {
6495 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
6496 		goto error;
6497 	}
6498 	return;
6499 
6500 error:
6501 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6502 }
6503 
6504 static void si_uvd_resume(struct radeon_device *rdev)
6505 {
6506 	struct radeon_ring *ring;
6507 	int r;
6508 
6509 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
6510 		return;
6511 
6512 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6513 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
6514 	if (r) {
6515 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
6516 		return;
6517 	}
6518 	r = uvd_v1_0_init(rdev);
6519 	if (r) {
6520 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
6521 		return;
6522 	}
6523 }
6524 
6525 static void si_vce_init(struct radeon_device *rdev)
6526 {
6527 	int r;
6528 
6529 	if (!rdev->has_vce)
6530 		return;
6531 
6532 	r = radeon_vce_init(rdev);
6533 	if (r) {
6534 		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
6535 		/*
6536 		 * At this point rdev->vce.vcpu_bo is NULL which trickles down
6537 		 * to early fails si_vce_start() and thus nothing happens
6538 		 * there. So it is pointless to try to go through that code
6539 		 * hence why we disable vce here.
6540 		 */
6541 		rdev->has_vce = false;
6542 		return;
6543 	}
6544 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
6545 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
6546 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
6547 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
6548 }
6549 
6550 static void si_vce_start(struct radeon_device *rdev)
6551 {
6552 	int r;
6553 
6554 	if (!rdev->has_vce)
6555 		return;
6556 
6557 	r = radeon_vce_resume(rdev);
6558 	if (r) {
6559 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6560 		goto error;
6561 	}
6562 	r = vce_v1_0_resume(rdev);
6563 	if (r) {
6564 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6565 		goto error;
6566 	}
6567 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
6568 	if (r) {
6569 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
6570 		goto error;
6571 	}
6572 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
6573 	if (r) {
6574 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
6575 		goto error;
6576 	}
6577 	return;
6578 
6579 error:
6580 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
6581 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
6582 }
6583 
6584 static void si_vce_resume(struct radeon_device *rdev)
6585 {
6586 	struct radeon_ring *ring;
6587 	int r;
6588 
6589 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
6590 		return;
6591 
6592 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
6593 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6594 	if (r) {
6595 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6596 		return;
6597 	}
6598 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
6599 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6600 	if (r) {
6601 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6602 		return;
6603 	}
6604 	r = vce_v1_0_init(rdev);
6605 	if (r) {
6606 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
6607 		return;
6608 	}
6609 }
6610 
/*
 * si_startup - bring the asic into a working state
 *
 * Runs the hardware bring-up sequence in a fixed order: PCIE link and
 * ASPM setup, VRAM scratch, memory controller programming (plus MC
 * microcode when dpm is not enabled), GART, GPU init, RLC buffers,
 * writeback, fence rings, UVD/VCE start, interrupts, the CP and DMA
 * rings, CP microcode and resume, DMA resume, UVD/VCE resume, the IB
 * pool, the VM manager and audio.
 *
 * UVD and VCE failures are handled inside their helpers and do not
 * abort the sequence.  Every other failing step returns its error code
 * immediately; apart from radeon_irq_kms_fini() after a failed IH init
 * no cleanup of earlier steps is done here.
 *
 * Returns 0 on success, a negative error code otherwise.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);

	/* the MC microcode is only loaded here when dpm is not enabled */
	if (!rdev->pm.dpm_enabled) {
		r = si_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	/* only Verde gets a save/restore register list assigned here */
	if (rdev->family == CHIP_VERDE) {
		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size =
			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
	}
	rdev->rlc.cs_data = si_cs_data;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start the fence driver on the three CP rings and both DMA rings */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* neither helper returns a value, so failures cannot abort startup */
	si_uvd_start(rdev);
	si_vce_start(rdev);

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	/* the return value of si_irq_set() is not checked */
	si_irq_set(rdev);

	/* initialise the CP rings, then the DMA rings */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	/* load the CP microcode and resume the CP, then the DMA engines */
	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	si_uvd_resume(rdev);
	si_vce_resume(rdev);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
6769 
6770 int si_resume(struct radeon_device *rdev)
6771 {
6772 	int r;
6773 
6774 	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6775 	 * posting will perform necessary task to bring back GPU into good
6776 	 * shape.
6777 	 */
6778 	/* post card */
6779 	atom_asic_init(rdev->mode_info.atom_context);
6780 
6781 	/* init golden registers */
6782 	si_init_golden_registers(rdev);
6783 
6784 	if (rdev->pm.pm_method == PM_METHOD_DPM)
6785 		radeon_pm_resume(rdev);
6786 
6787 	rdev->accel_working = true;
6788 	r = si_startup(rdev);
6789 	if (r) {
6790 		DRM_ERROR("si startup failed on resume\n");
6791 		rdev->accel_working = false;
6792 		return r;
6793 	}
6794 
6795 	return r;
6796 
6797 }
6798 
6799 int si_suspend(struct radeon_device *rdev)
6800 {
6801 	radeon_pm_suspend(rdev);
6802 	radeon_audio_fini(rdev);
6803 	radeon_vm_manager_fini(rdev);
6804 	si_cp_enable(rdev, false);
6805 	cayman_dma_stop(rdev);
6806 	if (rdev->has_uvd) {
6807 		radeon_uvd_suspend(rdev);
6808 		uvd_v1_0_fini(rdev);
6809 	}
6810 	if (rdev->has_vce)
6811 		radeon_vce_suspend(rdev);
6812 	si_fini_pg(rdev);
6813 	si_fini_cg(rdev);
6814 	si_irq_suspend(rdev);
6815 	radeon_wb_disable(rdev);
6816 	si_pcie_gart_disable(rdev);
6817 	return 0;
6818 }
6819 
/* The plan is to move initialization into this function and use
 * helper functions, so that radeon_device_init does little more
 * than call the asic specific functions. This should also allow
 * removing a bunch of callback functions such as vram_info.
 */
6826 int si_init(struct radeon_device *rdev)
6827 {
6828 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6829 	int r;
6830 
6831 	/* Read BIOS */
6832 	if (!radeon_get_bios(rdev)) {
6833 		if (ASIC_IS_AVIVO(rdev))
6834 			return -EINVAL;
6835 	}
6836 	/* Must be an ATOMBIOS */
6837 	if (!rdev->is_atom_bios) {
6838 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6839 		return -EINVAL;
6840 	}
6841 	r = radeon_atombios_init(rdev);
6842 	if (r)
6843 		return r;
6844 
6845 	/* Post card if necessary */
6846 	if (!radeon_card_posted(rdev)) {
6847 		if (!rdev->bios) {
6848 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6849 			return -EINVAL;
6850 		}
6851 		DRM_INFO("GPU not posted. posting now...\n");
6852 		atom_asic_init(rdev->mode_info.atom_context);
6853 	}
6854 	/* init golden registers */
6855 	si_init_golden_registers(rdev);
6856 	/* Initialize scratch registers */
6857 	si_scratch_init(rdev);
6858 	/* Initialize surface registers */
6859 	radeon_surface_init(rdev);
6860 	/* Initialize clocks */
6861 	radeon_get_clock_info(rdev->ddev);
6862 
6863 	/* Fence driver */
6864 	radeon_fence_driver_init(rdev);
6865 
6866 	/* initialize memory controller */
6867 	r = si_mc_init(rdev);
6868 	if (r)
6869 		return r;
6870 	/* Memory manager */
6871 	r = radeon_bo_init(rdev);
6872 	if (r)
6873 		return r;
6874 
6875 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6876 	    !rdev->rlc_fw || !rdev->mc_fw) {
6877 		r = si_init_microcode(rdev);
6878 		if (r) {
6879 			DRM_ERROR("Failed to load firmware!\n");
6880 			return r;
6881 		}
6882 	}
6883 
6884 	/* Initialize power management */
6885 	radeon_pm_init(rdev);
6886 
6887 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6888 	ring->ring_obj = NULL;
6889 	r600_ring_init(rdev, ring, 1024 * 1024);
6890 
6891 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6892 	ring->ring_obj = NULL;
6893 	r600_ring_init(rdev, ring, 1024 * 1024);
6894 
6895 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6896 	ring->ring_obj = NULL;
6897 	r600_ring_init(rdev, ring, 1024 * 1024);
6898 
6899 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6900 	ring->ring_obj = NULL;
6901 	r600_ring_init(rdev, ring, 64 * 1024);
6902 
6903 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6904 	ring->ring_obj = NULL;
6905 	r600_ring_init(rdev, ring, 64 * 1024);
6906 
6907 	si_uvd_init(rdev);
6908 	si_vce_init(rdev);
6909 
6910 	rdev->ih.ring_obj = NULL;
6911 	r600_ih_ring_init(rdev, 64 * 1024);
6912 
6913 	r = r600_pcie_gart_init(rdev);
6914 	if (r)
6915 		return r;
6916 
6917 	rdev->accel_working = true;
6918 	r = si_startup(rdev);
6919 	if (r) {
6920 		dev_err(rdev->dev, "disabling GPU acceleration\n");
6921 		si_cp_fini(rdev);
6922 		cayman_dma_fini(rdev);
6923 		si_irq_fini(rdev);
6924 		sumo_rlc_fini(rdev);
6925 		radeon_wb_fini(rdev);
6926 		radeon_ib_pool_fini(rdev);
6927 		radeon_vm_manager_fini(rdev);
6928 		radeon_irq_kms_fini(rdev);
6929 		si_pcie_gart_fini(rdev);
6930 		rdev->accel_working = false;
6931 	}
6932 
6933 	/* Don't start up if the MC ucode is missing.
6934 	 * The default clocks and voltages before the MC ucode
6935 	 * is loaded are not suffient for advanced operations.
6936 	 */
6937 	if (!rdev->mc_fw) {
6938 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
6939 		return -EINVAL;
6940 	}
6941 
6942 	return 0;
6943 }
6944 
/**
 * si_fini - asic specific driver and hw teardown (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Calls the fini routine of each sub-block in turn: power management,
 * the CP and DMA engines, power/clock gating, interrupts, RLC, writeback,
 * the VM manager, the IB pool, UVD and VCE (when present), the GART, the
 * VRAM scratch page, GEM, the fence driver, the buffer manager and the
 * ATOM BIOS state. Finally frees the BIOS image held in rdev->bios.
 */
void si_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	/* UVD and VCE are optional blocks, only touch them when present */
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_fini(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_fini(rdev);
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* clear the pointer so the freed BIOS image cannot be reused */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
6973 
6974 /**
6975  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6976  *
6977  * @rdev: radeon_device pointer
6978  *
6979  * Fetches a GPU clock counter snapshot (SI).
6980  * Returns the 64 bit clock counter snapshot.
6981  */
6982 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6983 {
6984 	uint64_t clock;
6985 
6986 	mutex_lock(&rdev->gpu_clock_mutex);
6987 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6988 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6989 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6990 	mutex_unlock(&rdev->gpu_clock_mutex);
6991 	return clock;
6992 }
6993 
/**
 * si_set_uvd_clocks - program the UPLL for the requested UVD clocks (SI)
 *
 * @rdev: radeon_device pointer
 * @vclk: requested VCLK, 0 to leave the PLL bypassed
 * @dclk: requested DCLK, 0 to leave the PLL bypassed
 *
 * VCLK and DCLK are first switched over to bclk and the PLL is put into
 * bypass mode. If either requested clock is 0 nothing more is done.
 * Otherwise the dividers computed by radeon_uvd_calc_upll_dividers() are
 * programmed while the PLL is held in reset, the PLL is released and taken
 * out of bypass, and finally VCLK/DCLK are switched to the PLL outputs.
 *
 * Returns 0 on success, or the error returned by
 * radeon_uvd_calc_upll_dividers() or radeon_uvd_send_upll_ctlreq().
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode */
		return 0;
	}

	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* disable sleep mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* ISPARE9 is cleared for feedback dividers below 307200, set otherwise */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
7082 
7083 static void si_pcie_gen3_enable(struct radeon_device *rdev)
7084 {
7085 	struct pci_dev *root = rdev->pdev->bus->self;
7086 	enum pci_bus_speed speed_cap;
7087 	u32 speed_cntl, current_data_rate;
7088 	int i;
7089 	u16 tmp16;
7090 
7091 	if (pci_is_root_bus(rdev->pdev->bus))
7092 		return;
7093 
7094 	if (radeon_pcie_gen2 == 0)
7095 		return;
7096 
7097 	if (rdev->flags & RADEON_IS_IGP)
7098 		return;
7099 
7100 	if (!(rdev->flags & RADEON_IS_PCIE))
7101 		return;
7102 
7103 	speed_cap = pcie_get_speed_cap(root);
7104 	if (speed_cap == PCI_SPEED_UNKNOWN)
7105 		return;
7106 
7107 	if ((speed_cap != PCIE_SPEED_8_0GT) &&
7108 	    (speed_cap != PCIE_SPEED_5_0GT))
7109 		return;
7110 
7111 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7112 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7113 		LC_CURRENT_DATA_RATE_SHIFT;
7114 	if (speed_cap == PCIE_SPEED_8_0GT) {
7115 		if (current_data_rate == 2) {
7116 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7117 			return;
7118 		}
7119 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7120 	} else if (speed_cap == PCIE_SPEED_5_0GT) {
7121 		if (current_data_rate == 1) {
7122 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7123 			return;
7124 		}
7125 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7126 	}
7127 
7128 	if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
7129 		return;
7130 
7131 	if (speed_cap == PCIE_SPEED_8_0GT) {
7132 		/* re-try equalization if gen3 is not already enabled */
7133 		if (current_data_rate != 2) {
7134 			u16 bridge_cfg, gpu_cfg;
7135 			u16 bridge_cfg2, gpu_cfg2;
7136 			u32 max_lw, current_lw, tmp;
7137 
7138 			pcie_capability_set_word(root, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
7139 			pcie_capability_set_word(rdev->pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
7140 
7141 			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7142 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7143 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7144 
7145 			if (current_lw < max_lw) {
7146 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7147 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
7148 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7149 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7150 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7151 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7152 				}
7153 			}
7154 
7155 			for (i = 0; i < 10; i++) {
7156 				/* check status */
7157 				pcie_capability_read_word(rdev->pdev,
7158 							  PCI_EXP_DEVSTA,
7159 							  &tmp16);
7160 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7161 					break;
7162 
7163 				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
7164 							  &bridge_cfg);
7165 				pcie_capability_read_word(rdev->pdev,
7166 							  PCI_EXP_LNKCTL,
7167 							  &gpu_cfg);
7168 
7169 				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
7170 							  &bridge_cfg2);
7171 				pcie_capability_read_word(rdev->pdev,
7172 							  PCI_EXP_LNKCTL2,
7173 							  &gpu_cfg2);
7174 
7175 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7176 				tmp |= LC_SET_QUIESCE;
7177 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7178 
7179 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7180 				tmp |= LC_REDO_EQ;
7181 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7182 
7183 				msleep(100);
7184 
7185 				/* linkctl */
7186 				pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL,
7187 								   PCI_EXP_LNKCTL_HAWD,
7188 								   bridge_cfg &
7189 								   PCI_EXP_LNKCTL_HAWD);
7190 				pcie_capability_clear_and_set_word(rdev->pdev, PCI_EXP_LNKCTL,
7191 								   PCI_EXP_LNKCTL_HAWD,
7192 								   gpu_cfg &
7193 								   PCI_EXP_LNKCTL_HAWD);
7194 
7195 				/* linkctl2 */
7196 				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
7197 							  &tmp16);
7198 				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
7199 					   PCI_EXP_LNKCTL2_TX_MARGIN);
7200 				tmp16 |= (bridge_cfg2 &
7201 					  (PCI_EXP_LNKCTL2_ENTER_COMP |
7202 					   PCI_EXP_LNKCTL2_TX_MARGIN));
7203 				pcie_capability_write_word(root,
7204 							   PCI_EXP_LNKCTL2,
7205 							   tmp16);
7206 
7207 				pcie_capability_read_word(rdev->pdev,
7208 							  PCI_EXP_LNKCTL2,
7209 							  &tmp16);
7210 				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
7211 					   PCI_EXP_LNKCTL2_TX_MARGIN);
7212 				tmp16 |= (gpu_cfg2 &
7213 					  (PCI_EXP_LNKCTL2_ENTER_COMP |
7214 					   PCI_EXP_LNKCTL2_TX_MARGIN));
7215 				pcie_capability_write_word(rdev->pdev,
7216 							   PCI_EXP_LNKCTL2,
7217 							   tmp16);
7218 
7219 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7220 				tmp &= ~LC_SET_QUIESCE;
7221 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7222 			}
7223 		}
7224 	}
7225 
7226 	/* set the link speed */
7227 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7228 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7229 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7230 
7231 	pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL2, &tmp16);
7232 	tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
7233 	if (speed_cap == PCIE_SPEED_8_0GT)
7234 		tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
7235 	else if (speed_cap == PCIE_SPEED_5_0GT)
7236 		tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
7237 	else
7238 		tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
7239 	pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL2, tmp16);
7240 
7241 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7242 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7243 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7244 
7245 	for (i = 0; i < rdev->usec_timeout; i++) {
7246 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7247 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7248 			break;
7249 		udelay(1);
7250 	}
7251 }
7252 
7253 static void si_program_aspm(struct radeon_device *rdev)
7254 {
7255 	u32 data, orig;
7256 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7257 	bool disable_clkreq = false;
7258 
7259 	if (radeon_aspm == 0)
7260 		return;
7261 
7262 	if (!(rdev->flags & RADEON_IS_PCIE))
7263 		return;
7264 
7265 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7266 	data &= ~LC_XMIT_N_FTS_MASK;
7267 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7268 	if (orig != data)
7269 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7270 
7271 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7272 	data |= LC_GO_TO_RECOVERY;
7273 	if (orig != data)
7274 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7275 
7276 	orig = data = RREG32_PCIE(PCIE_P_CNTL);
7277 	data |= P_IGNORE_EDB_ERR;
7278 	if (orig != data)
7279 		WREG32_PCIE(PCIE_P_CNTL, data);
7280 
7281 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7282 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7283 	data |= LC_PMI_TO_L1_DIS;
7284 	if (!disable_l0s)
7285 		data |= LC_L0S_INACTIVITY(7);
7286 
7287 	if (!disable_l1) {
7288 		data |= LC_L1_INACTIVITY(7);
7289 		data &= ~LC_PMI_TO_L1_DIS;
7290 		if (orig != data)
7291 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7292 
7293 		if (!disable_plloff_in_l1) {
7294 			bool clk_req_support;
7295 
7296 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7297 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7298 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7299 			if (orig != data)
7300 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7301 
7302 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7303 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7304 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7305 			if (orig != data)
7306 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7307 
7308 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7309 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7310 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7311 			if (orig != data)
7312 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7313 
7314 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7315 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7316 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7317 			if (orig != data)
7318 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7319 
7320 			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7321 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7322 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7323 				if (orig != data)
7324 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7325 
7326 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7327 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7328 				if (orig != data)
7329 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7330 
7331 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7332 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7333 				if (orig != data)
7334 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7335 
7336 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7337 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7338 				if (orig != data)
7339 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7340 
7341 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7342 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7343 				if (orig != data)
7344 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7345 
7346 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7347 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7348 				if (orig != data)
7349 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7350 
7351 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7352 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7353 				if (orig != data)
7354 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7355 
7356 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7357 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7358 				if (orig != data)
7359 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7360 			}
7361 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7362 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7363 			data |= LC_DYN_LANES_PWR_STATE(3);
7364 			if (orig != data)
7365 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7366 
7367 			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7368 			data &= ~LS2_EXIT_TIME_MASK;
7369 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7370 				data |= LS2_EXIT_TIME(5);
7371 			if (orig != data)
7372 				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7373 
7374 			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7375 			data &= ~LS2_EXIT_TIME_MASK;
7376 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7377 				data |= LS2_EXIT_TIME(5);
7378 			if (orig != data)
7379 				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7380 
7381 			if (!disable_clkreq &&
7382 			    !pci_is_root_bus(rdev->pdev->bus)) {
7383 				struct pci_dev *root = rdev->pdev->bus->self;
7384 				u32 lnkcap;
7385 
7386 				clk_req_support = false;
7387 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7388 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7389 					clk_req_support = true;
7390 			} else {
7391 				clk_req_support = false;
7392 			}
7393 
7394 			if (clk_req_support) {
7395 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7396 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7397 				if (orig != data)
7398 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7399 
7400 				orig = data = RREG32(THM_CLK_CNTL);
7401 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7402 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7403 				if (orig != data)
7404 					WREG32(THM_CLK_CNTL, data);
7405 
7406 				orig = data = RREG32(MISC_CLK_CNTL);
7407 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7408 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7409 				if (orig != data)
7410 					WREG32(MISC_CLK_CNTL, data);
7411 
7412 				orig = data = RREG32(CG_CLKPIN_CNTL);
7413 				data &= ~BCLK_AS_XCLK;
7414 				if (orig != data)
7415 					WREG32(CG_CLKPIN_CNTL, data);
7416 
7417 				orig = data = RREG32(CG_CLKPIN_CNTL_2);
7418 				data &= ~FORCE_BIF_REFCLK_EN;
7419 				if (orig != data)
7420 					WREG32(CG_CLKPIN_CNTL_2, data);
7421 
7422 				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7423 				data &= ~MPLL_CLKOUT_SEL_MASK;
7424 				data |= MPLL_CLKOUT_SEL(4);
7425 				if (orig != data)
7426 					WREG32(MPLL_BYPASSCLK_SEL, data);
7427 
7428 				orig = data = RREG32(SPLL_CNTL_MODE);
7429 				data &= ~SPLL_REFCLK_SEL_MASK;
7430 				if (orig != data)
7431 					WREG32(SPLL_CNTL_MODE, data);
7432 			}
7433 		}
7434 	} else {
7435 		if (orig != data)
7436 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7437 	}
7438 
7439 	orig = data = RREG32_PCIE(PCIE_CNTL2);
7440 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7441 	if (orig != data)
7442 		WREG32_PCIE(PCIE_CNTL2, data);
7443 
7444 	if (!disable_l0s) {
7445 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7446 		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7447 			data = RREG32_PCIE(PCIE_LC_STATUS1);
7448 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7449 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7450 				data &= ~LC_L0S_INACTIVITY_MASK;
7451 				if (orig != data)
7452 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7453 			}
7454 		}
7455 	}
7456 }
7457 
7458 static int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
7459 {
7460 	unsigned i;
7461 
7462 	/* make sure VCEPLL_CTLREQ is deasserted */
7463 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7464 
7465 	mdelay(10);
7466 
7467 	/* assert UPLL_CTLREQ */
7468 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
7469 
7470 	/* wait for CTLACK and CTLACK2 to get asserted */
7471 	for (i = 0; i < 100; ++i) {
7472 		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
7473 		if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
7474 			break;
7475 		mdelay(10);
7476 	}
7477 
7478 	/* deassert UPLL_CTLREQ */
7479 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7480 
7481 	if (i == 100) {
7482 		DRM_ERROR("Timeout setting UVD clocks!\n");
7483 		return -ETIMEDOUT;
7484 	}
7485 
7486 	return 0;
7487 }
7488 
/**
 * si_set_vce_clocks - program the VCE PLL for the requested clocks (SI)
 *
 * @rdev: radeon_device pointer
 * @evclk: requested EVCLK, 0 to leave the PLL bypassed and asleep
 * @ecclk: requested ECCLK, 0 to leave the PLL bypassed and asleep
 *
 * EVCLK and ECCLK are first switched over to bclk and the PLL is put into
 * bypass mode. If either requested clock is 0 the PLL is additionally put
 * to sleep and nothing more is done. Otherwise the dividers computed by
 * radeon_uvd_calc_upll_dividers() are programmed while the PLL is held in
 * reset, the PLL is released and taken out of bypass, and finally
 * EVCLK/ECCLK are switched to the PLL outputs.
 *
 * Returns 0 on success, or the error returned by
 * radeon_uvd_calc_upll_dividers() or si_vce_send_vcepll_ctlreq().
 */
int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
{
	unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
	int r;

	/* bypass evclk and ecclk with bclk */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
		     EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
		     ~VCEPLL_BYPASS_EN_MASK);

	if (!evclk || !ecclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
			     ~VCEPLL_SLEEP_MASK);
		return 0;
	}

	/* the VCE PLL reuses the divider calculation of the UVD PLL */
	r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &evclk_div, &ecclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
		     ~VCEPLL_VCO_MODE_MASK);

	/* toggle VCEPLL_SLEEP to 1 then back to 0 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
		     ~VCEPLL_SLEEP_MASK);
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);

	/* deassert VCEPLL_RESET */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);

	mdelay(1);

	r = si_vce_send_vcepll_ctlreq(rdev);
	if (r)
		return r;

	/* assert VCEPLL_RESET again */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);

	/* set PDIV_A and PDIV_B */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
		     VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
		     ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);

	r = si_vce_send_vcepll_ctlreq(rdev);
	if (r)
		return r;

	/* switch EVCLK and ECCLK selection */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
		     EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
7578