/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>

#include <drm/drm_vblank.h>
#include <drm/radeon_drm.h>

#include "atom.h"
#include "clearstate_si.h"
#include "evergreen.h"
#include "r600.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_audio.h"
#include "radeon_ucode.h"
#include "si_blit_shaders.h"
#include "si.h"
#include "sid.h"


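/*
 * SI firmware is published under two naming schemes: the legacy
 * big-endian blobs (UPPERCASE) and the newer header-based blobs
 * (lowercase). si_init_microcode() requests the lowercase name first
 * and falls back to the legacy one, so both sets are declared here.
 */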
MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/TAHITI_smc.bin");

MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
MODULE_FIRMWARE("radeon/tahiti_me.bin");
MODULE_FIRMWARE("radeon/tahiti_ce.bin");
MODULE_FIRMWARE("radeon/tahiti_mc.bin");
MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
MODULE_FIRMWARE("radeon/tahiti_smc.bin");

MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");

MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
MODULE_FIRMWARE("radeon/pitcairn_me.bin");
MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");

MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_smc.bin");

MODULE_FIRMWARE("radeon/verde_pfp.bin");
MODULE_FIRMWARE("radeon/verde_me.bin");
MODULE_FIRMWARE("radeon/verde_ce.bin");
MODULE_FIRMWARE("radeon/verde_mc.bin");
MODULE_FIRMWARE("radeon/verde_rlc.bin");
MODULE_FIRMWARE("radeon/verde_smc.bin");
MODULE_FIRMWARE("radeon/verde_k_smc.bin");

MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_smc.bin");

MODULE_FIRMWARE("radeon/oland_pfp.bin");
MODULE_FIRMWARE("radeon/oland_me.bin");
MODULE_FIRMWARE("radeon/oland_ce.bin");
MODULE_FIRMWARE("radeon/oland_mc.bin");
MODULE_FIRMWARE("radeon/oland_rlc.bin");
MODULE_FIRMWARE("radeon/oland_smc.bin");
MODULE_FIRMWARE("radeon/oland_k_smc.bin");

MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
MODULE_FIRMWARE("radeon/HAINAN_me.bin");
MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
MODULE_FIRMWARE("radeon/HAINAN_smc.bin");

MODULE_FIRMWARE("radeon/hainan_pfp.bin");
MODULE_FIRMWARE("radeon/hainan_me.bin");
MODULE_FIRMWARE("radeon/hainan_ce.bin");
MODULE_FIRMWARE("radeon/hainan_mc.bin");
MODULE_FIRMWARE("radeon/hainan_rlc.bin");
MODULE_FIRMWARE("radeon/hainan_smc.bin");
MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
MODULE_FIRMWARE("radeon/banks_k_2_smc.bin");

MODULE_FIRMWARE("radeon/si58_mc.bin");

static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
static void si_pcie_gen3_enable(struct radeon_device *rdev);
static void si_program_aspm(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable);
static void si_init_pg(struct radeon_device *rdev);
static void si_init_cg(struct radeon_device *rdev);
static void si_fini_pg(struct radeon_device *rdev);
static void si_fini_cg(struct radeon_device *rdev);
static void si_rlc_stop(struct radeon_device *rdev);

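/*
 * The six DCE6 display controllers are register-banked: per-CRTC
 * registers are addressed as base + crtc_offsets[crtc], and each pipe
 * reports interrupts through the matching si_disp_int_status[] register.
 */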
static const u32 crtc_offsets[] =
{
	EVERGREEN_CRTC0_REGISTER_OFFSET,
	EVERGREEN_CRTC1_REGISTER_OFFSET,
	EVERGREEN_CRTC2_REGISTER_OFFSET,
	EVERGREEN_CRTC3_REGISTER_OFFSET,
	EVERGREEN_CRTC4_REGISTER_OFFSET,
	EVERGREEN_CRTC5_REGISTER_OFFSET
};

static const u32 si_disp_int_status[] =
{
	DISP_INTERRUPT_STATUS,
	DISP_INTERRUPT_STATUS_CONTINUE,
	DISP_INTERRUPT_STATUS_CONTINUE2,
	DISP_INTERRUPT_STATUS_CONTINUE3,
	DISP_INTERRUPT_STATUS_CONTINUE4,
	DISP_INTERRUPT_STATUS_CONTINUE5
};

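/*
 * The hot-plug detect (HPD) pads are likewise banked, with consecutive
 * pads spaced 0xc bytes apart, so DC_HPDx_CONTROL(0) resolves to
 * DC_HPD1_CONTROL and DC_HPDx_CONTROL(1) to DC_HPD2_CONTROL (assuming
 * the 0xc stride in sid.h holds for all six pads).
 */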
#define DC_HPDx_CONTROL(x)        (DC_HPD1_CONTROL     + (x * 0xc))
#define DC_HPDx_INT_CONTROL(x)    (DC_HPD1_INT_CONTROL + (x * 0xc))
#define DC_HPDx_INT_STATUS_REG(x) (DC_HPD1_INT_STATUS  + (x * 0xc))

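/*
 * RLC save/restore list. Each entry pairs a selector/offset word (what
 * appears to be a GRBM_GFX_INDEX-style engine select in the high half
 * and a register dword offset in the low half) with a bootstrap value;
 * the exact encoding is undocumented here, so treat this reading as an
 * informed guess rather than a specification.
 */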
static const u32 verde_rlc_save_restore_register_list[] =
{
	(0x8000 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x8000 << 16) | (0xe80 >> 2),
	0x00000000,
	(0x8040 << 16) | (0xe80 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x8040 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x8000 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x98f0 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xe7c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9148 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9148 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9150 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x897c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8d8c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac54 >> 2),
	0x00000000,
	0x3,
	(0x9c00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9910 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9914 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9918 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x991c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9920 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9924 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9928 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x992c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9930 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9934 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9938 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x993c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9940 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9944 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9948 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x994c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9950 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9954 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9958 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x995c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9960 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9964 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9968 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x996c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9970 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9974 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9978 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x997c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9980 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9984 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9988 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x998c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c08 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x8000 << 16) | (0xe84 >> 2),
	0x00000000,
	(0x8040 << 16) | (0xe84 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x914c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x914c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9354 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9354 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9060 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9364 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x913c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e0 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e4 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e0 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e4 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e50 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c0c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e58 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e5c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9508 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x950c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9494 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88cc >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x89b0 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8b10 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9830 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9838 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9a10 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8001 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8001 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8041 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8041 << 16) | (0x9874 >> 2),
	0x00000000,
	0x00000000
};

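/*
 * The "golden register" tables below are {offset, and_mask, or_mask}
 * triples. radeon_program_register_sequence() applies each one roughly
 * as: tmp = RREG32(offset); tmp &= ~and_mask;
 * tmp |= (or_mask & and_mask); WREG32(offset, tmp);
 * i.e. only the bits covered by the mask are rewritten.
 */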
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};

static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};

static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};

static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};

static const u32 verde_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};

static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};

static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 verde_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static u32 verde_pg_init[] =
{
	0x353c, 0xffffffff, 0x40000,
	0x3538, 0xffffffff, 0x200010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x7007,
	0x3538, 0xffffffff, 0x300010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x400000,
	0x3538, 0xffffffff, 0x100010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x120200,
	0x3538, 0xffffffff, 0x500010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x1e1e16,
	0x3538, 0xffffffff, 0x600010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x171f1e,
	0x3538, 0xffffffff, 0x700010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x3538, 0xffffffff, 0x9ff,
	0x3500, 0xffffffff, 0x0,
	0x3504, 0xffffffff, 0x10000800,
	0x3504, 0xffffffff, 0xf,
	0x3504, 0xffffffff, 0xf,
	0x3500, 0xffffffff, 0x4,
	0x3504, 0xffffffff, 0x1000051e,
	0x3504, 0xffffffff, 0xffff,
	0x3504, 0xffffffff, 0xffff,
	0x3500, 0xffffffff, 0x8,
	0x3504, 0xffffffff, 0x80500,
	0x3500, 0xffffffff, 0x12,
	0x3504, 0xffffffff, 0x9050c,
	0x3500, 0xffffffff, 0x1d,
	0x3504, 0xffffffff, 0xb052c,
	0x3500, 0xffffffff, 0x2a,
	0x3504, 0xffffffff, 0x1053e,
	0x3500, 0xffffffff, 0x2d,
	0x3504, 0xffffffff, 0x10546,
	0x3500, 0xffffffff, 0x30,
	0x3504, 0xffffffff, 0xa054e,
	0x3500, 0xffffffff, 0x3c,
	0x3504, 0xffffffff, 0x1055f,
	0x3500, 0xffffffff, 0x3f,
	0x3504, 0xffffffff, 0x10567,
	0x3500, 0xffffffff, 0x42,
	0x3504, 0xffffffff, 0x1056f,
	0x3500, 0xffffffff, 0x45,
	0x3504, 0xffffffff, 0x10572,
	0x3500, 0xffffffff, 0x48,
	0x3504, 0xffffffff, 0x20575,
	0x3500, 0xffffffff, 0x4c,
	0x3504, 0xffffffff, 0x190801,
	0x3500, 0xffffffff, 0x67,
	0x3504, 0xffffffff, 0x1082a,
	0x3500, 0xffffffff, 0x6a,
	0x3504, 0xffffffff, 0x1b082d,
	0x3500, 0xffffffff, 0x87,
	0x3504, 0xffffffff, 0x310851,
	0x3500, 0xffffffff, 0xba,
	0x3504, 0xffffffff, 0x891,
	0x3500, 0xffffffff, 0xbc,
	0x3504, 0xffffffff, 0x893,
	0x3500, 0xffffffff, 0xbe,
	0x3504, 0xffffffff, 0x20895,
	0x3500, 0xffffffff, 0xc2,
	0x3504, 0xffffffff, 0x20899,
	0x3500, 0xffffffff, 0xc6,
	0x3504, 0xffffffff, 0x2089d,
	0x3500, 0xffffffff, 0xca,
	0x3504, 0xffffffff, 0x8a1,
	0x3500, 0xffffffff, 0xcc,
	0x3504, 0xffffffff, 0x8a3,
	0x3500, 0xffffffff, 0xce,
	0x3504, 0xffffffff, 0x308a5,
	0x3500, 0xffffffff, 0xd3,
	0x3504, 0xffffffff, 0x6d08cd,
	0x3500, 0xffffffff, 0x142,
	0x3504, 0xffffffff, 0x2000095a,
	0x3504, 0xffffffff, 0x1,
	0x3500, 0xffffffff, 0x144,
	0x3504, 0xffffffff, 0x301f095b,
	0x3500, 0xffffffff, 0x165,
	0x3504, 0xffffffff, 0xc094d,
	0x3500, 0xffffffff, 0x173,
	0x3504, 0xffffffff, 0xf096d,
	0x3500, 0xffffffff, 0x184,
	0x3504, 0xffffffff, 0x15097f,
	0x3500, 0xffffffff, 0x19b,
	0x3504, 0xffffffff, 0xc0998,
	0x3500, 0xffffffff, 0x1a9,
	0x3504, 0xffffffff, 0x409a7,
	0x3500, 0xffffffff, 0x1af,
	0x3504, 0xffffffff, 0xcdc,
	0x3500, 0xffffffff, 0x1b1,
	0x3504, 0xffffffff, 0x800,
	0x3508, 0xffffffff, 0x6c9b2000,
	0x3510, 0xfc00, 0x2000,
	0x3544, 0xffffffff, 0xfc0,
	0x28d4, 0x00000100, 0x100
};

static void si_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_TAHITI:
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers2,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
		break;
	case CHIP_PITCAIRN:
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
		break;
	case CHIP_VERDE:
		radeon_program_register_sequence(rdev,
						 verde_golden_registers,
						 (const u32)ARRAY_SIZE(verde_golden_registers));
		radeon_program_register_sequence(rdev,
						 verde_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 verde_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 verde_pg_init,
						 (const u32)ARRAY_SIZE(verde_pg_init));
		break;
	case CHIP_OLAND:
		radeon_program_register_sequence(rdev,
						 oland_golden_registers,
						 (const u32)ARRAY_SIZE(oland_golden_registers));
		radeon_program_register_sequence(rdev,
						 oland_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 oland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
		break;
	case CHIP_HAINAN:
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers,
						 (const u32)ARRAY_SIZE(hainan_golden_registers));
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers2,
						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
		radeon_program_register_sequence(rdev,
						 hainan_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
		break;
	default:
		break;
	}
}

/**
 * si_get_allowed_info_register - fetch the register for the info ioctl
 *
 * @rdev: radeon_device pointer
 * @reg: register offset in bytes
 * @val: register value
 *
 * Returns 0 for success or -EINVAL for an invalid register
 *
 */
int si_get_allowed_info_register(struct radeon_device *rdev,
				 u32 reg, u32 *val)
{
	switch (reg) {
	case GRBM_STATUS:
	case GRBM_STATUS2:
	case GRBM_STATUS_SE0:
	case GRBM_STATUS_SE1:
	case SRBM_STATUS:
	case SRBM_STATUS2:
	case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
	case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
	case UVD_STATUS:
		*val = RREG32(reg);
		return 0;
	default:
		return -EINVAL;
	}
}

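/*
 * The clock constants below follow the driver convention of 10 kHz
 * units (reference_freq comes from the ATOM tables in the same units),
 * so PCIE_BUS_CLK is 100 MHz and TCLK works out to 10 MHz.
 */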
#define PCIE_BUS_CLK                10000
#define TCLK                        (PCIE_BUS_CLK / 10)

/**
 * si_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (SI).
 */
u32 si_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;
	u32 tmp;

	tmp = RREG32(CG_CLKPIN_CNTL_2);
	if (tmp & MUX_TCLK_TO_XCLK)
		return TCLK;

	tmp = RREG32(CG_CLKPIN_CNTL);
	if (tmp & XTALIN_DIVIDE)
		return reference_clock / 4;

	return reference_clock;
}

/* get temperature in millidegrees Celsius */
int si_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

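	/* bit 9 flags an out-of-range reading; clamp to the 255 C ceiling */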
	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = (actual_temp * 1000);

	return actual_temp;
}

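/*
 * Every SI variant ships an MC io-debug table of the same length, so
 * the Tahiti-sized constant is reused for all of the tables below.
 */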
#define TAHITI_IO_MC_REGS_SIZE 36

static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};

static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};

static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};

static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};

static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};

/* ucode loading */
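/*
 * The MC load sequence below is: halt and reset the sequencer, program
 * the io-debug register pairs, stream the ucode words into
 * MC_SEQ_SUP_PGM, restart the engine, then poll TRAIN_DONE for both
 * memory channels. New-style firmware carries its own little-endian
 * io-debug table and ucode; legacy blobs are big-endian streams paired
 * with the per-chip tables above.
 */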
int si_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;
	const __le32 *new_fw_data = NULL;
	u32 running;
	u32 *io_mc_regs = NULL;
	const __le32 *new_io_mc_regs = NULL;
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		ucode_size = rdev->mc_fw->size / 4;

		switch (rdev->family) {
		case CHIP_TAHITI:
			io_mc_regs = (u32 *)&tahiti_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_PITCAIRN:
			io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_VERDE:
		default:
			io_mc_regs = (u32 *)&verde_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_OLAND:
			io_mc_regs = (u32 *)&oland_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAINAN:
			io_mc_regs = (u32 *)&hainan_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}
		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}

static int si_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	const char *new_chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
	size_t smc_req_size, mc2_req_size;
	char fw_name[30];
	int err;
	int new_fw = 0;
	bool new_smc = false;
	bool si58_fw = false;
	bool banks2_fw = false;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_TAHITI:
		chip_name = "TAHITI";
		new_chip_name = "tahiti";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_PITCAIRN:
		chip_name = "PITCAIRN";
		if ((rdev->pdev->revision == 0x81) &&
		    ((rdev->pdev->device == 0x6810) ||
		     (rdev->pdev->device == 0x6811)))
			new_smc = true;
		new_chip_name = "pitcairn";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_VERDE:
		chip_name = "VERDE";
		if (((rdev->pdev->device == 0x6820) &&
		     ((rdev->pdev->revision == 0x81) ||
		      (rdev->pdev->revision == 0x83))) ||
		    ((rdev->pdev->device == 0x6821) &&
		     ((rdev->pdev->revision == 0x83) ||
		      (rdev->pdev->revision == 0x87))) ||
		    ((rdev->pdev->revision == 0x87) &&
		     ((rdev->pdev->device == 0x6823) ||
		      (rdev->pdev->device == 0x682b))))
			new_smc = true;
		new_chip_name = "verde";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_OLAND:
		chip_name = "OLAND";
		if (((rdev->pdev->revision == 0x81) &&
		     ((rdev->pdev->device == 0x6600) ||
		      (rdev->pdev->device == 0x6604) ||
		      (rdev->pdev->device == 0x6605) ||
		      (rdev->pdev->device == 0x6610))) ||
		    ((rdev->pdev->revision == 0x83) &&
		     (rdev->pdev->device == 0x6610)))
			new_smc = true;
		new_chip_name = "oland";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_HAINAN:
		chip_name = "HAINAN";
		if (((rdev->pdev->revision == 0x81) &&
		     (rdev->pdev->device == 0x6660)) ||
		    ((rdev->pdev->revision == 0x83) &&
		     ((rdev->pdev->device == 0x6660) ||
		      (rdev->pdev->device == 0x6663) ||
		      (rdev->pdev->device == 0x6665) ||
		      (rdev->pdev->device == 0x6667))))
			new_smc = true;
		else if ((rdev->pdev->revision == 0xc3) &&
			 (rdev->pdev->device == 0x6665))
			banks2_fw = true;
		new_chip_name = "hainan";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
		break;
	default: BUG();
	}

	/* this memory configuration requires special firmware */
	if (((RREG32(MC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58)
		si58_fw = true;

	DRM_INFO("Loading %s Microcode\n", new_chip_name);

1780 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1781 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1782 	if (err) {
1783 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1784 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1785 		if (err)
1786 			goto out;
1787 		if (rdev->pfp_fw->size != pfp_req_size) {
1788 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1789 			       rdev->pfp_fw->size, fw_name);
1790 			err = -EINVAL;
1791 			goto out;
1792 		}
1793 	} else {
1794 		err = radeon_ucode_validate(rdev->pfp_fw);
1795 		if (err) {
1796 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1797 			       fw_name);
1798 			goto out;
1799 		} else {
1800 			new_fw++;
1801 		}
1802 	}
1803 
1804 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
1805 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1806 	if (err) {
1807 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1808 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1809 		if (err)
1810 			goto out;
1811 		if (rdev->me_fw->size != me_req_size) {
1812 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1813 			       rdev->me_fw->size, fw_name);
1814 			err = -EINVAL;
1815 		}
1816 	} else {
1817 		err = radeon_ucode_validate(rdev->me_fw);
1818 		if (err) {
1819 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1820 			       fw_name);
1821 			goto out;
1822 		} else {
1823 			new_fw++;
1824 		}
1825 	}
1826 
1827 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
1828 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1829 	if (err) {
1830 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1831 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1832 		if (err)
1833 			goto out;
1834 		if (rdev->ce_fw->size != ce_req_size) {
1835 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1836 			       rdev->ce_fw->size, fw_name);
1837 			err = -EINVAL;
1838 		}
1839 	} else {
1840 		err = radeon_ucode_validate(rdev->ce_fw);
1841 		if (err) {
1842 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1843 			       fw_name);
1844 			goto out;
1845 		} else {
1846 			new_fw++;
1847 		}
1848 	}
1849 
1850 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
1851 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1852 	if (err) {
1853 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1854 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1855 		if (err)
1856 			goto out;
1857 		if (rdev->rlc_fw->size != rlc_req_size) {
1858 			pr_err("si_rlc: Bogus length %zu in firmware \"%s\"\n",
1859 			       rdev->rlc_fw->size, fw_name);
1860 			err = -EINVAL;
1861 		}
1862 	} else {
1863 		err = radeon_ucode_validate(rdev->rlc_fw);
1864 		if (err) {
1865 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1866 			       fw_name);
1867 			goto out;
1868 		} else {
1869 			new_fw++;
1870 		}
1871 	}
1872 
1873 	if (si58_fw)
1874 		snprintf(fw_name, sizeof(fw_name), "radeon/si58_mc.bin");
1875 	else
1876 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
1877 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1878 	if (err) {
1879 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1880 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1881 		if (err) {
1882 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1883 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1884 			if (err)
1885 				goto out;
1886 		}
1887 		if ((rdev->mc_fw->size != mc_req_size) &&
1888 		    (rdev->mc_fw->size != mc2_req_size)) {
1889 			pr_err("si_mc: Bogus length %zu in firmware \"%s\"\n",
1890 			       rdev->mc_fw->size, fw_name);
1891 			err = -EINVAL;
1892 		}
1893 		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1894 	} else {
1895 		err = radeon_ucode_validate(rdev->mc_fw);
1896 		if (err) {
1897 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1898 			       fw_name);
1899 			goto out;
1900 		} else {
1901 			new_fw++;
1902 		}
1903 	}
1904 
1905 	if (banks2_fw)
1906 		snprintf(fw_name, sizeof(fw_name), "radeon/banks_k_2_smc.bin");
1907 	else if (new_smc)
1908 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
1909 	else
1910 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
1911 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1912 	if (err) {
1913 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1914 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1915 		if (err) {
1916 			pr_err("smc: error loading firmware \"%s\"\n", fw_name);
1917 			release_firmware(rdev->smc_fw);
1918 			rdev->smc_fw = NULL;
1919 			err = 0;
1920 		} else if (rdev->smc_fw->size != smc_req_size) {
1921 			pr_err("si_smc: Bogus length %zu in firmware \"%s\"\n",
1922 			       rdev->smc_fw->size, fw_name);
1923 			err = -EINVAL;
1924 		}
1925 	} else {
1926 		err = radeon_ucode_validate(rdev->smc_fw);
1927 		if (err) {
1928 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1929 			       fw_name);
1930 			goto out;
1931 		} else {
1932 			new_fw++;
1933 		}
1934 	}
1935 
1936 	if (new_fw == 0) {
1937 		rdev->new_fw = false;
1938 	} else if (new_fw < 6) {
1939 		pr_err("si_fw: mixing new and old firmware!\n");
1940 		err = -EINVAL;
1941 	} else {
1942 		rdev->new_fw = true;
1943 	}
1944 out:
1945 	if (err) {
1946 		if (err != -EINVAL)
1947 			pr_err("si_cp: Failed to load firmware \"%s\"\n",
1948 			       fw_name);
1949 		release_firmware(rdev->pfp_fw);
1950 		rdev->pfp_fw = NULL;
1951 		release_firmware(rdev->me_fw);
1952 		rdev->me_fw = NULL;
1953 		release_firmware(rdev->ce_fw);
1954 		rdev->ce_fw = NULL;
1955 		release_firmware(rdev->rlc_fw);
1956 		rdev->rlc_fw = NULL;
1957 		release_firmware(rdev->mc_fw);
1958 		rdev->mc_fw = NULL;
1959 		release_firmware(rdev->smc_fw);
1960 		rdev->smc_fw = NULL;
1961 	}
1962 	return err;
1963 }
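/*
 * The repetitive blocks above all follow one pattern: try the new-style
 * lowercase name first and validate it, and only fall back to the legacy
 * uppercase name (with a strict size check) when the new file is absent.
 * A condensed sketch using a hypothetical helper name; it omits the
 * new_fw bookkeeping and is not part of the driver:
 *
 *	static int si_try_fw(struct radeon_device *rdev,
 *			     const struct firmware **fw,
 *			     const char *new_name, const char *old_name,
 *			     size_t legacy_size)
 *	{
 *		char path[30];
 *		int err;
 *
 *		snprintf(path, sizeof(path), "radeon/%s.bin", new_name);
 *		if (!request_firmware(fw, path, rdev->dev))
 *			return radeon_ucode_validate(*fw);	// new-style image
 *		snprintf(path, sizeof(path), "radeon/%s.bin", old_name);
 *		err = request_firmware(fw, path, rdev->dev);
 *		if (err)
 *			return err;
 *		return (*fw)->size == legacy_size ? 0 : -EINVAL;
 *	}
 */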
1964 
1965 /* watermark setup */
1966 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1967 				   struct radeon_crtc *radeon_crtc,
1968 				   struct drm_display_mode *mode,
1969 				   struct drm_display_mode *other_mode)
1970 {
1971 	u32 tmp, buffer_alloc, i;
1972 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1973 	/*
1974 	 * Line Buffer Setup
1975 	 * There are 3 line buffers, each one shared by 2 display controllers.
1976 	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1977 	 * the display controllers.  The partitioning is done via one of four
1978 	 * preset allocations specified in bits 21:20:
1979 	 *  0 - half lb
1980 	 *  2 - whole lb, other crtc must be disabled
1981 	 */
1982 	/* this can get tricky if we have two large displays on a paired group
1983 	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1984 	 * non-linked crtcs for maximum line buffer allocation.
1985 	 */
1986 	if (radeon_crtc->base.enabled && mode) {
1987 		if (other_mode) {
1988 			tmp = 0; /* 1/2 */
1989 			buffer_alloc = 1;
1990 		} else {
1991 			tmp = 2; /* whole */
1992 			buffer_alloc = 2;
1993 		}
1994 	} else {
1995 		tmp = 0;
1996 		buffer_alloc = 0;
1997 	}
1998 
1999 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
2000 	       DC_LB_MEMORY_CONFIG(tmp));
2001 
2002 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
2003 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
2004 	for (i = 0; i < rdev->usec_timeout; i++) {
2005 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
2006 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
2007 			break;
2008 		udelay(1);
2009 	}
2010 
2011 	if (radeon_crtc->base.enabled && mode) {
2012 		switch (tmp) {
2013 		case 0:
2014 		default:
2015 			return 4096 * 2;
2016 		case 2:
2017 			return 8192 * 2;
2018 		}
2019 	}
2020 
2021 	/* controller not enabled, so no lb used */
2022 	return 0;
2023 }
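/*
 * Worked example of the split above: with both crtcs of a pair active,
 * tmp = 0 (half lb) and the function reports 4096 * 2 = 8192 entries;
 * with the partner disabled, tmp = 2 (whole lb) doubles that to
 * 8192 * 2 = 16384 entries for the single head.
 */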
2024 
2025 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
2026 {
2027 	u32 tmp = RREG32(MC_SHARED_CHMAP);
2028 
2029 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2030 	case 0:
2031 	default:
2032 		return 1;
2033 	case 1:
2034 		return 2;
2035 	case 2:
2036 		return 4;
2037 	case 3:
2038 		return 8;
2039 	case 4:
2040 		return 3;
2041 	case 5:
2042 		return 6;
2043 	case 6:
2044 		return 10;
2045 	case 7:
2046 		return 12;
2047 	case 8:
2048 		return 16;
2049 	}
2050 }
2051 
2052 struct dce6_wm_params {
2053 	u32 dram_channels; /* number of dram channels */
2054 	u32 yclk;          /* bandwidth per dram data pin in kHz */
2055 	u32 sclk;          /* engine clock in kHz */
2056 	u32 disp_clk;      /* display clock in kHz */
2057 	u32 src_width;     /* viewport width */
2058 	u32 active_time;   /* active display time in ns */
2059 	u32 blank_time;    /* blank time in ns */
2060 	bool interlaced;    /* mode is interlaced */
2061 	fixed20_12 vsc;    /* vertical scale ratio */
2062 	u32 num_heads;     /* number of active crtcs */
2063 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
2064 	u32 lb_size;       /* line buffer allocated to pipe */
2065 	u32 vtaps;         /* vertical scaler taps */
2066 };
2067 
2068 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
2069 {
2070 	/* Calculate raw DRAM Bandwidth */
2071 	fixed20_12 dram_efficiency; /* 0.7 */
2072 	fixed20_12 yclk, dram_channels, bandwidth;
2073 	fixed20_12 a;
2074 
2075 	a.full = dfixed_const(1000);
2076 	yclk.full = dfixed_const(wm->yclk);
2077 	yclk.full = dfixed_div(yclk, a);
2078 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2079 	a.full = dfixed_const(10);
2080 	dram_efficiency.full = dfixed_const(7);
2081 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
2082 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2083 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2084 
2085 	return dfixed_trunc(bandwidth);
2086 }
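/*
 * Stripped of the fixed-point plumbing, the function above computes
 *
 *	dram bw = (yclk / 1000) * (dram_channels * 4) * 0.7
 *
 * e.g. yclk = 1000000 and 2 channels give 1000 * 8 * 0.7 = 5600.  The
 * helpers below follow the same shape with different clocks and a 0.8
 * efficiency factor.
 */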
2087 
2088 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2089 {
2090 	/* Calculate DRAM Bandwidth and the part allocated to display. */
2091 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2092 	fixed20_12 yclk, dram_channels, bandwidth;
2093 	fixed20_12 a;
2094 
2095 	a.full = dfixed_const(1000);
2096 	yclk.full = dfixed_const(wm->yclk);
2097 	yclk.full = dfixed_div(yclk, a);
2098 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2099 	a.full = dfixed_const(10);
2100 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
2101 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2102 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2103 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2104 
2105 	return dfixed_trunc(bandwidth);
2106 }
2107 
2108 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2109 {
2110 	/* Calculate the display Data return Bandwidth */
2111 	fixed20_12 return_efficiency; /* 0.8 */
2112 	fixed20_12 sclk, bandwidth;
2113 	fixed20_12 a;
2114 
2115 	a.full = dfixed_const(1000);
2116 	sclk.full = dfixed_const(wm->sclk);
2117 	sclk.full = dfixed_div(sclk, a);
2118 	a.full = dfixed_const(10);
2119 	return_efficiency.full = dfixed_const(8);
2120 	return_efficiency.full = dfixed_div(return_efficiency, a);
2121 	a.full = dfixed_const(32);
2122 	bandwidth.full = dfixed_mul(a, sclk);
2123 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2124 
2125 	return dfixed_trunc(bandwidth);
2126 }
2127 
2128 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2129 {
2130 	return 32;
2131 }
2132 
2133 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2134 {
2135 	/* Calculate the DMIF Request Bandwidth */
2136 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2137 	fixed20_12 disp_clk, sclk, bandwidth;
2138 	fixed20_12 a, b1, b2;
2139 	u32 min_bandwidth;
2140 
2141 	a.full = dfixed_const(1000);
2142 	disp_clk.full = dfixed_const(wm->disp_clk);
2143 	disp_clk.full = dfixed_div(disp_clk, a);
2144 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2145 	b1.full = dfixed_mul(a, disp_clk);
2146 
2147 	a.full = dfixed_const(1000);
2148 	sclk.full = dfixed_const(wm->sclk);
2149 	sclk.full = dfixed_div(sclk, a);
2150 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2151 	b2.full = dfixed_mul(a, sclk);
2152 
2153 	a.full = dfixed_const(10);
2154 	disp_clk_request_efficiency.full = dfixed_const(8);
2155 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2156 
2157 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2158 
2159 	a.full = dfixed_const(min_bandwidth);
2160 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2161 
2162 	return dfixed_trunc(bandwidth);
2163 }
2164 
2165 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2166 {
2167 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
2168 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2169 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2170 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2171 
2172 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2173 }
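/*
 * Putting the three limits above in closed form, the available bandwidth is
 *
 *	min(yclk/1000 * dram_channels * 4 * 0.7,	    (dram)
 *	    sclk/1000 * 32 * 0.8,			    (data return)
 *	    min(disp_clk/1000 * 16, sclk/1000 * 32) * 0.8)  (dmif)
 *
 * i.e. whichever of memory, return path or request path saturates first
 * bounds what the display can momentarily consume.
 */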
2174 
2175 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2176 {
2177 	/* Calculate the display mode Average Bandwidth
2178 	 * DisplayMode should contain the source and destination dimensions,
2179 	 * timing, etc.
2180 	 */
2181 	fixed20_12 bpp;
2182 	fixed20_12 line_time;
2183 	fixed20_12 src_width;
2184 	fixed20_12 bandwidth;
2185 	fixed20_12 a;
2186 
2187 	a.full = dfixed_const(1000);
2188 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2189 	line_time.full = dfixed_div(line_time, a);
2190 	bpp.full = dfixed_const(wm->bytes_per_pixel);
2191 	src_width.full = dfixed_const(wm->src_width);
2192 	bandwidth.full = dfixed_mul(src_width, bpp);
2193 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2194 	bandwidth.full = dfixed_div(bandwidth, line_time);
2195 
2196 	return dfixed_trunc(bandwidth);
2197 }
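/*
 * Equivalent closed form of the steady-state demand:
 *
 *	average bw = src_width * bytes_per_pixel * vsc / (line_time / 1000)
 *
 * e.g. a 1920-wide 32bpp source with vsc = 1 and a 10000 ns line time
 * needs 1920 * 4 / 10 = 768, in the same units the helpers above produce.
 */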
2198 
2199 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
2200 {
2201 	/* First calculate the latency in ns */
2202 	u32 mc_latency = 2000; /* 2000 ns. */
2203 	u32 available_bandwidth = dce6_available_bandwidth(wm);
2204 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
2205 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
2206 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
2207 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
2208 		(wm->num_heads * cursor_line_pair_return_time);
2209 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
2210 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
2211 	u32 tmp, dmif_size = 12288;
2212 	fixed20_12 a, b, c;
2213 
2214 	if (wm->num_heads == 0)
2215 		return 0;
2216 
2217 	a.full = dfixed_const(2);
2218 	b.full = dfixed_const(1);
2219 	if ((wm->vsc.full > a.full) ||
2220 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
2221 	    (wm->vtaps >= 5) ||
2222 	    ((wm->vsc.full >= a.full) && wm->interlaced))
2223 		max_src_lines_per_dst_line = 4;
2224 	else
2225 		max_src_lines_per_dst_line = 2;
2226 
2227 	a.full = dfixed_const(available_bandwidth);
2228 	b.full = dfixed_const(wm->num_heads);
2229 	a.full = dfixed_div(a, b);
2230 	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
2231 	tmp = min(dfixed_trunc(a), tmp);
2232 
2233 	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
2234 
2235 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2236 	b.full = dfixed_const(1000);
2237 	c.full = dfixed_const(lb_fill_bw);
2238 	b.full = dfixed_div(c, b);
2239 	a.full = dfixed_div(a, b);
2240 	line_fill_time = dfixed_trunc(a);
2241 
2242 	if (line_fill_time < wm->active_time)
2243 		return latency;
2244 	else
2245 		return latency + (line_fill_time - wm->active_time);
2246 
2247 }
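/*
 * Worked example of the sum above, assuming available_bandwidth = 5600
 * and num_heads = 2:
 *
 *	worst_chunk_return_time      = 512 * 8 * 1000 / 5600 ~= 731 ns
 *	cursor_line_pair_return_time = 128 * 4 * 1000 / 5600 ~=  91 ns
 *	latency ~= 2000 + 3 * 731 + 2 * 91 + 40000000 / disp_clk
 *
 * which is then padded by any shortfall between line_fill_time and the
 * active display time.
 */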
2248 
2249 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2250 {
2251 	if (dce6_average_bandwidth(wm) <=
2252 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2253 		return true;
2254 	else
2255 		return false;
2256 }
2257 
2258 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2259 {
2260 	if (dce6_average_bandwidth(wm) <=
2261 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2262 		return true;
2263 	else
2264 		return false;
2265 }
2266 
2267 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2268 {
2269 	u32 lb_partitions = wm->lb_size / wm->src_width;
2270 	u32 line_time = wm->active_time + wm->blank_time;
2271 	u32 latency_tolerant_lines;
2272 	u32 latency_hiding;
2273 	fixed20_12 a;
2274 
2275 	a.full = dfixed_const(1);
2276 	if (wm->vsc.full > a.full)
2277 		latency_tolerant_lines = 1;
2278 	else {
2279 		if (lb_partitions <= (wm->vtaps + 1))
2280 			latency_tolerant_lines = 1;
2281 		else
2282 			latency_tolerant_lines = 2;
2283 	}
2284 
2285 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2286 
2287 	if (dce6_latency_watermark(wm) <= latency_hiding)
2288 		return true;
2289 	else
2290 		return false;
2291 }
2292 
2293 static void dce6_program_watermarks(struct radeon_device *rdev,
2294 					 struct radeon_crtc *radeon_crtc,
2295 					 u32 lb_size, u32 num_heads)
2296 {
2297 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
2298 	struct dce6_wm_params wm_low, wm_high;
2299 	u32 dram_channels;
2300 	u32 active_time;
2301 	u32 line_time = 0;
2302 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
2303 	u32 priority_a_mark = 0, priority_b_mark = 0;
2304 	u32 priority_a_cnt = PRIORITY_OFF;
2305 	u32 priority_b_cnt = PRIORITY_OFF;
2306 	u32 tmp, arb_control3;
2307 	fixed20_12 a, b, c;
2308 
2309 	if (radeon_crtc->base.enabled && num_heads && mode) {
2310 		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
2311 					    (u32)mode->clock);
2312 		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
2313 					  (u32)mode->clock);
2314 		line_time = min(line_time, (u32)65535);
2315 		priority_a_cnt = 0;
2316 		priority_b_cnt = 0;
2317 
2318 		if (rdev->family == CHIP_ARUBA)
2319 			dram_channels = evergreen_get_number_of_dram_channels(rdev);
2320 		else
2321 			dram_channels = si_get_number_of_dram_channels(rdev);
2322 
2323 		/* watermark for high clocks */
2324 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2325 			wm_high.yclk =
2326 				radeon_dpm_get_mclk(rdev, false) * 10;
2327 			wm_high.sclk =
2328 				radeon_dpm_get_sclk(rdev, false) * 10;
2329 		} else {
2330 			wm_high.yclk = rdev->pm.current_mclk * 10;
2331 			wm_high.sclk = rdev->pm.current_sclk * 10;
2332 		}
2333 
2334 		wm_high.disp_clk = mode->clock;
2335 		wm_high.src_width = mode->crtc_hdisplay;
2336 		wm_high.active_time = active_time;
2337 		wm_high.blank_time = line_time - wm_high.active_time;
2338 		wm_high.interlaced = false;
2339 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2340 			wm_high.interlaced = true;
2341 		wm_high.vsc = radeon_crtc->vsc;
2342 		wm_high.vtaps = 1;
2343 		if (radeon_crtc->rmx_type != RMX_OFF)
2344 			wm_high.vtaps = 2;
2345 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2346 		wm_high.lb_size = lb_size;
2347 		wm_high.dram_channels = dram_channels;
2348 		wm_high.num_heads = num_heads;
2349 
2350 		/* watermark for low clocks */
2351 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2352 			wm_low.yclk =
2353 				radeon_dpm_get_mclk(rdev, true) * 10;
2354 			wm_low.sclk =
2355 				radeon_dpm_get_sclk(rdev, true) * 10;
2356 		} else {
2357 			wm_low.yclk = rdev->pm.current_mclk * 10;
2358 			wm_low.sclk = rdev->pm.current_sclk * 10;
2359 		}
2360 
2361 		wm_low.disp_clk = mode->clock;
2362 		wm_low.src_width = mode->crtc_hdisplay;
2363 		wm_low.active_time = active_time;
2364 		wm_low.blank_time = line_time - wm_low.active_time;
2365 		wm_low.interlaced = false;
2366 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2367 			wm_low.interlaced = true;
2368 		wm_low.vsc = radeon_crtc->vsc;
2369 		wm_low.vtaps = 1;
2370 		if (radeon_crtc->rmx_type != RMX_OFF)
2371 			wm_low.vtaps = 2;
2372 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2373 		wm_low.lb_size = lb_size;
2374 		wm_low.dram_channels = dram_channels;
2375 		wm_low.num_heads = num_heads;
2376 
2377 		/* set for high clocks */
2378 		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2379 		/* set for low clocks */
2380 		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2381 
2382 		/* possibly force display priority to high */
2383 		/* should really do this at mode validation time... */
2384 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2385 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2386 		    !dce6_check_latency_hiding(&wm_high) ||
2387 		    (rdev->disp_priority == 2)) {
2388 			DRM_DEBUG_KMS("force priority to high\n");
2389 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2390 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2391 		}
2392 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2393 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2394 		    !dce6_check_latency_hiding(&wm_low) ||
2395 		    (rdev->disp_priority == 2)) {
2396 			DRM_DEBUG_KMS("force priority to high\n");
2397 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2398 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2399 		}
2400 
2401 		a.full = dfixed_const(1000);
2402 		b.full = dfixed_const(mode->clock);
2403 		b.full = dfixed_div(b, a);
2404 		c.full = dfixed_const(latency_watermark_a);
2405 		c.full = dfixed_mul(c, b);
2406 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2407 		c.full = dfixed_div(c, a);
2408 		a.full = dfixed_const(16);
2409 		c.full = dfixed_div(c, a);
2410 		priority_a_mark = dfixed_trunc(c);
2411 		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2412 
2413 		a.full = dfixed_const(1000);
2414 		b.full = dfixed_const(mode->clock);
2415 		b.full = dfixed_div(b, a);
2416 		c.full = dfixed_const(latency_watermark_b);
2417 		c.full = dfixed_mul(c, b);
2418 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2419 		c.full = dfixed_div(c, a);
2420 		a.full = dfixed_const(16);
2421 		c.full = dfixed_div(c, a);
2422 		priority_b_mark = dfixed_trunc(c);
2423 		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2424 
2425 		/* Save the number of lines the linebuffer leads before scanout */
2426 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
2427 	}
2428 
2429 	/* select wm A */
2430 	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2431 	tmp = arb_control3;
2432 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2433 	tmp |= LATENCY_WATERMARK_MASK(1);
2434 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2435 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2436 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2437 		LATENCY_HIGH_WATERMARK(line_time)));
2438 	/* select wm B */
2439 	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2440 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2441 	tmp |= LATENCY_WATERMARK_MASK(2);
2442 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2443 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2444 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2445 		LATENCY_HIGH_WATERMARK(line_time)));
2446 	/* restore original selection */
2447 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2448 
2449 	/* write the priority marks */
2450 	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2451 	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2452 
2453 	/* save values for DPM */
2454 	radeon_crtc->line_time = line_time;
2455 	radeon_crtc->wm_high = latency_watermark_a;
2456 	radeon_crtc->wm_low = latency_watermark_b;
2457 }
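/*
 * The fixed-point block above reduces to
 *
 *	mark = latency_watermark * (mode->clock / 1000) * hsc / 1000 / 16
 *
 * i.e. the watermark scaled by the pixel clock and the horizontal scale
 * ratio, expressed in units of 16; PRIORITY_ALWAYS_ON is OR'ed in instead
 * whenever the bandwidth or latency-hiding checks fail.
 */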
2458 
2459 void dce6_bandwidth_update(struct radeon_device *rdev)
2460 {
2461 	struct drm_display_mode *mode0 = NULL;
2462 	struct drm_display_mode *mode1 = NULL;
2463 	u32 num_heads = 0, lb_size;
2464 	int i;
2465 
2466 	if (!rdev->mode_info.mode_config_initialized)
2467 		return;
2468 
2469 	radeon_update_display_priority(rdev);
2470 
2471 	for (i = 0; i < rdev->num_crtc; i++) {
2472 		if (rdev->mode_info.crtcs[i]->base.enabled)
2473 			num_heads++;
2474 	}
2475 	for (i = 0; i < rdev->num_crtc; i += 2) {
2476 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2477 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2478 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2479 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2480 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2481 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2482 	}
2483 }
2484 
2485 /*
2486  * Core functions
2487  */
2488 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2489 {
2490 	u32 *tile = rdev->config.si.tile_mode_array;
2491 	const u32 num_tile_mode_states =
2492 			ARRAY_SIZE(rdev->config.si.tile_mode_array);
2493 	u32 reg_offset, split_equal_to_row_size;
2494 
2495 	switch (rdev->config.si.mem_row_size_in_kb) {
2496 	case 1:
2497 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2498 		break;
2499 	case 2:
2500 	default:
2501 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2502 		break;
2503 	case 4:
2504 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2505 		break;
2506 	}
2507 
2508 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2509 		tile[reg_offset] = 0;
2510 
2511 	switch (rdev->family) {
2512 	case CHIP_TAHITI:
2513 	case CHIP_PITCAIRN:
2514 		/* non-AA compressed depth or any compressed stencil */
2515 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2516 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2517 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2518 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2519 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2520 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2522 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2523 		/* 2xAA/4xAA compressed depth only */
2524 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2526 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2527 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2528 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2529 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2530 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2531 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2532 		/* 8xAA compressed depth only */
2533 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2534 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2535 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2536 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2537 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2538 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2540 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2541 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2542 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2543 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2544 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2545 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2546 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2547 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2548 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2549 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2550 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2551 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2552 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2553 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2554 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2555 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2556 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2558 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2559 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2560 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2561 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2562 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2563 			   TILE_SPLIT(split_equal_to_row_size) |
2564 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2565 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2566 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2567 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2568 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2569 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2570 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2571 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2572 			   TILE_SPLIT(split_equal_to_row_size) |
2573 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2574 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2575 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2576 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2577 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2578 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2579 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2580 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2581 			   TILE_SPLIT(split_equal_to_row_size) |
2582 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2583 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2584 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2585 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2586 		/* 1D and 1D Array Surfaces */
2587 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2588 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2589 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2590 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2591 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2592 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2593 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2594 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2595 		/* Displayable maps. */
2596 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2597 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2598 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2599 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2600 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2601 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2602 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2603 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2604 		/* Display 8bpp. */
2605 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2607 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2608 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2609 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2610 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2611 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2612 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2613 		/* Display 16bpp. */
2614 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2615 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2616 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2617 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2618 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2619 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2621 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2622 		/* Display 32bpp. */
2623 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2624 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2625 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2626 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2627 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2628 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2630 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2631 		/* Thin. */
2632 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2633 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2634 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2635 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2636 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2637 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2638 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2639 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2640 		/* Thin 8 bpp. */
2641 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2642 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2643 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2644 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2645 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2646 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2648 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2649 		/* Thin 16 bpp. */
2650 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2651 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2652 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2653 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2654 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2655 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2657 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2658 		/* Thin 32 bpp. */
2659 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2660 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2661 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2662 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2663 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2664 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2665 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2666 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2667 		/* Thin 64 bpp. */
2668 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2669 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2670 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2671 			   TILE_SPLIT(split_equal_to_row_size) |
2672 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2673 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2674 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2675 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2676 		/* 8 bpp PRT. */
2677 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2678 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2679 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2680 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2681 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2682 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2683 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2684 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2685 		/* 16 bpp PRT */
2686 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2687 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2688 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2689 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2690 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2691 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2692 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2693 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2694 		/* 32 bpp PRT */
2695 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2696 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2697 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2698 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2699 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2700 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2701 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2702 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2703 		/* 64 bpp PRT */
2704 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2705 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2706 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2707 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2708 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2709 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2710 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2711 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2712 		/* 128 bpp PRT */
2713 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2714 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2715 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2716 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2717 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2718 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2719 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2720 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2721 
2722 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2723 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2724 		break;
2725 
2726 	case CHIP_VERDE:
2727 	case CHIP_OLAND:
2728 	case CHIP_HAINAN:
2729 		/* non-AA compressed depth or any compressed stencil */
2730 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2731 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2732 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2733 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2734 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2735 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2736 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2737 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2738 		/* 2xAA/4xAA compressed depth only */
2739 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2740 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2741 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2742 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2743 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2744 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2746 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2747 		/* 8xAA compressed depth only */
2748 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2749 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2750 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2751 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2752 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2753 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2754 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2755 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2756 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2757 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2758 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2759 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2760 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2761 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2762 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2764 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2765 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2766 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2767 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2768 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2769 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2770 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2771 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2772 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2773 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2774 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2775 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2776 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2777 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778 			   TILE_SPLIT(split_equal_to_row_size) |
2779 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2780 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2782 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2783 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2784 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2786 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787 			   TILE_SPLIT(split_equal_to_row_size) |
2788 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2789 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2790 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2791 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2792 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2793 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2794 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2795 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2796 			   TILE_SPLIT(split_equal_to_row_size) |
2797 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2798 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2799 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2800 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2801 		/* 1D and 1D Array Surfaces */
2802 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2803 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2804 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2805 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2806 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2807 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2808 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2809 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2810 		/* Displayable maps. */
2811 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2812 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2813 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2815 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2816 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2817 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2818 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2819 		/* Display 8bpp. */
2820 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2821 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2822 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2823 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2824 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2825 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2826 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2827 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2828 		/* Display 16bpp. */
2829 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2830 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2831 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2832 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2833 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2834 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2835 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2836 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2837 		/* Display 32bpp. */
2838 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2839 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2840 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2841 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2842 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2843 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2844 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2845 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2846 		/* Thin. */
2847 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2848 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2849 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2850 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2851 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2852 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2854 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2855 		/* Thin 8 bpp. */
2856 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2857 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2858 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2859 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2860 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2861 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2863 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2864 		/* Thin 16 bpp. */
2865 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2866 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2867 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2868 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2869 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2870 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2871 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2872 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2873 		/* Thin 32 bpp. */
2874 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2875 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2876 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2877 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2878 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2879 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2880 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2881 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2882 		/* Thin 64 bpp. */
2883 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2884 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2885 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2886 			   TILE_SPLIT(split_equal_to_row_size) |
2887 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2888 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2889 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2890 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2891 		/* 8 bpp PRT. */
2892 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2893 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2894 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2895 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2896 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2897 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2898 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2899 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2900 		/* 16 bpp PRT */
2901 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2902 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2903 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2904 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2905 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2906 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2907 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2908 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2909 		/* 32 bpp PRT */
2910 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2911 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2912 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2913 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2914 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2915 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2916 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2917 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2918 		/* 64 bpp PRT */
2919 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2920 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2921 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2922 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2923 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2924 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2925 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2926 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2927 		/* 128 bpp PRT */
2928 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2929 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2930 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2931 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2932 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2933 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2934 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2935 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2936 
2937 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2938 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2939 		break;
2940 
2941 	default:
2942 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2943 	}
2944 }
2945 
2946 static void si_select_se_sh(struct radeon_device *rdev,
2947 			    u32 se_num, u32 sh_num)
2948 {
2949 	u32 data = INSTANCE_BROADCAST_WRITES;
2950 
2951 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2952 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2953 	else if (se_num == 0xffffffff)
2954 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2955 	else if (sh_num == 0xffffffff)
2956 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2957 	else
2958 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2959 	WREG32(GRBM_GFX_INDEX, data);
2960 }
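/*
 * Callers pair each targeted si_select_se_sh(rdev, se, sh) with a final
 * si_select_se_sh(rdev, 0xffffffff, 0xffffffff) so that GRBM_GFX_INDEX is
 * left in broadcast mode and later register writes reach every SE/SH again.
 */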
2961 
2962 static u32 si_create_bitmask(u32 bit_width)
2963 {
2964 	u32 i, mask = 0;
2965 
2966 	for (i = 0; i < bit_width; i++) {
2967 		mask <<= 1;
2968 		mask |= 1;
2969 	}
2970 	return mask;
2971 }
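/*
 * For 0 < bit_width < 32 the loop is equivalent to (1u << bit_width) - 1,
 * i.e. GENMASK(bit_width - 1, 0); e.g. si_create_bitmask(5) = 0x1f.
 */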
2972 
2973 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2974 {
2975 	u32 data, mask;
2976 
2977 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2978 	if (data & 1)
2979 		data &= INACTIVE_CUS_MASK;
2980 	else
2981 		data = 0;
2982 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2983 
2984 	data >>= INACTIVE_CUS_SHIFT;
2985 
2986 	mask = si_create_bitmask(cu_per_sh);
2987 
2988 	return ~data & mask;
2989 }
2990 
2991 static void si_setup_spi(struct radeon_device *rdev,
2992 			 u32 se_num, u32 sh_per_se,
2993 			 u32 cu_per_sh)
2994 {
2995 	int i, j, k;
2996 	u32 data, mask, active_cu;
2997 
2998 	for (i = 0; i < se_num; i++) {
2999 		for (j = 0; j < sh_per_se; j++) {
3000 			si_select_se_sh(rdev, i, j);
3001 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
3002 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
3003 
3004 			mask = 1;
3005 			for (k = 0; k < 16; k++) {
3006 				mask <<= k;
3007 				if (active_cu & mask) {
3008 					data &= ~mask;
3009 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
3010 					break;
3011 				}
3012 			}
3013 		}
3014 	}
3015 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3016 }
3017 
3018 static u32 si_get_rb_disabled(struct radeon_device *rdev,
3019 			      u32 max_rb_num_per_se,
3020 			      u32 sh_per_se)
3021 {
3022 	u32 data, mask;
3023 
3024 	data = RREG32(CC_RB_BACKEND_DISABLE);
3025 	if (data & 1)
3026 		data &= BACKEND_DISABLE_MASK;
3027 	else
3028 		data = 0;
3029 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3030 
3031 	data >>= BACKEND_DISABLE_SHIFT;
3032 
3033 	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3034 
3035 	return data & mask;
3036 }
3037 
3038 static void si_setup_rb(struct radeon_device *rdev,
3039 			u32 se_num, u32 sh_per_se,
3040 			u32 max_rb_num_per_se)
3041 {
3042 	int i, j;
3043 	u32 data, mask;
3044 	u32 disabled_rbs = 0;
3045 	u32 enabled_rbs = 0;
3046 
3047 	for (i = 0; i < se_num; i++) {
3048 		for (j = 0; j < sh_per_se; j++) {
3049 			si_select_se_sh(rdev, i, j);
3050 			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3051 			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
3052 		}
3053 	}
3054 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3055 
3056 	mask = 1;
3057 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3058 		if (!(disabled_rbs & mask))
3059 			enabled_rbs |= mask;
3060 		mask <<= 1;
3061 	}
3062 
3063 	rdev->config.si.backend_enable_mask = enabled_rbs;
3064 
3065 	for (i = 0; i < se_num; i++) {
3066 		si_select_se_sh(rdev, i, 0xffffffff);
3067 		data = 0;
3068 		for (j = 0; j < sh_per_se; j++) {
3069 			switch (enabled_rbs & 3) {
3070 			case 1:
3071 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3072 				break;
3073 			case 2:
3074 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3075 				break;
3076 			case 3:
3077 			default:
3078 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3079 				break;
3080 			}
3081 			enabled_rbs >>= 2;
3082 		}
3083 		WREG32(PA_SC_RASTER_CONFIG, data);
3084 	}
3085 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3086 }
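/*
 * Summary of the packing above: si_get_rb_disabled() yields one small
 * bitmask per (se, sh) pair, shifted into disabled_rbs at
 * TAHITI_RB_BITMAP_WIDTH_PER_SH-bit intervals, inverted into enabled_rbs,
 * and then consumed two bits at a time to select a RASTER_CONFIG_RB_MAP_*
 * value for each shader array.
 */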
3087 
3088 static void si_gpu_init(struct radeon_device *rdev)
3089 {
3090 	u32 gb_addr_config = 0;
3091 	u32 mc_arb_ramcfg;
3092 	u32 sx_debug_1;
3093 	u32 hdp_host_path_cntl;
3094 	u32 tmp;
3095 	int i, j;
3096 
3097 	switch (rdev->family) {
3098 	case CHIP_TAHITI:
3099 		rdev->config.si.max_shader_engines = 2;
3100 		rdev->config.si.max_tile_pipes = 12;
3101 		rdev->config.si.max_cu_per_sh = 8;
3102 		rdev->config.si.max_sh_per_se = 2;
3103 		rdev->config.si.max_backends_per_se = 4;
3104 		rdev->config.si.max_texture_channel_caches = 12;
3105 		rdev->config.si.max_gprs = 256;
3106 		rdev->config.si.max_gs_threads = 32;
3107 		rdev->config.si.max_hw_contexts = 8;
3108 
3109 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3110 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3111 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3112 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3113 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3114 		break;
3115 	case CHIP_PITCAIRN:
3116 		rdev->config.si.max_shader_engines = 2;
3117 		rdev->config.si.max_tile_pipes = 8;
3118 		rdev->config.si.max_cu_per_sh = 5;
3119 		rdev->config.si.max_sh_per_se = 2;
3120 		rdev->config.si.max_backends_per_se = 4;
3121 		rdev->config.si.max_texture_channel_caches = 8;
3122 		rdev->config.si.max_gprs = 256;
3123 		rdev->config.si.max_gs_threads = 32;
3124 		rdev->config.si.max_hw_contexts = 8;
3125 
3126 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3127 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3128 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3129 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3130 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3131 		break;
3132 	case CHIP_VERDE:
3133 	default:
3134 		rdev->config.si.max_shader_engines = 1;
3135 		rdev->config.si.max_tile_pipes = 4;
3136 		rdev->config.si.max_cu_per_sh = 5;
3137 		rdev->config.si.max_sh_per_se = 2;
3138 		rdev->config.si.max_backends_per_se = 4;
3139 		rdev->config.si.max_texture_channel_caches = 4;
3140 		rdev->config.si.max_gprs = 256;
3141 		rdev->config.si.max_gs_threads = 32;
3142 		rdev->config.si.max_hw_contexts = 8;
3143 
3144 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3145 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3146 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3147 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3148 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3149 		break;
3150 	case CHIP_OLAND:
3151 		rdev->config.si.max_shader_engines = 1;
3152 		rdev->config.si.max_tile_pipes = 4;
3153 		rdev->config.si.max_cu_per_sh = 6;
3154 		rdev->config.si.max_sh_per_se = 1;
3155 		rdev->config.si.max_backends_per_se = 2;
3156 		rdev->config.si.max_texture_channel_caches = 4;
3157 		rdev->config.si.max_gprs = 256;
3158 		rdev->config.si.max_gs_threads = 16;
3159 		rdev->config.si.max_hw_contexts = 8;
3160 
3161 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3162 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3163 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3164 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3165 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3166 		break;
3167 	case CHIP_HAINAN:
3168 		rdev->config.si.max_shader_engines = 1;
3169 		rdev->config.si.max_tile_pipes = 4;
3170 		rdev->config.si.max_cu_per_sh = 5;
3171 		rdev->config.si.max_sh_per_se = 1;
3172 		rdev->config.si.max_backends_per_se = 1;
3173 		rdev->config.si.max_texture_channel_caches = 2;
3174 		rdev->config.si.max_gprs = 256;
3175 		rdev->config.si.max_gs_threads = 16;
3176 		rdev->config.si.max_hw_contexts = 8;
3177 
3178 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3179 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3180 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3181 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3182 		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
3183 		break;
3184 	}
3185 
3186 	/* Initialize HDP */
3187 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3188 		WREG32((0x2c14 + j), 0x00000000);
3189 		WREG32((0x2c18 + j), 0x00000000);
3190 		WREG32((0x2c1c + j), 0x00000000);
3191 		WREG32((0x2c20 + j), 0x00000000);
3192 		WREG32((0x2c24 + j), 0x00000000);
3193 	}
3194 
3195 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3196 	WREG32(SRBM_INT_CNTL, 1);
3197 	WREG32(SRBM_INT_ACK, 1);
3198 
3199 	evergreen_fix_pci_max_read_req_size(rdev);
3200 
3201 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3202 
3203 	RREG32(MC_SHARED_CHMAP);
3204 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3205 
3206 	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3207 	rdev->config.si.mem_max_burst_length_bytes = 256;
3208 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3209 	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3210 	if (rdev->config.si.mem_row_size_in_kb > 4)
3211 		rdev->config.si.mem_row_size_in_kb = 4;
3212 	/* XXX use MC settings? */
3213 	rdev->config.si.shader_engine_tile_size = 32;
3214 	rdev->config.si.num_gpus = 1;
3215 	rdev->config.si.multi_gpu_tile_size = 64;
3216 
3217 	/* fix up row size */
3218 	gb_addr_config &= ~ROW_SIZE_MASK;
3219 	switch (rdev->config.si.mem_row_size_in_kb) {
3220 	case 1:
3221 	default:
3222 		gb_addr_config |= ROW_SIZE(0);
3223 		break;
3224 	case 2:
3225 		gb_addr_config |= ROW_SIZE(1);
3226 		break;
3227 	case 4:
3228 		gb_addr_config |= ROW_SIZE(2);
3229 		break;
3230 	}
3231 
3232 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3233 	 * not have bank info, so create a custom tiling dword.
3234 	 * bits 3:0   num_pipes
3235 	 * bits 7:4   num_banks
3236 	 * bits 11:8  group_size
3237 	 * bits 15:12 row_size
3238 	 */
3239 	rdev->config.si.tile_config = 0;
3240 	switch (rdev->config.si.num_tile_pipes) {
3241 	case 1:
3242 		rdev->config.si.tile_config |= (0 << 0);
3243 		break;
3244 	case 2:
3245 		rdev->config.si.tile_config |= (1 << 0);
3246 		break;
3247 	case 4:
3248 		rdev->config.si.tile_config |= (2 << 0);
3249 		break;
3250 	case 8:
3251 	default:
3252 		/* XXX what about 12? */
3253 		rdev->config.si.tile_config |= (3 << 0);
3254 		break;
3255 	}
3256 	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3257 	case 0: /* four banks */
3258 		rdev->config.si.tile_config |= 0 << 4;
3259 		break;
3260 	case 1: /* eight banks */
3261 		rdev->config.si.tile_config |= 1 << 4;
3262 		break;
3263 	case 2: /* sixteen banks */
3264 	default:
3265 		rdev->config.si.tile_config |= 2 << 4;
3266 		break;
3267 	}
3268 	rdev->config.si.tile_config |=
3269 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3270 	rdev->config.si.tile_config |=
3271 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
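	/*
	 * Worked example of the packing above (hypothetical field values,
	 * not taken from a real board): 8 pipes (0x3), 16 banks (0x2), a
	 * pipe interleave field of 0 and a row size field of 1 give
	 * tile_config = 0x3 | (0x2 << 4) | (0x0 << 8) | (0x1 << 12) = 0x1023.
	 */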
3272 
3273 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3274 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3275 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3276 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3277 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3278 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3279 	if (rdev->has_uvd) {
3280 		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3281 		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3282 		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3283 	}
3284 
3285 	si_tiling_mode_table_init(rdev);
3286 
3287 	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3288 		    rdev->config.si.max_sh_per_se,
3289 		    rdev->config.si.max_backends_per_se);
3290 
3291 	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3292 		     rdev->config.si.max_sh_per_se,
3293 		     rdev->config.si.max_cu_per_sh);
3294 
3295 	rdev->config.si.active_cus = 0;
3296 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
3297 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
3298 			rdev->config.si.active_cus +=
3299 				hweight32(si_get_cu_active_bitmap(rdev, i, j));
3300 		}
3301 	}
3302 
3303 	/* set HW defaults for 3D engine */
3304 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3305 				     ROQ_IB2_START(0x2b)));
3306 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3307 
3308 	sx_debug_1 = RREG32(SX_DEBUG_1);
3309 	WREG32(SX_DEBUG_1, sx_debug_1);
3310 
3311 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3312 
3313 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3314 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3315 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3316 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3317 
3318 	WREG32(VGT_NUM_INSTANCES, 1);
3319 
3320 	WREG32(CP_PERFMON_CNTL, 0);
3321 
3322 	WREG32(SQ_CONFIG, 0);
3323 
3324 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3325 					  FORCE_EOV_MAX_REZ_CNT(255)));
3326 
3327 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3328 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3329 
3330 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3331 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3332 
3333 	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3334 	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3335 	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3336 	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3337 	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3338 	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3339 	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3340 	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3341 
3342 	tmp = RREG32(HDP_MISC_CNTL);
3343 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3344 	WREG32(HDP_MISC_CNTL, tmp);
3345 
3346 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3347 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3348 
3349 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3350 
3351 	udelay(50);
3352 }
3353 
3354 /*
3355  * GPU scratch register helper functions.
3356  */
3357 static void si_scratch_init(struct radeon_device *rdev)
3358 {
3359 	int i;
3360 
3361 	rdev->scratch.num_reg = 7;
3362 	rdev->scratch.reg_base = SCRATCH_REG0;
3363 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3364 		rdev->scratch.free[i] = true;
3365 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3366 	}
3367 }
3368 
3369 void si_fence_ring_emit(struct radeon_device *rdev,
3370 			struct radeon_fence *fence)
3371 {
3372 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3373 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3374 
3375 	/* flush read cache over gart */
3376 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3377 	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3378 	radeon_ring_write(ring, 0);
3379 	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3380 	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3381 			  PACKET3_TC_ACTION_ENA |
3382 			  PACKET3_SH_KCACHE_ACTION_ENA |
3383 			  PACKET3_SH_ICACHE_ACTION_ENA);
3384 	radeon_ring_write(ring, 0xFFFFFFFF);
3385 	radeon_ring_write(ring, 0);
3386 	radeon_ring_write(ring, 10); /* poll interval */
3387 	/* EVENT_WRITE_EOP - flush caches, send int */
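	/*
	 * DATA_SEL(1) writes the 32-bit fence value; INT_SEL(2) raises the
	 * interrupt once that write is confirmed.
	 */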
3388 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3389 	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3390 	radeon_ring_write(ring, lower_32_bits(addr));
3391 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3392 	radeon_ring_write(ring, fence->seq);
3393 	radeon_ring_write(ring, 0);
3394 }
3395 
3396 /*
3397  * Indirect buffer (IB) handling
3398  */
3399 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3400 {
3401 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3402 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3403 	u32 header;
3404 
3405 	if (ib->is_const_ib) {
3406 		/* set switch buffer packet before const IB */
3407 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3408 		radeon_ring_write(ring, 0);
3409 
3410 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3411 	} else {
3412 		u32 next_rptr;
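		/*
		 * The dword offsets below count what is emitted ahead of the
		 * IB: 3 (or 5 for the WRITE_DATA variant) for the rptr
		 * update, 4 for the INDIRECT_BUFFER packet and 8 for the
		 * GART cache flush appended at the end of this function.
		 */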
3413 		if (ring->rptr_save_reg) {
3414 			next_rptr = ring->wptr + 3 + 4 + 8;
3415 			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3416 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3417 						  PACKET3_SET_CONFIG_REG_START) >> 2));
3418 			radeon_ring_write(ring, next_rptr);
3419 		} else if (rdev->wb.enabled) {
3420 			next_rptr = ring->wptr + 5 + 4 + 8;
3421 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3422 			radeon_ring_write(ring, (1 << 8));
3423 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3424 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3425 			radeon_ring_write(ring, next_rptr);
3426 		}
3427 
3428 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3429 	}
3430 
3431 	radeon_ring_write(ring, header);
3432 	radeon_ring_write(ring,
3433 #ifdef __BIG_ENDIAN
3434 			  (2 << 0) |
3435 #endif
3436 			  (ib->gpu_addr & 0xFFFFFFFC));
3437 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3438 	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
3439 
3440 	if (!ib->is_const_ib) {
3441 		/* flush read cache over gart for this vmid */
3442 		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3443 		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3444 		radeon_ring_write(ring, vm_id);
3445 		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3446 		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3447 				  PACKET3_TC_ACTION_ENA |
3448 				  PACKET3_SH_KCACHE_ACTION_ENA |
3449 				  PACKET3_SH_ICACHE_ACTION_ENA);
3450 		radeon_ring_write(ring, 0xFFFFFFFF);
3451 		radeon_ring_write(ring, 0);
3452 		radeon_ring_write(ring, 10); /* poll interval */
3453 	}
3454 }
3455 
3456 /*
3457  * CP.
3458  */
3459 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3460 {
3461 	if (enable) {
3462 		WREG32(CP_ME_CNTL, 0);
3463 	} else {
3464 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3465 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3466 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3467 		WREG32(SCRATCH_UMSK, 0);
3468 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3469 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3470 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3471 	}
3472 	udelay(50);
3473 }
3474 
3475 static int si_cp_load_microcode(struct radeon_device *rdev)
3476 {
3477 	int i;
3478 
3479 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3480 		return -EINVAL;
3481 
3482 	si_cp_enable(rdev, false);
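	/*
	 * Each CP ucode port below works the same way: zero the address
	 * register, stream the words through the data register (which
	 * auto-increments the address), then zero the address again.
	 */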
3483 
3484 	if (rdev->new_fw) {
3485 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3486 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3487 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3488 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3489 		const struct gfx_firmware_header_v1_0 *me_hdr =
3490 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3491 		const __le32 *fw_data;
3492 		u32 fw_size;
3493 
3494 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3495 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3496 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3497 
3498 		/* PFP */
3499 		fw_data = (const __le32 *)
3500 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3501 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3502 		WREG32(CP_PFP_UCODE_ADDR, 0);
3503 		for (i = 0; i < fw_size; i++)
3504 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3505 		WREG32(CP_PFP_UCODE_ADDR, 0);
3506 
3507 		/* CE */
3508 		fw_data = (const __le32 *)
3509 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3510 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3511 		WREG32(CP_CE_UCODE_ADDR, 0);
3512 		for (i = 0; i < fw_size; i++)
3513 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3514 		WREG32(CP_CE_UCODE_ADDR, 0);
3515 
3516 		/* ME */
3517 		fw_data = (const __le32 *)
3518 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3519 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3520 		WREG32(CP_ME_RAM_WADDR, 0);
3521 		for (i = 0; i < fw_size; i++)
3522 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3523 		WREG32(CP_ME_RAM_WADDR, 0);
3524 	} else {
3525 		const __be32 *fw_data;
3526 
3527 		/* PFP */
3528 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3529 		WREG32(CP_PFP_UCODE_ADDR, 0);
3530 		for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3531 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3532 		WREG32(CP_PFP_UCODE_ADDR, 0);
3533 
3534 		/* CE */
3535 		fw_data = (const __be32 *)rdev->ce_fw->data;
3536 		WREG32(CP_CE_UCODE_ADDR, 0);
3537 		for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3538 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3539 		WREG32(CP_CE_UCODE_ADDR, 0);
3540 
3541 		/* ME */
3542 		fw_data = (const __be32 *)rdev->me_fw->data;
3543 		WREG32(CP_ME_RAM_WADDR, 0);
3544 		for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3545 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3546 		WREG32(CP_ME_RAM_WADDR, 0);
3547 	}
3548 
3549 	WREG32(CP_PFP_UCODE_ADDR, 0);
3550 	WREG32(CP_CE_UCODE_ADDR, 0);
3551 	WREG32(CP_ME_RAM_WADDR, 0);
3552 	WREG32(CP_ME_RAM_RADDR, 0);
3553 	return 0;
3554 }
3555 
3556 static int si_cp_start(struct radeon_device *rdev)
3557 {
3558 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3559 	int r, i;
3560 
3561 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3562 	if (r) {
3563 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3564 		return r;
3565 	}
3566 	/* init the CP */
3567 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3568 	radeon_ring_write(ring, 0x1);
3569 	radeon_ring_write(ring, 0x0);
3570 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3571 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3572 	radeon_ring_write(ring, 0);
3573 	radeon_ring_write(ring, 0);
3574 
3575 	/* init the CE partitions */
3576 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3577 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3578 	radeon_ring_write(ring, 0xc000);
3579 	radeon_ring_write(ring, 0xe000);
3580 	radeon_ring_unlock_commit(rdev, ring, false);
3581 
3582 	si_cp_enable(rdev, true);
3583 
3584 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3585 	if (r) {
3586 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3587 		return r;
3588 	}
3589 
3590 	/* setup clear context state */
3591 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3592 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3593 
3594 	for (i = 0; i < si_default_size; i++)
3595 		radeon_ring_write(ring, si_default_state[i]);
3596 
3597 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3598 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3599 
3600 	/* set clear context state */
3601 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3602 	radeon_ring_write(ring, 0);
3603 
3604 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3605 	radeon_ring_write(ring, 0x00000316);
3606 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3607 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3608 
3609 	radeon_ring_unlock_commit(rdev, ring, false);
3610 
3611 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3612 		ring = &rdev->ring[i];
3613 		r = radeon_ring_lock(rdev, ring, 2);
		if (r) {
			DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
			return r;
		}
3614 
3615 		/* clear the compute context state */
3616 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3617 		radeon_ring_write(ring, 0);
3618 
3619 		radeon_ring_unlock_commit(rdev, ring, false);
3620 	}
3621 
3622 	return 0;
3623 }
3624 
3625 static void si_cp_fini(struct radeon_device *rdev)
3626 {
3627 	struct radeon_ring *ring;
3628 	si_cp_enable(rdev, false);
3629 
3630 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3631 	radeon_ring_fini(rdev, ring);
3632 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3633 
3634 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3635 	radeon_ring_fini(rdev, ring);
3636 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3637 
3638 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3639 	radeon_ring_fini(rdev, ring);
3640 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3641 }
3642 
3643 static int si_cp_resume(struct radeon_device *rdev)
3644 {
3645 	struct radeon_ring *ring;
3646 	u32 tmp;
3647 	u32 rb_bufsz;
3648 	int r;
3649 
3650 	si_enable_gui_idle_interrupt(rdev, false);
3651 
3652 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3653 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3654 
3655 	/* Set the write pointer delay */
3656 	WREG32(CP_RB_WPTR_DELAY, 0);
3657 
3658 	WREG32(CP_DEBUG, 0);
3659 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3660 
3661 	/* ring 0 - compute and gfx */
3662 	/* Set ring buffer size */
3663 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3664 	rb_bufsz = order_base_2(ring->ring_size / 8);
3665 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
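	/*
	 * Illustrative example: a 1 MB ring gives
	 * rb_bufsz = log2(1048576 / 8) = 17, and with 4 KB GPU pages the
	 * block size field at bits 8+ is log2(4096 / 8) = 9, so
	 * tmp = (9 << 8) | 17.
	 */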
3666 #ifdef __BIG_ENDIAN
3667 	tmp |= BUF_SWAP_32BIT;
3668 #endif
3669 	WREG32(CP_RB0_CNTL, tmp);
3670 
3671 	/* Initialize the ring buffer's read and write pointers */
3672 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3673 	ring->wptr = 0;
3674 	WREG32(CP_RB0_WPTR, ring->wptr);
3675 
3676 	/* set the wb address whether it's enabled or not */
3677 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3678 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3679 
3680 	if (rdev->wb.enabled) {
3681 		WREG32(SCRATCH_UMSK, 0xff);
3682 	} else {
3683 		tmp |= RB_NO_UPDATE;
3684 		WREG32(SCRATCH_UMSK, 0);
3685 	}
3686 
3687 	mdelay(1);
3688 	WREG32(CP_RB0_CNTL, tmp);
3689 
3690 	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3691 
3692 	/* ring 1 - compute only */
3693 	/* Set ring buffer size */
3694 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3695 	rb_bufsz = order_base_2(ring->ring_size / 8);
3696 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3697 #ifdef __BIG_ENDIAN
3698 	tmp |= BUF_SWAP_32BIT;
3699 #endif
3700 	WREG32(CP_RB1_CNTL, tmp);
3701 
3702 	/* Initialize the ring buffer's read and write pointers */
3703 	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3704 	ring->wptr = 0;
3705 	WREG32(CP_RB1_WPTR, ring->wptr);
3706 
3707 	/* set the wb address whether it's enabled or not */
3708 	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3709 	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3710 
3711 	mdelay(1);
3712 	WREG32(CP_RB1_CNTL, tmp);
3713 
3714 	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3715 
3716 	/* ring 2 - compute only */
3717 	/* Set ring buffer size */
3718 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3719 	rb_bufsz = order_base_2(ring->ring_size / 8);
3720 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3721 #ifdef __BIG_ENDIAN
3722 	tmp |= BUF_SWAP_32BIT;
3723 #endif
3724 	WREG32(CP_RB2_CNTL, tmp);
3725 
3726 	/* Initialize the ring buffer's read and write pointers */
3727 	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3728 	ring->wptr = 0;
3729 	WREG32(CP_RB2_WPTR, ring->wptr);
3730 
3731 	/* set the wb address whether it's enabled or not */
3732 	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3733 	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3734 
3735 	mdelay(1);
3736 	WREG32(CP_RB2_CNTL, tmp);
3737 
3738 	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3739 
3740 	/* start the rings */
3741 	r = si_cp_start(rdev);
	if (r) {
		DRM_ERROR("radeon: cp failed to start (%d).\n", r);
		return r;
	}
3742 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3743 	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3744 	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3745 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3746 	if (r) {
3747 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3748 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3749 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3750 		return r;
3751 	}
3752 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3753 	if (r)
3754 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3756 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3757 	if (r)
3758 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3760 
3761 	si_enable_gui_idle_interrupt(rdev, true);
3762 
3763 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3764 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3765 
3766 	return 0;
3767 }
3768 
3769 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3770 {
3771 	u32 reset_mask = 0;
3772 	u32 tmp;
3773 
3774 	/* GRBM_STATUS */
3775 	tmp = RREG32(GRBM_STATUS);
3776 	if (tmp & (PA_BUSY | SC_BUSY |
3777 		   BCI_BUSY | SX_BUSY |
3778 		   TA_BUSY | VGT_BUSY |
3779 		   DB_BUSY | CB_BUSY |
3780 		   GDS_BUSY | SPI_BUSY |
3781 		   IA_BUSY | IA_BUSY_NO_DMA))
3782 		reset_mask |= RADEON_RESET_GFX;
3783 
3784 	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3785 		   CP_BUSY | CP_COHERENCY_BUSY))
3786 		reset_mask |= RADEON_RESET_CP;
3787 
3788 	if (tmp & GRBM_EE_BUSY)
3789 		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3790 
3791 	/* GRBM_STATUS2 */
3792 	tmp = RREG32(GRBM_STATUS2);
3793 	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3794 		reset_mask |= RADEON_RESET_RLC;
3795 
3796 	/* DMA_STATUS_REG 0 */
3797 	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3798 	if (!(tmp & DMA_IDLE))
3799 		reset_mask |= RADEON_RESET_DMA;
3800 
3801 	/* DMA_STATUS_REG 1 */
3802 	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3803 	if (!(tmp & DMA_IDLE))
3804 		reset_mask |= RADEON_RESET_DMA1;
3805 
3806 	/* SRBM_STATUS2 */
3807 	tmp = RREG32(SRBM_STATUS2);
3808 	if (tmp & DMA_BUSY)
3809 		reset_mask |= RADEON_RESET_DMA;
3810 
3811 	if (tmp & DMA1_BUSY)
3812 		reset_mask |= RADEON_RESET_DMA1;
3813 
3814 	/* SRBM_STATUS */
3815 	tmp = RREG32(SRBM_STATUS);
3816 
3817 	if (tmp & IH_BUSY)
3818 		reset_mask |= RADEON_RESET_IH;
3819 
3820 	if (tmp & SEM_BUSY)
3821 		reset_mask |= RADEON_RESET_SEM;
3822 
3823 	if (tmp & GRBM_RQ_PENDING)
3824 		reset_mask |= RADEON_RESET_GRBM;
3825 
3826 	if (tmp & VMC_BUSY)
3827 		reset_mask |= RADEON_RESET_VMC;
3828 
3829 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3830 		   MCC_BUSY | MCD_BUSY))
3831 		reset_mask |= RADEON_RESET_MC;
3832 
3833 	if (evergreen_is_display_hung(rdev))
3834 		reset_mask |= RADEON_RESET_DISPLAY;
3835 
3836 	/* VM_L2_STATUS */
3837 	tmp = RREG32(VM_L2_STATUS);
3838 	if (tmp & L2_BUSY)
3839 		reset_mask |= RADEON_RESET_VMC;
3840 
3841 	/* Skip MC reset as it's most likely not hung, just busy */
3842 	if (reset_mask & RADEON_RESET_MC) {
3843 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3844 		reset_mask &= ~RADEON_RESET_MC;
3845 	}
3846 
3847 	return reset_mask;
3848 }
3849 
3850 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3851 {
3852 	struct evergreen_mc_save save;
3853 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3854 	u32 tmp;
3855 
3856 	if (reset_mask == 0)
3857 		return;
3858 
3859 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3860 
3861 	evergreen_print_gpu_status_regs(rdev);
3862 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3863 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3864 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3865 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3866 
3867 	/* disable PG/CG */
3868 	si_fini_pg(rdev);
3869 	si_fini_cg(rdev);
3870 
3871 	/* stop the rlc */
3872 	si_rlc_stop(rdev);
3873 
3874 	/* Disable CP parsing/prefetching */
3875 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3876 
3877 	if (reset_mask & RADEON_RESET_DMA) {
3878 		/* dma0 */
3879 		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3880 		tmp &= ~DMA_RB_ENABLE;
3881 		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3882 	}
3883 	if (reset_mask & RADEON_RESET_DMA1) {
3884 		/* dma1 */
3885 		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3886 		tmp &= ~DMA_RB_ENABLE;
3887 		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3888 	}
3889 
3890 	udelay(50);
3891 
3892 	evergreen_mc_stop(rdev, &save);
3893 	if (evergreen_mc_wait_for_idle(rdev))
3894 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3896 
3897 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3898 		grbm_soft_reset = SOFT_RESET_CB |
3899 			SOFT_RESET_DB |
3900 			SOFT_RESET_GDS |
3901 			SOFT_RESET_PA |
3902 			SOFT_RESET_SC |
3903 			SOFT_RESET_BCI |
3904 			SOFT_RESET_SPI |
3905 			SOFT_RESET_SX |
3906 			SOFT_RESET_TC |
3907 			SOFT_RESET_TA |
3908 			SOFT_RESET_VGT |
3909 			SOFT_RESET_IA;
3910 	}
3911 
3912 	if (reset_mask & RADEON_RESET_CP) {
3913 		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3914 
3915 		srbm_soft_reset |= SOFT_RESET_GRBM;
3916 	}
3917 
3918 	if (reset_mask & RADEON_RESET_DMA)
3919 		srbm_soft_reset |= SOFT_RESET_DMA;
3920 
3921 	if (reset_mask & RADEON_RESET_DMA1)
3922 		srbm_soft_reset |= SOFT_RESET_DMA1;
3923 
3924 	if (reset_mask & RADEON_RESET_DISPLAY)
3925 		srbm_soft_reset |= SOFT_RESET_DC;
3926 
3927 	if (reset_mask & RADEON_RESET_RLC)
3928 		grbm_soft_reset |= SOFT_RESET_RLC;
3929 
3930 	if (reset_mask & RADEON_RESET_SEM)
3931 		srbm_soft_reset |= SOFT_RESET_SEM;
3932 
3933 	if (reset_mask & RADEON_RESET_IH)
3934 		srbm_soft_reset |= SOFT_RESET_IH;
3935 
3936 	if (reset_mask & RADEON_RESET_GRBM)
3937 		srbm_soft_reset |= SOFT_RESET_GRBM;
3938 
3939 	if (reset_mask & RADEON_RESET_VMC)
3940 		srbm_soft_reset |= SOFT_RESET_VMC;
3941 
3942 	if (reset_mask & RADEON_RESET_MC)
3943 		srbm_soft_reset |= SOFT_RESET_MC;
3944 
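	/* read each reset register back to post the write before the delay */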
3945 	if (grbm_soft_reset) {
3946 		tmp = RREG32(GRBM_SOFT_RESET);
3947 		tmp |= grbm_soft_reset;
3948 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3949 		WREG32(GRBM_SOFT_RESET, tmp);
3950 		tmp = RREG32(GRBM_SOFT_RESET);
3951 
3952 		udelay(50);
3953 
3954 		tmp &= ~grbm_soft_reset;
3955 		WREG32(GRBM_SOFT_RESET, tmp);
3956 		tmp = RREG32(GRBM_SOFT_RESET);
3957 	}
3958 
3959 	if (srbm_soft_reset) {
3960 		tmp = RREG32(SRBM_SOFT_RESET);
3961 		tmp |= srbm_soft_reset;
3962 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3963 		WREG32(SRBM_SOFT_RESET, tmp);
3964 		tmp = RREG32(SRBM_SOFT_RESET);
3965 
3966 		udelay(50);
3967 
3968 		tmp &= ~srbm_soft_reset;
3969 		WREG32(SRBM_SOFT_RESET, tmp);
3970 		tmp = RREG32(SRBM_SOFT_RESET);
3971 	}
3972 
3973 	/* Wait a little for things to settle down */
3974 	udelay(50);
3975 
3976 	evergreen_mc_resume(rdev, &save);
3977 	udelay(50);
3978 
3979 	evergreen_print_gpu_status_regs(rdev);
3980 }
3981 
3982 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3983 {
3984 	u32 tmp, i;
3985 
3986 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3987 	tmp |= SPLL_BYPASS_EN;
3988 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3989 
3990 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3991 	tmp |= SPLL_CTLREQ_CHG;
3992 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3993 
3994 	for (i = 0; i < rdev->usec_timeout; i++) {
3995 		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
3996 			break;
3997 		udelay(1);
3998 	}
3999 
4000 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
4001 	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
4002 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
4003 
4004 	tmp = RREG32(MPLL_CNTL_MODE);
4005 	tmp &= ~MPLL_MCLK_SEL;
4006 	WREG32(MPLL_CNTL_MODE, tmp);
4007 }
4008 
4009 static void si_spll_powerdown(struct radeon_device *rdev)
4010 {
4011 	u32 tmp;
4012 
4013 	tmp = RREG32(SPLL_CNTL_MODE);
4014 	tmp |= SPLL_SW_DIR_CONTROL;
4015 	WREG32(SPLL_CNTL_MODE, tmp);
4016 
4017 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4018 	tmp |= SPLL_RESET;
4019 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4020 
4021 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4022 	tmp |= SPLL_SLEEP;
4023 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4024 
4025 	tmp = RREG32(SPLL_CNTL_MODE);
4026 	tmp &= ~SPLL_SW_DIR_CONTROL;
4027 	WREG32(SPLL_CNTL_MODE, tmp);
4028 }
4029 
4030 static void si_gpu_pci_config_reset(struct radeon_device *rdev)
4031 {
4032 	struct evergreen_mc_save save;
4033 	u32 tmp, i;
4034 
4035 	dev_info(rdev->dev, "GPU pci config reset\n");
4036 
4037 	/* disable dpm? */
4038 
4039 	/* disable cg/pg */
4040 	si_fini_pg(rdev);
4041 	si_fini_cg(rdev);
4042 
4043 	/* Disable CP parsing/prefetching */
4044 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4045 	/* dma0 */
4046 	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
4047 	tmp &= ~DMA_RB_ENABLE;
4048 	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
4049 	/* dma1 */
4050 	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
4051 	tmp &= ~DMA_RB_ENABLE;
4052 	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
4053 	/* XXX other engines? */
4054 
4055 	/* halt the rlc, disable cp internal ints */
4056 	si_rlc_stop(rdev);
4057 
4058 	udelay(50);
4059 
4060 	/* disable mem access */
4061 	evergreen_mc_stop(rdev, &save);
4062 	if (evergreen_mc_wait_for_idle(rdev))
4063 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4065 
4066 	/* set mclk/sclk to bypass */
4067 	si_set_clk_bypass_mode(rdev);
4068 	/* powerdown spll */
4069 	si_spll_powerdown(rdev);
4070 	/* disable BM */
4071 	pci_clear_master(rdev->pdev);
4072 	/* reset */
4073 	radeon_pci_config_reset(rdev);
4074 	/* wait for asic to come out of reset */
4075 	for (i = 0; i < rdev->usec_timeout; i++) {
4076 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
4077 			break;
4078 		udelay(1);
4079 	}
4080 }
4081 
4082 int si_asic_reset(struct radeon_device *rdev, bool hard)
4083 {
4084 	u32 reset_mask;
4085 
4086 	if (hard) {
4087 		si_gpu_pci_config_reset(rdev);
4088 		return 0;
4089 	}
4090 
4091 	reset_mask = si_gpu_check_soft_reset(rdev);
4092 
4093 	if (reset_mask)
4094 		r600_set_bios_scratch_engine_hung(rdev, true);
4095 
4096 	/* try soft reset */
4097 	si_gpu_soft_reset(rdev, reset_mask);
4098 
4099 	reset_mask = si_gpu_check_soft_reset(rdev);
4100 
4101 	/* try pci config reset */
4102 	if (reset_mask && radeon_hard_reset)
4103 		si_gpu_pci_config_reset(rdev);
4104 
4105 	reset_mask = si_gpu_check_soft_reset(rdev);
4106 
4107 	if (!reset_mask)
4108 		r600_set_bios_scratch_engine_hung(rdev, false);
4109 
4110 	return 0;
4111 }
4112 
4113 /**
4114  * si_gfx_is_lockup - Check if the GFX engine is locked up
4115  *
4116  * @rdev: radeon_device pointer
4117  * @ring: radeon_ring structure holding ring information
4118  *
4119  * Check if the GFX engine is locked up.
4120  * Returns true if the engine appears to be locked up, false if not.
4121  */
4122 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4123 {
4124 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
4125 
4126 	if (!(reset_mask & (RADEON_RESET_GFX |
4127 			    RADEON_RESET_COMPUTE |
4128 			    RADEON_RESET_CP))) {
4129 		radeon_ring_lockup_update(rdev, ring);
4130 		return false;
4131 	}
4132 	return radeon_ring_test_lockup(rdev, ring);
4133 }
4134 
4135 /* MC */
4136 static void si_mc_program(struct radeon_device *rdev)
4137 {
4138 	struct evergreen_mc_save save;
4139 	u32 tmp;
4140 	int i, j;
4141 
4142 	/* Initialize HDP */
4143 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4144 		WREG32((0x2c14 + j), 0x00000000);
4145 		WREG32((0x2c18 + j), 0x00000000);
4146 		WREG32((0x2c1c + j), 0x00000000);
4147 		WREG32((0x2c20 + j), 0x00000000);
4148 		WREG32((0x2c24 + j), 0x00000000);
4149 	}
4150 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4151 
4152 	evergreen_mc_stop(rdev, &save);
4153 	if (radeon_mc_wait_for_idle(rdev))
4154 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4156 	if (!ASIC_IS_NODCE(rdev))
4157 		/* Lock out access through the VGA aperture */
4158 		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4159 	/* Update configuration */
4160 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4161 	       rdev->mc.vram_start >> 12);
4162 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4163 	       rdev->mc.vram_end >> 12);
4164 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4165 	       rdev->vram_scratch.gpu_addr >> 12);
4166 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4167 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
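	/*
	 * Both halves of MC_VM_FB_LOCATION are in 16 MB units; e.g. VRAM at
	 * 0 with vram_end = 0x7fffffff (2 GB) packs as 0x007f0000.
	 */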
4168 	WREG32(MC_VM_FB_LOCATION, tmp);
4169 	/* XXX double check these! */
4170 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4171 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4172 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4173 	WREG32(MC_VM_AGP_BASE, 0);
4174 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4175 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4176 	if (radeon_mc_wait_for_idle(rdev))
4177 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4179 	evergreen_mc_resume(rdev, &save);
4180 	if (!ASIC_IS_NODCE(rdev)) {
4181 		/* we need to own VRAM, so turn off the VGA renderer here
4182 		 * to stop it overwriting our objects */
4183 		rv515_vga_render_disable(rdev);
4184 	}
4185 }
4186 
4187 void si_vram_gtt_location(struct radeon_device *rdev,
4188 			  struct radeon_mc *mc)
4189 {
4190 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
4191 		/* leave room for at least 1024M GTT */
4192 		dev_warn(rdev->dev, "limiting VRAM\n");
4193 		mc->real_vram_size = 0xFFC0000000ULL;
4194 		mc->mc_vram_size = 0xFFC0000000ULL;
4195 	}
4196 	radeon_vram_location(rdev, &rdev->mc, 0);
4197 	rdev->mc.gtt_base_align = 0;
4198 	radeon_gtt_location(rdev, mc);
4199 }
4200 
4201 static int si_mc_init(struct radeon_device *rdev)
4202 {
4203 	u32 tmp;
4204 	int chansize, numchan;
4205 
4206 	/* Get VRAM information */
4207 	rdev->mc.vram_is_ddr = true;
4208 	tmp = RREG32(MC_ARB_RAMCFG);
4209 	if (tmp & CHANSIZE_OVERRIDE) {
4210 		chansize = 16;
4211 	} else if (tmp & CHANSIZE_MASK) {
4212 		chansize = 64;
4213 	} else {
4214 		chansize = 32;
4215 	}
4216 	tmp = RREG32(MC_SHARED_CHMAP);
4217 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4218 	case 0:
4219 	default:
4220 		numchan = 1;
4221 		break;
4222 	case 1:
4223 		numchan = 2;
4224 		break;
4225 	case 2:
4226 		numchan = 4;
4227 		break;
4228 	case 3:
4229 		numchan = 8;
4230 		break;
4231 	case 4:
4232 		numchan = 3;
4233 		break;
4234 	case 5:
4235 		numchan = 6;
4236 		break;
4237 	case 6:
4238 		numchan = 10;
4239 		break;
4240 	case 7:
4241 		numchan = 12;
4242 		break;
4243 	case 8:
4244 		numchan = 16;
4245 		break;
4246 	}
4247 	rdev->mc.vram_width = numchan * chansize;
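	/* e.g. NOOFCHAN = 3 (8 channels) at 32 bits each is a 256-bit bus */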
4248 	/* Could the aperture size report 0? */
4249 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4250 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4251 	/* size in MB on si */
4252 	tmp = RREG32(CONFIG_MEMSIZE);
4253 	/* some boards may have garbage in the upper 16 bits */
4254 	if (tmp & 0xffff0000) {
4255 		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4256 		if (tmp & 0xffff)
4257 			tmp &= 0xffff;
4258 	}
4259 	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4260 	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4261 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4262 	si_vram_gtt_location(rdev, &rdev->mc);
4263 	radeon_update_bandwidth_info(rdev);
4264 
4265 	return 0;
4266 }
4267 
4268 /*
4269  * GART
4270  */
4271 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4272 {
4273 	/* flush hdp cache */
4274 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4275 
4276 	/* bits 0-15 are the VM contexts0-15 */
4277 	WREG32(VM_INVALIDATE_REQUEST, 1);
4278 }
4279 
4280 static int si_pcie_gart_enable(struct radeon_device *rdev)
4281 {
4282 	int r, i;
4283 
4284 	if (rdev->gart.robj == NULL) {
4285 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4286 		return -EINVAL;
4287 	}
4288 	r = radeon_gart_table_vram_pin(rdev);
4289 	if (r)
4290 		return r;
4291 	/* Setup TLB control */
4292 	WREG32(MC_VM_MX_L1_TLB_CNTL,
4293 	       (0xA << 7) |
4294 	       ENABLE_L1_TLB |
4295 	       ENABLE_L1_FRAGMENT_PROCESSING |
4296 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4297 	       ENABLE_ADVANCED_DRIVER_MODEL |
4298 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4299 	/* Setup L2 cache */
4300 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4301 	       ENABLE_L2_FRAGMENT_PROCESSING |
4302 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4303 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4304 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4305 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4306 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4307 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4308 	       BANK_SELECT(4) |
4309 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4310 	/* setup context0 */
4311 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4312 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4313 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4314 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4315 			(u32)(rdev->dummy_page.addr >> 12));
4316 	WREG32(VM_CONTEXT0_CNTL2, 0);
4317 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4318 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4319 
4320 	WREG32(0x15D4, 0);
4321 	WREG32(0x15D8, 0);
4322 	WREG32(0x15DC, 0);
4323 
4324 	/* empty context1-15 */
4325 	/* set vm size, must be a multiple of 4 */
4326 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4327 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
4328 	/* Assign the pt base to something valid for now; the pts used for
4329 	 * the VMs are determined by the application and setup and assigned
4330 	 * on the fly in the vm part of radeon_gart.c
4331 	 */
4332 	for (i = 1; i < 16; i++) {
4333 		if (i < 8)
4334 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4335 			       rdev->vm_manager.saved_table_addr[i]);
4336 		else
4337 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4338 			       rdev->vm_manager.saved_table_addr[i]);
4339 	}
4340 
4341 	/* enable context1-15 */
4342 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4343 	       (u32)(rdev->dummy_page.addr >> 12));
4344 	WREG32(VM_CONTEXT1_CNTL2, 4);
4345 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4346 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
4347 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4348 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4349 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4350 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4351 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4352 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4353 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4354 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4355 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4356 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4357 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4358 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4359 
4360 	si_pcie_gart_tlb_flush(rdev);
4361 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4362 		 (unsigned)(rdev->mc.gtt_size >> 20),
4363 		 (unsigned long long)rdev->gart.table_addr);
4364 	rdev->gart.ready = true;
4365 	return 0;
4366 }
4367 
4368 static void si_pcie_gart_disable(struct radeon_device *rdev)
4369 {
4370 	unsigned i;
4371 
4372 	for (i = 1; i < 16; ++i) {
4373 		uint32_t reg;
4374 		if (i < 8)
4375 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4376 		else
4377 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4378 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4379 	}
4380 
4381 	/* Disable all tables */
4382 	WREG32(VM_CONTEXT0_CNTL, 0);
4383 	WREG32(VM_CONTEXT1_CNTL, 0);
4384 	/* Setup TLB control */
4385 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4386 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4387 	/* Setup L2 cache */
4388 	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4389 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4390 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4391 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4392 	WREG32(VM_L2_CNTL2, 0);
4393 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4394 	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4395 	radeon_gart_table_vram_unpin(rdev);
4396 }
4397 
4398 static void si_pcie_gart_fini(struct radeon_device *rdev)
4399 {
4400 	si_pcie_gart_disable(rdev);
4401 	radeon_gart_table_vram_free(rdev);
4402 	radeon_gart_fini(rdev);
4403 }
4404 
4405 /* vm parser */
4406 static bool si_vm_reg_valid(u32 reg)
4407 {
4408 	/* context regs are fine */
4409 	if (reg >= 0x28000)
4410 		return true;
4411 
4412 	/* shader regs are also fine */
4413 	if (reg >= 0xB000 && reg < 0xC000)
4414 		return true;
4415 
4416 	/* check config regs */
4417 	switch (reg) {
4418 	case GRBM_GFX_INDEX:
4419 	case CP_STRMOUT_CNTL:
4420 	case VGT_VTX_VECT_EJECT_REG:
4421 	case VGT_CACHE_INVALIDATION:
4422 	case VGT_ESGS_RING_SIZE:
4423 	case VGT_GSVS_RING_SIZE:
4424 	case VGT_GS_VERTEX_REUSE:
4425 	case VGT_PRIMITIVE_TYPE:
4426 	case VGT_INDEX_TYPE:
4427 	case VGT_NUM_INDICES:
4428 	case VGT_NUM_INSTANCES:
4429 	case VGT_TF_RING_SIZE:
4430 	case VGT_HS_OFFCHIP_PARAM:
4431 	case VGT_TF_MEMORY_BASE:
4432 	case PA_CL_ENHANCE:
4433 	case PA_SU_LINE_STIPPLE_VALUE:
4434 	case PA_SC_LINE_STIPPLE_STATE:
4435 	case PA_SC_ENHANCE:
4436 	case SQC_CACHES:
4437 	case SPI_STATIC_THREAD_MGMT_1:
4438 	case SPI_STATIC_THREAD_MGMT_2:
4439 	case SPI_STATIC_THREAD_MGMT_3:
4440 	case SPI_PS_MAX_WAVE_ID:
4441 	case SPI_CONFIG_CNTL:
4442 	case SPI_CONFIG_CNTL_1:
4443 	case TA_CNTL_AUX:
4444 	case TA_CS_BC_BASE_ADDR:
4445 		return true;
4446 	default:
4447 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4448 		return false;
4449 	}
4450 }
4451 
4452 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4453 				  u32 *ib, struct radeon_cs_packet *pkt)
4454 {
4455 	switch (pkt->opcode) {
4456 	case PACKET3_NOP:
4457 	case PACKET3_SET_BASE:
4458 	case PACKET3_SET_CE_DE_COUNTERS:
4459 	case PACKET3_LOAD_CONST_RAM:
4460 	case PACKET3_WRITE_CONST_RAM:
4461 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4462 	case PACKET3_DUMP_CONST_RAM:
4463 	case PACKET3_INCREMENT_CE_COUNTER:
4464 	case PACKET3_WAIT_ON_DE_COUNTER:
4465 	case PACKET3_CE_WRITE:
4466 		break;
4467 	default:
4468 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4469 		return -EINVAL;
4470 	}
4471 	return 0;
4472 }
4473 
4474 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4475 {
4476 	u32 start_reg, reg, i;
4477 	u32 command = ib[idx + 4];
4478 	u32 info = ib[idx + 1];
4479 	u32 idx_value = ib[idx];
4480 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4481 		/* src address space is register */
4482 		if (((info & 0x60000000) >> 29) == 0) {
4483 			start_reg = idx_value << 2;
4484 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4485 				reg = start_reg;
4486 				if (!si_vm_reg_valid(reg)) {
4487 					DRM_ERROR("CP DMA Bad SRC register\n");
4488 					return -EINVAL;
4489 				}
4490 			} else {
4491 				for (i = 0; i < (command & 0x1fffff); i++) {
4492 					reg = start_reg + (4 * i);
4493 					if (!si_vm_reg_valid(reg)) {
4494 						DRM_ERROR("CP DMA Bad SRC register\n");
4495 						return -EINVAL;
4496 					}
4497 				}
4498 			}
4499 		}
4500 	}
4501 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4502 		/* dst address space is register */
4503 		if (((info & 0x00300000) >> 20) == 0) {
4504 			start_reg = ib[idx + 2];
4505 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4506 				reg = start_reg;
4507 				if (!si_vm_reg_valid(reg)) {
4508 					DRM_ERROR("CP DMA Bad DST register\n");
4509 					return -EINVAL;
4510 				}
4511 			} else {
4512 				for (i = 0; i < (command & 0x1fffff); i++) {
4513 					reg = start_reg + (4 * i);
4514 					if (!si_vm_reg_valid(reg)) {
4515 						DRM_ERROR("CP DMA Bad DST register\n");
4516 						return -EINVAL;
4517 					}
4518 				}
4519 			}
4520 		}
4521 	}
4522 	return 0;
4523 }
4524 
4525 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4526 				   u32 *ib, struct radeon_cs_packet *pkt)
4527 {
4528 	int r;
4529 	u32 idx = pkt->idx + 1;
4530 	u32 idx_value = ib[idx];
4531 	u32 start_reg, end_reg, reg, i;
4532 
4533 	switch (pkt->opcode) {
4534 	case PACKET3_NOP:
4535 	case PACKET3_SET_BASE:
4536 	case PACKET3_CLEAR_STATE:
4537 	case PACKET3_INDEX_BUFFER_SIZE:
4538 	case PACKET3_DISPATCH_DIRECT:
4539 	case PACKET3_DISPATCH_INDIRECT:
4540 	case PACKET3_ALLOC_GDS:
4541 	case PACKET3_WRITE_GDS_RAM:
4542 	case PACKET3_ATOMIC_GDS:
4543 	case PACKET3_ATOMIC:
4544 	case PACKET3_OCCLUSION_QUERY:
4545 	case PACKET3_SET_PREDICATION:
4546 	case PACKET3_COND_EXEC:
4547 	case PACKET3_PRED_EXEC:
4548 	case PACKET3_DRAW_INDIRECT:
4549 	case PACKET3_DRAW_INDEX_INDIRECT:
4550 	case PACKET3_INDEX_BASE:
4551 	case PACKET3_DRAW_INDEX_2:
4552 	case PACKET3_CONTEXT_CONTROL:
4553 	case PACKET3_INDEX_TYPE:
4554 	case PACKET3_DRAW_INDIRECT_MULTI:
4555 	case PACKET3_DRAW_INDEX_AUTO:
4556 	case PACKET3_DRAW_INDEX_IMMD:
4557 	case PACKET3_NUM_INSTANCES:
4558 	case PACKET3_DRAW_INDEX_MULTI_AUTO:
4559 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4560 	case PACKET3_DRAW_INDEX_OFFSET_2:
4561 	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4562 	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4563 	case PACKET3_MPEG_INDEX:
4564 	case PACKET3_WAIT_REG_MEM:
4565 	case PACKET3_MEM_WRITE:
4566 	case PACKET3_PFP_SYNC_ME:
4567 	case PACKET3_SURFACE_SYNC:
4568 	case PACKET3_EVENT_WRITE:
4569 	case PACKET3_EVENT_WRITE_EOP:
4570 	case PACKET3_EVENT_WRITE_EOS:
4571 	case PACKET3_SET_CONTEXT_REG:
4572 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4573 	case PACKET3_SET_SH_REG:
4574 	case PACKET3_SET_SH_REG_OFFSET:
4575 	case PACKET3_INCREMENT_DE_COUNTER:
4576 	case PACKET3_WAIT_ON_CE_COUNTER:
4577 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4578 	case PACKET3_ME_WRITE:
4579 		break;
4580 	case PACKET3_COPY_DATA:
4581 		if ((idx_value & 0xf00) == 0) {
4582 			reg = ib[idx + 3] * 4;
4583 			if (!si_vm_reg_valid(reg))
4584 				return -EINVAL;
4585 		}
4586 		break;
4587 	case PACKET3_WRITE_DATA:
4588 		if ((idx_value & 0xf00) == 0) {
4589 			start_reg = ib[idx + 1] * 4;
4590 			if (idx_value & 0x10000) {
4591 				if (!si_vm_reg_valid(start_reg))
4592 					return -EINVAL;
4593 			} else {
4594 				for (i = 0; i < (pkt->count - 2); i++) {
4595 					reg = start_reg + (4 * i);
4596 					if (!si_vm_reg_valid(reg))
4597 						return -EINVAL;
4598 				}
4599 			}
4600 		}
4601 		break;
4602 	case PACKET3_COND_WRITE:
4603 		if (idx_value & 0x100) {
4604 			reg = ib[idx + 5] * 4;
4605 			if (!si_vm_reg_valid(reg))
4606 				return -EINVAL;
4607 		}
4608 		break;
4609 	case PACKET3_COPY_DW:
4610 		if (idx_value & 0x2) {
4611 			reg = ib[idx + 3] * 4;
4612 			if (!si_vm_reg_valid(reg))
4613 				return -EINVAL;
4614 		}
4615 		break;
4616 	case PACKET3_SET_CONFIG_REG:
4617 		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4618 		end_reg = 4 * pkt->count + start_reg - 4;
4619 		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4620 		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4621 		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4622 			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4623 			return -EINVAL;
4624 		}
4625 		for (i = 0; i < pkt->count; i++) {
4626 			reg = start_reg + (4 * i);
4627 			if (!si_vm_reg_valid(reg))
4628 				return -EINVAL;
4629 		}
4630 		break;
4631 	case PACKET3_CP_DMA:
4632 		r = si_vm_packet3_cp_dma_check(ib, idx);
4633 		if (r)
4634 			return r;
4635 		break;
4636 	default:
4637 		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4638 		return -EINVAL;
4639 	}
4640 	return 0;
4641 }
4642 
4643 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4644 				       u32 *ib, struct radeon_cs_packet *pkt)
4645 {
4646 	int r;
4647 	u32 idx = pkt->idx + 1;
4648 	u32 idx_value = ib[idx];
4649 	u32 start_reg, reg, i;
4650 
4651 	switch (pkt->opcode) {
4652 	case PACKET3_NOP:
4653 	case PACKET3_SET_BASE:
4654 	case PACKET3_CLEAR_STATE:
4655 	case PACKET3_DISPATCH_DIRECT:
4656 	case PACKET3_DISPATCH_INDIRECT:
4657 	case PACKET3_ALLOC_GDS:
4658 	case PACKET3_WRITE_GDS_RAM:
4659 	case PACKET3_ATOMIC_GDS:
4660 	case PACKET3_ATOMIC:
4661 	case PACKET3_OCCLUSION_QUERY:
4662 	case PACKET3_SET_PREDICATION:
4663 	case PACKET3_COND_EXEC:
4664 	case PACKET3_PRED_EXEC:
4665 	case PACKET3_CONTEXT_CONTROL:
4666 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4667 	case PACKET3_WAIT_REG_MEM:
4668 	case PACKET3_MEM_WRITE:
4669 	case PACKET3_PFP_SYNC_ME:
4670 	case PACKET3_SURFACE_SYNC:
4671 	case PACKET3_EVENT_WRITE:
4672 	case PACKET3_EVENT_WRITE_EOP:
4673 	case PACKET3_EVENT_WRITE_EOS:
4674 	case PACKET3_SET_CONTEXT_REG:
4675 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4676 	case PACKET3_SET_SH_REG:
4677 	case PACKET3_SET_SH_REG_OFFSET:
4678 	case PACKET3_INCREMENT_DE_COUNTER:
4679 	case PACKET3_WAIT_ON_CE_COUNTER:
4680 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4681 	case PACKET3_ME_WRITE:
4682 		break;
4683 	case PACKET3_COPY_DATA:
4684 		if ((idx_value & 0xf00) == 0) {
4685 			reg = ib[idx + 3] * 4;
4686 			if (!si_vm_reg_valid(reg))
4687 				return -EINVAL;
4688 		}
4689 		break;
4690 	case PACKET3_WRITE_DATA:
4691 		if ((idx_value & 0xf00) == 0) {
4692 			start_reg = ib[idx + 1] * 4;
4693 			if (idx_value & 0x10000) {
4694 				if (!si_vm_reg_valid(start_reg))
4695 					return -EINVAL;
4696 			} else {
4697 				for (i = 0; i < (pkt->count - 2); i++) {
4698 					reg = start_reg + (4 * i);
4699 					if (!si_vm_reg_valid(reg))
4700 						return -EINVAL;
4701 				}
4702 			}
4703 		}
4704 		break;
4705 	case PACKET3_COND_WRITE:
4706 		if (idx_value & 0x100) {
4707 			reg = ib[idx + 5] * 4;
4708 			if (!si_vm_reg_valid(reg))
4709 				return -EINVAL;
4710 		}
4711 		break;
4712 	case PACKET3_COPY_DW:
4713 		if (idx_value & 0x2) {
4714 			reg = ib[idx + 3] * 4;
4715 			if (!si_vm_reg_valid(reg))
4716 				return -EINVAL;
4717 		}
4718 		break;
4719 	case PACKET3_CP_DMA:
4720 		r = si_vm_packet3_cp_dma_check(ib, idx);
4721 		if (r)
4722 			return r;
4723 		break;
4724 	default:
4725 		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4726 		return -EINVAL;
4727 	}
4728 	return 0;
4729 }
4730 
4731 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4732 {
4733 	int ret = 0;
4734 	u32 idx = 0, i;
4735 	struct radeon_cs_packet pkt;
4736 
4737 	do {
4738 		pkt.idx = idx;
4739 		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4740 		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4741 		pkt.one_reg_wr = 0;
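		/*
		 * PM4 header layout, for reference: bits 31:30 are the packet
		 * type, bits 29:16 the payload size minus one, and for type-3
		 * packets bits 15:8 the opcode.
		 */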
4742 		switch (pkt.type) {
4743 		case RADEON_PACKET_TYPE0:
4744 			dev_err(rdev->dev, "Packet0 not allowed!\n");
4745 			ret = -EINVAL;
4746 			break;
4747 		case RADEON_PACKET_TYPE2:
4748 			idx += 1;
4749 			break;
4750 		case RADEON_PACKET_TYPE3:
4751 			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4752 			if (ib->is_const_ib)
4753 				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4754 			else {
4755 				switch (ib->ring) {
4756 				case RADEON_RING_TYPE_GFX_INDEX:
4757 					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4758 					break;
4759 				case CAYMAN_RING_TYPE_CP1_INDEX:
4760 				case CAYMAN_RING_TYPE_CP2_INDEX:
4761 					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4762 					break;
4763 				default:
4764 					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4765 					ret = -EINVAL;
4766 					break;
4767 				}
4768 			}
4769 			idx += pkt.count + 2;
4770 			break;
4771 		default:
4772 			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4773 			ret = -EINVAL;
4774 			break;
4775 		}
4776 		if (ret) {
4777 			for (i = 0; i < ib->length_dw; i++) {
4778 				if (i == idx)
4779 					pr_err("\t0x%08x <---\n", ib->ptr[i]);
4780 				else
4781 					pr_err("\t0x%08x\n", ib->ptr[i]);
4782 			}
4783 			break;
4784 		}
4785 	} while (idx < ib->length_dw);
4786 
4787 	return ret;
4788 }
4789 
4790 /*
4791  * vm
4792  */
4793 int si_vm_init(struct radeon_device *rdev)
4794 {
4795 	/* number of VMs */
4796 	rdev->vm_manager.nvm = 16;
4797 	/* base offset of vram pages */
4798 	rdev->vm_manager.vram_base_offset = 0;
4799 
4800 	return 0;
4801 }
4802 
4803 void si_vm_fini(struct radeon_device *rdev)
4804 {
4805 }
4806 
4807 /**
4808  * si_vm_decode_fault - print human readable fault info
4809  *
4810  * @rdev: radeon_device pointer
4811  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4812  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4813  *
4814  * Print human readable fault information (SI).
4815  */
4816 static void si_vm_decode_fault(struct radeon_device *rdev,
4817 			       u32 status, u32 addr)
4818 {
4819 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4820 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4821 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4822 	char *block;
4823 
4824 	if (rdev->family == CHIP_TAHITI) {
4825 		switch (mc_id) {
4826 		case 160:
4827 		case 144:
4828 		case 96:
4829 		case 80:
4830 		case 224:
4831 		case 208:
4832 		case 32:
4833 		case 16:
4834 			block = "CB";
4835 			break;
4836 		case 161:
4837 		case 145:
4838 		case 97:
4839 		case 81:
4840 		case 225:
4841 		case 209:
4842 		case 33:
4843 		case 17:
4844 			block = "CB_FMASK";
4845 			break;
4846 		case 162:
4847 		case 146:
4848 		case 98:
4849 		case 82:
4850 		case 226:
4851 		case 210:
4852 		case 34:
4853 		case 18:
4854 			block = "CB_CMASK";
4855 			break;
4856 		case 163:
4857 		case 147:
4858 		case 99:
4859 		case 83:
4860 		case 227:
4861 		case 211:
4862 		case 35:
4863 		case 19:
4864 			block = "CB_IMMED";
4865 			break;
4866 		case 164:
4867 		case 148:
4868 		case 100:
4869 		case 84:
4870 		case 228:
4871 		case 212:
4872 		case 36:
4873 		case 20:
4874 			block = "DB";
4875 			break;
4876 		case 165:
4877 		case 149:
4878 		case 101:
4879 		case 85:
4880 		case 229:
4881 		case 213:
4882 		case 37:
4883 		case 21:
4884 			block = "DB_HTILE";
4885 			break;
4886 		case 167:
4887 		case 151:
4888 		case 103:
4889 		case 87:
4890 		case 231:
4891 		case 215:
4892 		case 39:
4893 		case 23:
4894 			block = "DB_STEN";
4895 			break;
4896 		case 72:
4897 		case 68:
4898 		case 64:
4899 		case 8:
4900 		case 4:
4901 		case 0:
4902 		case 136:
4903 		case 132:
4904 		case 128:
4905 		case 200:
4906 		case 196:
4907 		case 192:
4908 			block = "TC";
4909 			break;
4910 		case 112:
4911 		case 48:
4912 			block = "CP";
4913 			break;
4914 		case 49:
4915 		case 177:
4916 		case 50:
4917 		case 178:
4918 			block = "SH";
4919 			break;
4920 		case 53:
4921 		case 190:
4922 			block = "VGT";
4923 			break;
4924 		case 117:
4925 			block = "IH";
4926 			break;
4927 		case 51:
4928 		case 115:
4929 			block = "RLC";
4930 			break;
4931 		case 119:
4932 		case 183:
4933 			block = "DMA0";
4934 			break;
4935 		case 61:
4936 			block = "DMA1";
4937 			break;
4938 		case 248:
4939 		case 120:
4940 			block = "HDP";
4941 			break;
4942 		default:
4943 			block = "unknown";
4944 			break;
4945 		}
4946 	} else {
4947 		switch (mc_id) {
4948 		case 32:
4949 		case 16:
4950 		case 96:
4951 		case 80:
4952 		case 160:
4953 		case 144:
4954 		case 224:
4955 		case 208:
4956 			block = "CB";
4957 			break;
4958 		case 33:
4959 		case 17:
4960 		case 97:
4961 		case 81:
4962 		case 161:
4963 		case 145:
4964 		case 225:
4965 		case 209:
4966 			block = "CB_FMASK";
4967 			break;
4968 		case 34:
4969 		case 18:
4970 		case 98:
4971 		case 82:
4972 		case 162:
4973 		case 146:
4974 		case 226:
4975 		case 210:
4976 			block = "CB_CMASK";
4977 			break;
4978 		case 35:
4979 		case 19:
4980 		case 99:
4981 		case 83:
4982 		case 163:
4983 		case 147:
4984 		case 227:
4985 		case 211:
4986 			block = "CB_IMMED";
4987 			break;
4988 		case 36:
4989 		case 20:
4990 		case 100:
4991 		case 84:
4992 		case 164:
4993 		case 148:
4994 		case 228:
4995 		case 212:
4996 			block = "DB";
4997 			break;
4998 		case 37:
4999 		case 21:
5000 		case 101:
5001 		case 85:
5002 		case 165:
5003 		case 149:
5004 		case 229:
5005 		case 213:
5006 			block = "DB_HTILE";
5007 			break;
5008 		case 39:
5009 		case 23:
5010 		case 103:
5011 		case 87:
5012 		case 167:
5013 		case 151:
5014 		case 231:
5015 		case 215:
5016 			block = "DB_STEN";
5017 			break;
5018 		case 72:
5019 		case 68:
5020 		case 8:
5021 		case 4:
5022 		case 136:
5023 		case 132:
5024 		case 200:
5025 		case 196:
5026 			block = "TC";
5027 			break;
5028 		case 112:
5029 		case 48:
5030 			block = "CP";
5031 			break;
5032 		case 49:
5033 		case 177:
5034 		case 50:
5035 		case 178:
5036 			block = "SH";
5037 			break;
5038 		case 53:
5039 			block = "VGT";
5040 			break;
5041 		case 117:
5042 			block = "IH";
5043 			break;
5044 		case 51:
5045 		case 115:
5046 			block = "RLC";
5047 			break;
5048 		case 119:
5049 		case 183:
5050 			block = "DMA0";
5051 			break;
5052 		case 61:
5053 			block = "DMA1";
5054 			break;
5055 		case 248:
5056 		case 120:
5057 			block = "HDP";
5058 			break;
5059 		default:
5060 			block = "unknown";
5061 			break;
5062 		}
5063 	}
5064 
5065 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5066 	       protections, vmid, addr,
5067 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5068 	       block, mc_id);
5069 }
5070 
5071 void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5072 		 unsigned vm_id, uint64_t pd_addr)
5073 {
5074 	/* write new base address */
5075 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5076 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5077 				 WRITE_DATA_DST_SEL(0)));
5078 
5079 	if (vm_id < 8) {
5080 		radeon_ring_write(ring,
5081 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5082 	} else {
5083 		radeon_ring_write(ring,
5084 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5085 	}
5086 	radeon_ring_write(ring, 0);
5087 	radeon_ring_write(ring, pd_addr >> 12);
5088 
5089 	/* flush hdp cache */
5090 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5091 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5092 				 WRITE_DATA_DST_SEL(0)));
5093 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5094 	radeon_ring_write(ring, 0);
5095 	radeon_ring_write(ring, 0x1);
5096 
5097 	/* bits 0-15 are the VM contexts 0-15 */
5098 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5099 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5100 				 WRITE_DATA_DST_SEL(0)));
5101 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5102 	radeon_ring_write(ring, 0);
5103 	radeon_ring_write(ring, 1 << vm_id);
5104 
5105 	/* wait for the invalidate to complete */
5106 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5107 	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
5108 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5109 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5110 	radeon_ring_write(ring, 0);
5111 	radeon_ring_write(ring, 0); /* ref */
5112 	radeon_ring_write(ring, 0); /* mask */
5113 	radeon_ring_write(ring, 0x20); /* poll interval */
5114 
5115 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
5116 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5117 	radeon_ring_write(ring, 0x0);
5118 }
5119 
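/*
 * Illustration only -- not called by the driver, and the helper name is
 * hypothetical.  The three WRITE_DATA sequences in si_vm_flush() above
 * share one shape: packet header, control word, register offset in
 * dwords, upper address word (0), then the value.  A generic form:
 */
static inline void si_ring_wreg_sketch(struct radeon_ring *ring,
				       u32 reg, u32 val)
{
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, WRITE_DATA_ENGINE_SEL(1) |
				WRITE_DATA_DST_SEL(0)); /* same selects as above */
	radeon_ring_write(ring, reg >> 2);	/* register offset in dwords */
	radeon_ring_write(ring, 0);		/* upper address word, unused */
	radeon_ring_write(ring, val);
}
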
5120 /*
5121  *  Power and clock gating
5122  */
5123 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5124 {
5125 	int i;
5126 
5127 	for (i = 0; i < rdev->usec_timeout; i++) {
5128 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5129 			break;
5130 		udelay(1);
5131 	}
5132 
5133 	for (i = 0; i < rdev->usec_timeout; i++) {
5134 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5135 			break;
5136 		udelay(1);
5137 	}
5138 }
5139 
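/*
 * Illustration only (hypothetical helper).  The bounded-poll idiom used
 * above -- spin until a register mask clears, giving up after
 * rdev->usec_timeout iterations of udelay(1) -- recurs throughout this
 * file; in generic form it would be:
 */
static inline bool si_poll_reg_clear_sketch(struct radeon_device *rdev,
					    u32 reg, u32 mask)
{
	int i;

	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(reg) & mask) == 0)
			return true;	/* bits cleared in time */
		udelay(1);
	}
	return false;			/* timed out */
}
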
5140 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5141 					 bool enable)
5142 {
5143 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5144 	u32 mask;
5145 	int i;
5146 
5147 	if (enable)
5148 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5149 	else
5150 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5151 	WREG32(CP_INT_CNTL_RING0, tmp);
5152 
5153 	if (!enable) {
5154 		/* read a gfx register */
5155 		tmp = RREG32(DB_DEPTH_INFO);
5156 
5157 		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5158 		for (i = 0; i < rdev->usec_timeout; i++) {
5159 			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5160 				break;
5161 			udelay(1);
5162 		}
5163 	}
5164 }
5165 
5166 static void si_set_uvd_dcm(struct radeon_device *rdev,
5167 			   bool sw_mode)
5168 {
5169 	u32 tmp, tmp2;
5170 
5171 	tmp = RREG32(UVD_CGC_CTRL);
5172 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5173 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
5174 
5175 	if (sw_mode) {
5176 		tmp &= ~0x7ffff800;
5177 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5178 	} else {
5179 		tmp |= 0x7ffff800;
5180 		tmp2 = 0;
5181 	}
5182 
5183 	WREG32(UVD_CGC_CTRL, tmp);
5184 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5185 }
5186 
5187 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5188 {
5189 	bool hw_mode = true;
5190 
5191 	if (hw_mode) {
5192 		si_set_uvd_dcm(rdev, false);
5193 	} else {
5194 		u32 tmp = RREG32(UVD_CGC_CTRL);
5195 		tmp &= ~DCM;
5196 		WREG32(UVD_CGC_CTRL, tmp);
5197 	}
5198 }
5199 
5200 static u32 si_halt_rlc(struct radeon_device *rdev)
5201 {
5202 	u32 data, orig;
5203 
5204 	orig = data = RREG32(RLC_CNTL);
5205 
5206 	if (data & RLC_ENABLE) {
5207 		data &= ~RLC_ENABLE;
5208 		WREG32(RLC_CNTL, data);
5209 
5210 		si_wait_for_rlc_serdes(rdev);
5211 	}
5212 
5213 	return orig;
5214 }
5215 
5216 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5217 {
5218 	u32 tmp;
5219 
5220 	tmp = RREG32(RLC_CNTL);
5221 	if (tmp != rlc)
5222 		WREG32(RLC_CNTL, rlc);
5223 }
5224 
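/*
 * Note: si_halt_rlc() and si_update_rlc() form a save/restore pair.
 * Callers below (si_enable_cgcg(), si_enable_mgcg()) effectively do:
 *
 *	tmp = si_halt_rlc(rdev);	// stop RLC, remember old RLC_CNTL
 *	... program RLC_SERDES_* ...
 *	si_update_rlc(rdev, tmp);	// restore the saved state
 */
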
5225 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5226 {
5227 	u32 data, orig;
5228 
5229 	orig = data = RREG32(DMA_PG);
5230 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5231 		data |= PG_CNTL_ENABLE;
5232 	else
5233 		data &= ~PG_CNTL_ENABLE;
5234 	if (orig != data)
5235 		WREG32(DMA_PG, data);
5236 }
5237 
5238 static void si_init_dma_pg(struct radeon_device *rdev)
5239 {
5240 	u32 tmp;
5241 
5242 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
5243 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5244 
5245 	for (tmp = 0; tmp < 5; tmp++)
5246 		WREG32(DMA_PGFSM_WRITE, 0);
5247 }
5248 
5249 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5250 			       bool enable)
5251 {
5252 	u32 tmp;
5253 
5254 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5255 		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5256 		WREG32(RLC_TTOP_D, tmp);
5257 
5258 		tmp = RREG32(RLC_PG_CNTL);
5259 		tmp |= GFX_PG_ENABLE;
5260 		WREG32(RLC_PG_CNTL, tmp);
5261 
5262 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5263 		tmp |= AUTO_PG_EN;
5264 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5265 	} else {
5266 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5267 		tmp &= ~AUTO_PG_EN;
5268 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5269 
5270 		tmp = RREG32(DB_RENDER_CONTROL);
5271 	}
5272 }
5273 
5274 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5275 {
5276 	u32 tmp;
5277 
5278 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5279 
5280 	tmp = RREG32(RLC_PG_CNTL);
5281 	tmp |= GFX_PG_SRC;
5282 	WREG32(RLC_PG_CNTL, tmp);
5283 
5284 	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5285 
5286 	tmp = RREG32(RLC_AUTO_PG_CTRL);
5287 
5288 	tmp &= ~GRBM_REG_SGIT_MASK;
5289 	tmp |= GRBM_REG_SGIT(0x700);
5290 	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5291 	WREG32(RLC_AUTO_PG_CTRL, tmp);
5292 }
5293 
5294 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5295 {
5296 	u32 mask = 0, tmp, tmp1;
5297 	int i;
5298 
5299 	si_select_se_sh(rdev, se, sh);
5300 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5301 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5302 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5303 
5304 	tmp &= 0xffff0000;
5305 
5306 	tmp |= tmp1;
5307 	tmp >>= 16;
5308 
5309 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i++) {
5310 		mask <<= 1;
5311 		mask |= 1;
5312 	}
5313 
5314 	return (~tmp) & mask;
5315 }
5316 
5317 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5318 {
5319 	u32 i, j, k, active_cu_number = 0;
5320 	u32 mask, counter, cu_bitmap;
5321 	u32 tmp = 0;
5322 
5323 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5324 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5325 			mask = 1;
5326 			cu_bitmap = 0;
5327 			counter = 0;
5328 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5329 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5330 					if (counter < 2)
5331 						cu_bitmap |= mask;
5332 					counter++;
5333 				}
5334 				mask <<= 1;
5335 			}
5336 
5337 			active_cu_number += counter;
5338 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5339 		}
5340 	}
5341 
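	/*
	 * Worked example of the packing above: with the shift
	 * "i * 16 + j * 8", SE0/SH0 lands in bits 0-7, SE0/SH1 in
	 * bits 8-15, SE1/SH0 in bits 16-23 and SE1/SH1 in bits 24-31,
	 * i.e. one byte of always-on CU mask per shader array.
	 */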
5342 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5343 
5344 	tmp = RREG32(RLC_MAX_PG_CU);
5345 	tmp &= ~MAX_PU_CU_MASK;
5346 	tmp |= MAX_PU_CU(active_cu_number);
5347 	WREG32(RLC_MAX_PG_CU, tmp);
5348 }
5349 
5350 static void si_enable_cgcg(struct radeon_device *rdev,
5351 			   bool enable)
5352 {
5353 	u32 data, orig, tmp;
5354 
5355 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5356 
5357 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5358 		si_enable_gui_idle_interrupt(rdev, true);
5359 
5360 		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5361 
5362 		tmp = si_halt_rlc(rdev);
5363 
5364 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5365 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5366 		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5367 
5368 		si_wait_for_rlc_serdes(rdev);
5369 
5370 		si_update_rlc(rdev, tmp);
5371 
5372 		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5373 
5374 		data |= CGCG_EN | CGLS_EN;
5375 	} else {
5376 		si_enable_gui_idle_interrupt(rdev, false);
5377 
5378 		RREG32(CB_CGTT_SCLK_CTRL);
5379 		RREG32(CB_CGTT_SCLK_CTRL);
5380 		RREG32(CB_CGTT_SCLK_CTRL);
5381 		RREG32(CB_CGTT_SCLK_CTRL);
5382 
5383 		data &= ~(CGCG_EN | CGLS_EN);
5384 	}
5385 
5386 	if (orig != data)
5387 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5388 }
5389 
5390 static void si_enable_mgcg(struct radeon_device *rdev,
5391 			   bool enable)
5392 {
5393 	u32 data, orig, tmp = 0;
5394 
5395 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5396 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5397 		data = 0x96940200;
5398 		if (orig != data)
5399 			WREG32(CGTS_SM_CTRL_REG, data);
5400 
5401 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5402 			orig = data = RREG32(CP_MEM_SLP_CNTL);
5403 			data |= CP_MEM_LS_EN;
5404 			if (orig != data)
5405 				WREG32(CP_MEM_SLP_CNTL, data);
5406 		}
5407 
5408 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5409 		data &= 0xffffffc0;
5410 		if (orig != data)
5411 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5412 
5413 		tmp = si_halt_rlc(rdev);
5414 
5415 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5416 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5417 		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5418 
5419 		si_update_rlc(rdev, tmp);
5420 	} else {
5421 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5422 		data |= 0x00000003;
5423 		if (orig != data)
5424 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5425 
5426 		data = RREG32(CP_MEM_SLP_CNTL);
5427 		if (data & CP_MEM_LS_EN) {
5428 			data &= ~CP_MEM_LS_EN;
5429 			WREG32(CP_MEM_SLP_CNTL, data);
5430 		}
5431 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5432 		data |= LS_OVERRIDE | OVERRIDE;
5433 		if (orig != data)
5434 			WREG32(CGTS_SM_CTRL_REG, data);
5435 
5436 		tmp = si_halt_rlc(rdev);
5437 
5438 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5439 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5440 		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5441 
5442 		si_update_rlc(rdev, tmp);
5443 	}
5444 }
5445 
5446 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5447 			       bool enable)
5448 {
5449 	u32 orig, data, tmp;
5450 
5451 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5452 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5453 		tmp |= 0x3fff;
5454 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5455 
5456 		orig = data = RREG32(UVD_CGC_CTRL);
5457 		data |= DCM;
5458 		if (orig != data)
5459 			WREG32(UVD_CGC_CTRL, data);
5460 
5461 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5462 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5463 	} else {
5464 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5465 		tmp &= ~0x3fff;
5466 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5467 
5468 		orig = data = RREG32(UVD_CGC_CTRL);
5469 		data &= ~DCM;
5470 		if (orig != data)
5471 			WREG32(UVD_CGC_CTRL, data);
5472 
5473 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5474 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5475 	}
5476 }
5477 
5478 static const u32 mc_cg_registers[] =
5479 {
5480 	MC_HUB_MISC_HUB_CG,
5481 	MC_HUB_MISC_SIP_CG,
5482 	MC_HUB_MISC_VM_CG,
5483 	MC_XPB_CLK_GAT,
5484 	ATC_MISC_CG,
5485 	MC_CITF_MISC_WR_CG,
5486 	MC_CITF_MISC_RD_CG,
5487 	MC_CITF_MISC_VM_CG,
5488 	VM_L2_CG,
5489 };
5490 
5491 static void si_enable_mc_ls(struct radeon_device *rdev,
5492 			    bool enable)
5493 {
5494 	int i;
5495 	u32 orig, data;
5496 
5497 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5498 		orig = data = RREG32(mc_cg_registers[i]);
5499 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5500 			data |= MC_LS_ENABLE;
5501 		else
5502 			data &= ~MC_LS_ENABLE;
5503 		if (data != orig)
5504 			WREG32(mc_cg_registers[i], data);
5505 	}
5506 }
5507 
5508 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5509 			       bool enable)
5510 {
5511 	int i;
5512 	u32 orig, data;
5513 
5514 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5515 		orig = data = RREG32(mc_cg_registers[i]);
5516 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5517 			data |= MC_CG_ENABLE;
5518 		else
5519 			data &= ~MC_CG_ENABLE;
5520 		if (data != orig)
5521 			WREG32(mc_cg_registers[i], data);
5522 	}
5523 }
5524 
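/*
 * Illustration only (hypothetical helper).  si_enable_mc_ls() and
 * si_enable_mc_mgcg() above are the same read-modify-write walk over
 * mc_cg_registers[] with a different bit; a generic form would be:
 */
static inline void si_mc_cg_update_sketch(struct radeon_device *rdev,
					  u32 bit, bool enable)
{
	u32 orig, data;
	int i;

	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
		orig = data = RREG32(mc_cg_registers[i]);
		if (enable)
			data |= bit;
		else
			data &= ~bit;
		if (data != orig)	/* skip redundant writes */
			WREG32(mc_cg_registers[i], data);
	}
}
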
5525 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5526 			       bool enable)
5527 {
5528 	u32 orig, data, offset;
5529 	int i;
5530 
5531 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5532 		for (i = 0; i < 2; i++) {
5533 			if (i == 0)
5534 				offset = DMA0_REGISTER_OFFSET;
5535 			else
5536 				offset = DMA1_REGISTER_OFFSET;
5537 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5538 			data &= ~MEM_POWER_OVERRIDE;
5539 			if (data != orig)
5540 				WREG32(DMA_POWER_CNTL + offset, data);
5541 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5542 		}
5543 	} else {
5544 		for (i = 0; i < 2; i++) {
5545 			if (i == 0)
5546 				offset = DMA0_REGISTER_OFFSET;
5547 			else
5548 				offset = DMA1_REGISTER_OFFSET;
5549 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5550 			data |= MEM_POWER_OVERRIDE;
5551 			if (data != orig)
5552 				WREG32(DMA_POWER_CNTL + offset, data);
5553 
5554 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5555 			data = 0xff000000;
5556 			if (data != orig)
5557 				WREG32(DMA_CLK_CTRL + offset, data);
5558 		}
5559 	}
5560 }
5561 
5562 static void si_enable_bif_mgls(struct radeon_device *rdev,
5563 			       bool enable)
5564 {
5565 	u32 orig, data;
5566 
5567 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5568 
5569 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5570 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5571 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5572 	else
5573 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5574 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5575 
5576 	if (orig != data)
5577 		WREG32_PCIE(PCIE_CNTL2, data);
5578 }
5579 
5580 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5581 			       bool enable)
5582 {
5583 	u32 orig, data;
5584 
5585 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5586 
5587 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5588 		data &= ~CLOCK_GATING_DIS;
5589 	else
5590 		data |= CLOCK_GATING_DIS;
5591 
5592 	if (orig != data)
5593 		WREG32(HDP_HOST_PATH_CNTL, data);
5594 }
5595 
5596 static void si_enable_hdp_ls(struct radeon_device *rdev,
5597 			     bool enable)
5598 {
5599 	u32 orig, data;
5600 
5601 	orig = data = RREG32(HDP_MEM_POWER_LS);
5602 
5603 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5604 		data |= HDP_LS_ENABLE;
5605 	else
5606 		data &= ~HDP_LS_ENABLE;
5607 
5608 	if (orig != data)
5609 		WREG32(HDP_MEM_POWER_LS, data);
5610 }
5611 
5612 static void si_update_cg(struct radeon_device *rdev,
5613 			 u32 block, bool enable)
5614 {
5615 	if (block & RADEON_CG_BLOCK_GFX) {
5616 		si_enable_gui_idle_interrupt(rdev, false);
5617 		/* order matters! */
5618 		if (enable) {
5619 			si_enable_mgcg(rdev, true);
5620 			si_enable_cgcg(rdev, true);
5621 		} else {
5622 			si_enable_cgcg(rdev, false);
5623 			si_enable_mgcg(rdev, false);
5624 		}
5625 		si_enable_gui_idle_interrupt(rdev, true);
5626 	}
5627 
5628 	if (block & RADEON_CG_BLOCK_MC) {
5629 		si_enable_mc_mgcg(rdev, enable);
5630 		si_enable_mc_ls(rdev, enable);
5631 	}
5632 
5633 	if (block & RADEON_CG_BLOCK_SDMA) {
5634 		si_enable_dma_mgcg(rdev, enable);
5635 	}
5636 
5637 	if (block & RADEON_CG_BLOCK_BIF) {
5638 		si_enable_bif_mgls(rdev, enable);
5639 	}
5640 
5641 	if (block & RADEON_CG_BLOCK_UVD) {
5642 		if (rdev->has_uvd) {
5643 			si_enable_uvd_mgcg(rdev, enable);
5644 		}
5645 	}
5646 
5647 	if (block & RADEON_CG_BLOCK_HDP) {
5648 		si_enable_hdp_mgcg(rdev, enable);
5649 		si_enable_hdp_ls(rdev, enable);
5650 	}
5651 }
5652 
5653 static void si_init_cg(struct radeon_device *rdev)
5654 {
5655 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5656 			    RADEON_CG_BLOCK_MC |
5657 			    RADEON_CG_BLOCK_SDMA |
5658 			    RADEON_CG_BLOCK_BIF |
5659 			    RADEON_CG_BLOCK_HDP), true);
5660 	if (rdev->has_uvd) {
5661 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5662 		si_init_uvd_internal_cg(rdev);
5663 	}
5664 }
5665 
5666 static void si_fini_cg(struct radeon_device *rdev)
5667 {
5668 	if (rdev->has_uvd) {
5669 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5670 	}
5671 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5672 			    RADEON_CG_BLOCK_MC |
5673 			    RADEON_CG_BLOCK_SDMA |
5674 			    RADEON_CG_BLOCK_BIF |
5675 			    RADEON_CG_BLOCK_HDP), false);
5676 }
5677 
5678 u32 si_get_csb_size(struct radeon_device *rdev)
5679 {
5680 	u32 count = 0;
5681 	const struct cs_section_def *sect = NULL;
5682 	const struct cs_extent_def *ext = NULL;
5683 
5684 	if (rdev->rlc.cs_data == NULL)
5685 		return 0;
5686 
5687 	/* begin clear state */
5688 	count += 2;
5689 	/* context control state */
5690 	count += 3;
5691 
5692 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5693 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5694 			if (sect->id == SECT_CONTEXT)
5695 				count += 2 + ext->reg_count;
5696 			else
5697 				return 0;
5698 		}
5699 	}
5700 	/* pa_sc_raster_config */
5701 	count += 3;
5702 	/* end clear state */
5703 	count += 2;
5704 	/* clear state */
5705 	count += 2;
5706 
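	/*
	 * Worked example (hypothetical cs_data): one SECT_CONTEXT
	 * section with two extents of 4 and 7 registers yields
	 * 2 + 3 + (2 + 4) + (2 + 7) + 3 + 2 + 2 = 27 dwords.
	 */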
5707 	return count;
5708 }
5709 
5710 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5711 {
5712 	u32 count = 0, i;
5713 	const struct cs_section_def *sect = NULL;
5714 	const struct cs_extent_def *ext = NULL;
5715 
5716 	if (rdev->rlc.cs_data == NULL)
5717 		return;
5718 	if (buffer == NULL)
5719 		return;
5720 
5721 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5722 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5723 
5724 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5725 	buffer[count++] = cpu_to_le32(0x80000000);
5726 	buffer[count++] = cpu_to_le32(0x80000000);
5727 
5728 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5729 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5730 			if (sect->id == SECT_CONTEXT) {
5731 				buffer[count++] =
5732 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5733 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5734 				for (i = 0; i < ext->reg_count; i++)
5735 					buffer[count++] = cpu_to_le32(ext->extent[i]);
5736 			} else {
5737 				return;
5738 			}
5739 		}
5740 	}
5741 
5742 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5743 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5744 	switch (rdev->family) {
5745 	case CHIP_TAHITI:
5746 	case CHIP_PITCAIRN:
5747 		buffer[count++] = cpu_to_le32(0x2a00126a);
5748 		break;
5749 	case CHIP_VERDE:
5750 		buffer[count++] = cpu_to_le32(0x0000124a);
5751 		break;
5752 	case CHIP_OLAND:
5753 		buffer[count++] = cpu_to_le32(0x00000082);
5754 		break;
5755 	case CHIP_HAINAN:
5756 		buffer[count++] = cpu_to_le32(0x00000000);
5757 		break;
5758 	default:
5759 		buffer[count++] = cpu_to_le32(0x00000000);
5760 		break;
5761 	}
5762 
5763 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5764 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5765 
5766 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5767 	buffer[count++] = cpu_to_le32(0);
5768 }
5769 
5770 static void si_init_pg(struct radeon_device *rdev)
5771 {
5772 	if (rdev->pg_flags) {
5773 		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5774 			si_init_dma_pg(rdev);
5775 		}
5776 		si_init_ao_cu_mask(rdev);
5777 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5778 			si_init_gfx_cgpg(rdev);
5779 		} else {
5780 			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5781 			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5782 		}
5783 		si_enable_dma_pg(rdev, true);
5784 		si_enable_gfx_cgpg(rdev, true);
5785 	} else {
5786 		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5787 		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5788 	}
5789 }
5790 
5791 static void si_fini_pg(struct radeon_device *rdev)
5792 {
5793 	if (rdev->pg_flags) {
5794 		si_enable_dma_pg(rdev, false);
5795 		si_enable_gfx_cgpg(rdev, false);
5796 	}
5797 }
5798 
5799 /*
5800  * RLC
5801  */
5802 void si_rlc_reset(struct radeon_device *rdev)
5803 {
5804 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5805 
5806 	tmp |= SOFT_RESET_RLC;
5807 	WREG32(GRBM_SOFT_RESET, tmp);
5808 	udelay(50);
5809 	tmp &= ~SOFT_RESET_RLC;
5810 	WREG32(GRBM_SOFT_RESET, tmp);
5811 	udelay(50);
5812 }
5813 
5814 static void si_rlc_stop(struct radeon_device *rdev)
5815 {
5816 	WREG32(RLC_CNTL, 0);
5817 
5818 	si_enable_gui_idle_interrupt(rdev, false);
5819 
5820 	si_wait_for_rlc_serdes(rdev);
5821 }
5822 
5823 static void si_rlc_start(struct radeon_device *rdev)
5824 {
5825 	WREG32(RLC_CNTL, RLC_ENABLE);
5826 
5827 	si_enable_gui_idle_interrupt(rdev, true);
5828 
5829 	udelay(50);
5830 }
5831 
5832 static bool si_lbpw_supported(struct radeon_device *rdev)
5833 {
5834 	u32 tmp;
5835 
5836 	/* Enable LBPW only for DDR3 */
5837 	tmp = RREG32(MC_SEQ_MISC0);
5838 	if ((tmp & 0xF0000000) == 0xB0000000)
5839 		return true;
5840 	return false;
5841 }
5842 
5843 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5844 {
5845 	u32 tmp;
5846 
5847 	tmp = RREG32(RLC_LB_CNTL);
5848 	if (enable)
5849 		tmp |= LOAD_BALANCE_ENABLE;
5850 	else
5851 		tmp &= ~LOAD_BALANCE_ENABLE;
5852 	WREG32(RLC_LB_CNTL, tmp);
5853 
5854 	if (!enable) {
5855 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5856 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5857 	}
5858 }
5859 
5860 static int si_rlc_resume(struct radeon_device *rdev)
5861 {
5862 	u32 i;
5863 
5864 	if (!rdev->rlc_fw)
5865 		return -EINVAL;
5866 
5867 	si_rlc_stop(rdev);
5868 
5869 	si_rlc_reset(rdev);
5870 
5871 	si_init_pg(rdev);
5872 
5873 	si_init_cg(rdev);
5874 
5875 	WREG32(RLC_RL_BASE, 0);
5876 	WREG32(RLC_RL_SIZE, 0);
5877 	WREG32(RLC_LB_CNTL, 0);
5878 	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5879 	WREG32(RLC_LB_CNTR_INIT, 0);
5880 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5881 
5882 	WREG32(RLC_MC_CNTL, 0);
5883 	WREG32(RLC_UCODE_CNTL, 0);
5884 
5885 	if (rdev->new_fw) {
5886 		const struct rlc_firmware_header_v1_0 *hdr =
5887 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5888 		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5889 		const __le32 *fw_data = (const __le32 *)
5890 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5891 
5892 		radeon_ucode_print_rlc_hdr(&hdr->header);
5893 
5894 		for (i = 0; i < fw_size; i++) {
5895 			WREG32(RLC_UCODE_ADDR, i);
5896 			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
5897 		}
5898 	} else {
5899 		const __be32 *fw_data =
5900 			(const __be32 *)rdev->rlc_fw->data;
5901 		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5902 			WREG32(RLC_UCODE_ADDR, i);
5903 			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5904 		}
5905 	}
5906 	WREG32(RLC_UCODE_ADDR, 0);
5907 
5908 	si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5909 
5910 	si_rlc_start(rdev);
5911 
5912 	return 0;
5913 }
5914 
5915 static void si_enable_interrupts(struct radeon_device *rdev)
5916 {
5917 	u32 ih_cntl = RREG32(IH_CNTL);
5918 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5919 
5920 	ih_cntl |= ENABLE_INTR;
5921 	ih_rb_cntl |= IH_RB_ENABLE;
5922 	WREG32(IH_CNTL, ih_cntl);
5923 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5924 	rdev->ih.enabled = true;
5925 }
5926 
5927 static void si_disable_interrupts(struct radeon_device *rdev)
5928 {
5929 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5930 	u32 ih_cntl = RREG32(IH_CNTL);
5931 
5932 	ih_rb_cntl &= ~IH_RB_ENABLE;
5933 	ih_cntl &= ~ENABLE_INTR;
5934 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5935 	WREG32(IH_CNTL, ih_cntl);
5936 	/* set rptr, wptr to 0 */
5937 	WREG32(IH_RB_RPTR, 0);
5938 	WREG32(IH_RB_WPTR, 0);
5939 	rdev->ih.enabled = false;
5940 	rdev->ih.rptr = 0;
5941 }
5942 
5943 static void si_disable_interrupt_state(struct radeon_device *rdev)
5944 {
5945 	int i;
5946 	u32 tmp;
5947 
5948 	tmp = RREG32(CP_INT_CNTL_RING0) &
5949 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5950 	WREG32(CP_INT_CNTL_RING0, tmp);
5951 	WREG32(CP_INT_CNTL_RING1, 0);
5952 	WREG32(CP_INT_CNTL_RING2, 0);
5953 	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5954 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5955 	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5956 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5957 	WREG32(GRBM_INT_CNTL, 0);
5958 	WREG32(SRBM_INT_CNTL, 0);
5959 	for (i = 0; i < rdev->num_crtc; i++)
5960 		WREG32(INT_MASK + crtc_offsets[i], 0);
5961 	for (i = 0; i < rdev->num_crtc; i++)
5962 		WREG32(GRPH_INT_CONTROL + crtc_offsets[i], 0);
5963 
5964 	if (!ASIC_IS_NODCE(rdev)) {
5965 		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5966 
5967 		for (i = 0; i < 6; i++)
5968 			WREG32_AND(DC_HPDx_INT_CONTROL(i),
5969 				   DC_HPDx_INT_POLARITY);
5970 	}
5971 }
5972 
5973 static int si_irq_init(struct radeon_device *rdev)
5974 {
5975 	int ret = 0;
5976 	int rb_bufsz;
5977 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5978 
5979 	/* allocate ring */
5980 	ret = r600_ih_ring_alloc(rdev);
5981 	if (ret)
5982 		return ret;
5983 
5984 	/* disable irqs */
5985 	si_disable_interrupts(rdev);
5986 
5987 	/* init rlc */
5988 	ret = si_rlc_resume(rdev);
5989 	if (ret) {
5990 		r600_ih_ring_fini(rdev);
5991 		return ret;
5992 	}
5993 
5994 	/* setup interrupt control */
5995 	/* set dummy read address to dummy page address */
5996 	WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
5997 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
5998 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5999 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6000 	 */
6001 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6002 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6003 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6004 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6005 
6006 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6007 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
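	/*
	 * Worked example: si_init() allocates a 64 KB IH ring
	 * (r600_ih_ring_init(rdev, 64 * 1024)), i.e. 16384 dwords,
	 * so rb_bufsz = order_base_2(16384) = 14 here.
	 */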
6008 
6009 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6010 		      IH_WPTR_OVERFLOW_CLEAR |
6011 		      (rb_bufsz << 1));
6012 
6013 	if (rdev->wb.enabled)
6014 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6015 
6016 	/* set the writeback address whether it's enabled or not */
6017 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6018 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6019 
6020 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6021 
6022 	/* set rptr, wptr to 0 */
6023 	WREG32(IH_RB_RPTR, 0);
6024 	WREG32(IH_RB_WPTR, 0);
6025 
6026 	/* Default settings for IH_CNTL (disabled at first) */
6027 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6028 	/* RPTR_REARM only works if msi's are enabled */
6029 	if (rdev->msi_enabled)
6030 		ih_cntl |= RPTR_REARM;
6031 	WREG32(IH_CNTL, ih_cntl);
6032 
6033 	/* force the active interrupt state to all disabled */
6034 	si_disable_interrupt_state(rdev);
6035 
6036 	pci_set_master(rdev->pdev);
6037 
6038 	/* enable irqs */
6039 	si_enable_interrupts(rdev);
6040 
6041 	return ret;
6042 }
6043 
6044 /* The order we write back each register here is important */
6045 int si_irq_set(struct radeon_device *rdev)
6046 {
6047 	int i;
6048 	u32 cp_int_cntl;
6049 	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
6050 	u32 grbm_int_cntl = 0;
6051 	u32 dma_cntl, dma_cntl1;
6052 	u32 thermal_int = 0;
6053 
6054 	if (!rdev->irq.installed) {
6055 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6056 		return -EINVAL;
6057 	}
6058 	/* don't enable anything if the ih is disabled */
6059 	if (!rdev->ih.enabled) {
6060 		si_disable_interrupts(rdev);
6061 		/* force the active interrupt state to all disabled */
6062 		si_disable_interrupt_state(rdev);
6063 		return 0;
6064 	}
6065 
6066 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6067 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6068 
6069 	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6070 	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6071 
6072 	thermal_int = RREG32(CG_THERMAL_INT) &
6073 		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6074 
6075 	/* enable CP interrupts on all rings */
6076 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6077 		DRM_DEBUG("si_irq_set: sw int gfx\n");
6078 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6079 	}
6080 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6081 		DRM_DEBUG("si_irq_set: sw int cp1\n");
6082 		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
6083 	}
6084 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6085 		DRM_DEBUG("si_irq_set: sw int cp2\n");
6086 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
6087 	}
6088 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6089 		DRM_DEBUG("si_irq_set: sw int dma\n");
6090 		dma_cntl |= TRAP_ENABLE;
6091 	}
6092 
6093 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6094 		DRM_DEBUG("si_irq_set: sw int dma1\n");
6095 		dma_cntl1 |= TRAP_ENABLE;
6096 	}
6097 
6098 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6099 	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
6100 	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
6101 
6102 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
6103 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
6104 
6105 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6106 
6107 	if (rdev->irq.dpm_thermal) {
6108 		DRM_DEBUG("dpm thermal\n");
6109 		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6110 	}
6111 
6112 	for (i = 0; i < rdev->num_crtc; i++) {
6113 		radeon_irq_kms_set_irq_n_enabled(
6114 		    rdev, INT_MASK + crtc_offsets[i], VBLANK_INT_MASK,
6115 		    rdev->irq.crtc_vblank_int[i] ||
6116 		    atomic_read(&rdev->irq.pflip[i]), "vblank", i);
6117 	}
6118 
6119 	for (i = 0; i < rdev->num_crtc; i++)
6120 		WREG32(GRPH_INT_CONTROL + crtc_offsets[i], GRPH_PFLIP_INT_MASK);
6121 
6122 	if (!ASIC_IS_NODCE(rdev)) {
6123 		for (i = 0; i < 6; i++) {
6124 			radeon_irq_kms_set_irq_n_enabled(
6125 			    rdev, DC_HPDx_INT_CONTROL(i),
6126 			    DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN,
6127 			    rdev->irq.hpd[i], "HPD", i);
6128 		}
6129 	}
6130 
6131 	WREG32(CG_THERMAL_INT, thermal_int);
6132 
6133 	/* posting read */
6134 	RREG32(SRBM_STATUS);
6135 
6136 	return 0;
6137 }
6138 
6139 /* The order we write back each register here is important */
6140 static inline void si_irq_ack(struct radeon_device *rdev)
6141 {
6142 	int i, j;
6143 	u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
6144 	u32 *grph_int = rdev->irq.stat_regs.evergreen.grph_int;
6145 
6146 	if (ASIC_IS_NODCE(rdev))
6147 		return;
6148 
6149 	for (i = 0; i < 6; i++) {
6150 		disp_int[i] = RREG32(si_disp_int_status[i]);
6151 		if (i < rdev->num_crtc)
6152 			grph_int[i] = RREG32(GRPH_INT_STATUS + crtc_offsets[i]);
6153 	}
6154 
6155 	/* We write back each interrupt register in pairs of two */
6156 	for (i = 0; i < rdev->num_crtc; i += 2) {
6157 		for (j = i; j < (i + 2); j++) {
6158 			if (grph_int[j] & GRPH_PFLIP_INT_OCCURRED)
6159 				WREG32(GRPH_INT_STATUS + crtc_offsets[j],
6160 				       GRPH_PFLIP_INT_CLEAR);
6161 		}
6162 
6163 		for (j = i; j < (i + 2); j++) {
6164 			if (disp_int[j] & LB_D1_VBLANK_INTERRUPT)
6165 				WREG32(VBLANK_STATUS + crtc_offsets[j],
6166 				       VBLANK_ACK);
6167 			if (disp_int[j] & LB_D1_VLINE_INTERRUPT)
6168 				WREG32(VLINE_STATUS + crtc_offsets[j],
6169 				       VLINE_ACK);
6170 		}
6171 	}
6172 
6173 	for (i = 0; i < 6; i++) {
6174 		if (disp_int[i] & DC_HPD1_INTERRUPT)
6175 			WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_INT_ACK);
6176 	}
6177 
6178 	for (i = 0; i < 6; i++) {
6179 		if (disp_int[i] & DC_HPD1_RX_INTERRUPT)
6180 			WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_RX_INT_ACK);
6181 	}
6182 }
6183 
6184 static void si_irq_disable(struct radeon_device *rdev)
6185 {
6186 	si_disable_interrupts(rdev);
6187 	/* Wait and acknowledge irq */
6188 	mdelay(1);
6189 	si_irq_ack(rdev);
6190 	si_disable_interrupt_state(rdev);
6191 }
6192 
6193 static void si_irq_suspend(struct radeon_device *rdev)
6194 {
6195 	si_irq_disable(rdev);
6196 	si_rlc_stop(rdev);
6197 }
6198 
6199 static void si_irq_fini(struct radeon_device *rdev)
6200 {
6201 	si_irq_suspend(rdev);
6202 	r600_ih_ring_fini(rdev);
6203 }
6204 
6205 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6206 {
6207 	u32 wptr, tmp;
6208 
6209 	if (rdev->wb.enabled)
6210 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6211 	else
6212 		wptr = RREG32(IH_RB_WPTR);
6213 
6214 	if (wptr & RB_OVERFLOW) {
6215 		wptr &= ~RB_OVERFLOW;
6216 		/* When a ring buffer overflow happens, start parsing interrupts
6217 		 * from the last non-overwritten vector (wptr + 16). Hopefully
6218 		 * this should allow us to catch up.
6219 		 */
6220 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6221 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6222 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6223 		tmp = RREG32(IH_RB_CNTL);
6224 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6225 		WREG32(IH_RB_CNTL, tmp);
6226 	}
6227 	return (wptr & rdev->ih.ptr_mask);
6228 }
6229 
6230 /*        SI IV Ring
6231  * Each IV ring entry is 128 bits:
6232  * [7:0]    - interrupt source id
6233  * [31:8]   - reserved
6234  * [59:32]  - interrupt source data
6235  * [63:60]  - reserved
6236  * [71:64]  - RINGID
6237  * [79:72]  - VMID
6238  * [127:80] - reserved
6239  */
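
/*
 * Illustration only (hypothetical helper, not used by the driver):
 * decoding one 16-byte IV entry per the layout above, exactly as the
 * loop in si_irq_process() does it.
 */
static inline void si_iv_entry_decode_sketch(const u32 *ring, u32 ring_index,
					     u32 *src_id, u32 *src_data,
					     u32 *ring_id)
{
	*src_id   = le32_to_cpu(ring[ring_index]) & 0xff;	    /* [7:0] */
	*src_data = le32_to_cpu(ring[ring_index + 1]) & 0xfffffff; /* [59:32] */
	*ring_id  = le32_to_cpu(ring[ring_index + 2]) & 0xff;	    /* [71:64] */
}
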
6240 int si_irq_process(struct radeon_device *rdev)
6241 {
6242 	u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
6243 	u32 crtc_idx, hpd_idx;
6244 	u32 mask;
6245 	u32 wptr;
6246 	u32 rptr;
6247 	u32 src_id, src_data, ring_id;
6248 	u32 ring_index;
6249 	bool queue_hotplug = false;
6250 	bool queue_dp = false;
6251 	bool queue_thermal = false;
6252 	u32 status, addr;
6253 	const char *event_name;
6254 
6255 	if (!rdev->ih.enabled || rdev->shutdown)
6256 		return IRQ_NONE;
6257 
6258 	wptr = si_get_ih_wptr(rdev);
6259 
6260 restart_ih:
6261 	/* is somebody else already processing irqs? */
6262 	if (atomic_xchg(&rdev->ih.lock, 1))
6263 		return IRQ_NONE;
6264 
6265 	rptr = rdev->ih.rptr;
6266 	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6267 
6268 	/* Order reading of wptr vs. reading of IH ring data */
6269 	rmb();
6270 
6271 	/* display interrupts */
6272 	si_irq_ack(rdev);
6273 
6274 	while (rptr != wptr) {
6275 		/* wptr/rptr are in bytes! */
6276 		ring_index = rptr / 4;
6277 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6278 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6279 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6280 
6281 		switch (src_id) {
6282 		case 1: /* D1 vblank/vline */
6283 		case 2: /* D2 vblank/vline */
6284 		case 3: /* D3 vblank/vline */
6285 		case 4: /* D4 vblank/vline */
6286 		case 5: /* D5 vblank/vline */
6287 		case 6: /* D6 vblank/vline */
6288 			crtc_idx = src_id - 1;
6289 
6290 			if (src_data == 0) { /* vblank */
6291 				mask = LB_D1_VBLANK_INTERRUPT;
6292 				event_name = "vblank";
6293 
6294 				if (rdev->irq.crtc_vblank_int[crtc_idx]) {
6295 					drm_handle_vblank(rdev->ddev, crtc_idx);
6296 					rdev->pm.vblank_sync = true;
6297 					wake_up(&rdev->irq.vblank_queue);
6298 				}
6299 				if (atomic_read(&rdev->irq.pflip[crtc_idx])) {
6300 					radeon_crtc_handle_vblank(rdev,
6301 								  crtc_idx);
6302 				}
6303 
6304 			} else if (src_data == 1) { /* vline */
6305 				mask = LB_D1_VLINE_INTERRUPT;
6306 				event_name = "vline";
6307 			} else {
6308 				DRM_DEBUG("Unhandled interrupt: %d %d\n",
6309 					  src_id, src_data);
6310 				break;
6311 			}
6312 
6313 			if (!(disp_int[crtc_idx] & mask)) {
6314 				DRM_DEBUG("IH: D%d %s - IH event w/o asserted irq bit?\n",
6315 					  crtc_idx + 1, event_name);
6316 			}
6317 
6318 			disp_int[crtc_idx] &= ~mask;
6319 			DRM_DEBUG("IH: D%d %s\n", crtc_idx + 1, event_name);
6320 
6321 			break;
6322 		case 8: /* D1 page flip */
6323 		case 10: /* D2 page flip */
6324 		case 12: /* D3 page flip */
6325 		case 14: /* D4 page flip */
6326 		case 16: /* D5 page flip */
6327 		case 18: /* D6 page flip */
6328 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6329 			if (radeon_use_pflipirq > 0)
6330 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6331 			break;
6332 		case 42: /* HPD hotplug */
6333 			if (src_data <= 5) {
6334 				hpd_idx = src_data;
6335 				mask = DC_HPD1_INTERRUPT;
6336 				queue_hotplug = true;
6337 				event_name = "HPD";
6338 
6339 			} else if (src_data <= 11) {
6340 				hpd_idx = src_data - 6;
6341 				mask = DC_HPD1_RX_INTERRUPT;
6342 				queue_dp = true;
6343 				event_name = "HPD_RX";
6344 
6345 			} else {
6346 				DRM_DEBUG("Unhandled interrupt: %d %d\n",
6347 					  src_id, src_data);
6348 				break;
6349 			}
6350 
6351 			if (!(disp_int[hpd_idx] & mask))
6352 				DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6353 
6354 			disp_int[hpd_idx] &= ~mask;
6355 			DRM_DEBUG("IH: %s%d\n", event_name, hpd_idx + 1);
6356 			break;
6357 		case 96:
6358 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
6359 			WREG32(SRBM_INT_ACK, 0x1);
6360 			break;
6361 		case 124: /* UVD */
6362 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6363 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6364 			break;
6365 		case 146:
6366 		case 147:
6367 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6368 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6369 			/* reset addr and status */
6370 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6371 			if (addr == 0x0 && status == 0x0)
6372 				break;
6373 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6374 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6375 				addr);
6376 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6377 				status);
6378 			si_vm_decode_fault(rdev, status, addr);
6379 			break;
6380 		case 176: /* RINGID0 CP_INT */
6381 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6382 			break;
6383 		case 177: /* RINGID1 CP_INT */
6384 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6385 			break;
6386 		case 178: /* RINGID2 CP_INT */
6387 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6388 			break;
6389 		case 181: /* CP EOP event */
6390 			DRM_DEBUG("IH: CP EOP\n");
6391 			switch (ring_id) {
6392 			case 0:
6393 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6394 				break;
6395 			case 1:
6396 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6397 				break;
6398 			case 2:
6399 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6400 				break;
6401 			}
6402 			break;
6403 		case 224: /* DMA trap event */
6404 			DRM_DEBUG("IH: DMA trap\n");
6405 			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6406 			break;
6407 		case 230: /* thermal low to high */
6408 			DRM_DEBUG("IH: thermal low to high\n");
6409 			rdev->pm.dpm.thermal.high_to_low = false;
6410 			queue_thermal = true;
6411 			break;
6412 		case 231: /* thermal high to low */
6413 			DRM_DEBUG("IH: thermal high to low\n");
6414 			rdev->pm.dpm.thermal.high_to_low = true;
6415 			queue_thermal = true;
6416 			break;
6417 		case 233: /* GUI IDLE */
6418 			DRM_DEBUG("IH: GUI idle\n");
6419 			break;
6420 		case 244: /* DMA1 trap event */
6421 			DRM_DEBUG("IH: DMA1 trap\n");
6422 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6423 			break;
6424 		default:
6425 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6426 			break;
6427 		}
6428 
6429 		/* wptr/rptr are in bytes! */
6430 		rptr += 16;
6431 		rptr &= rdev->ih.ptr_mask;
6432 		WREG32(IH_RB_RPTR, rptr);
6433 	}
6434 	if (queue_dp)
6435 		schedule_work(&rdev->dp_work);
6436 	if (queue_hotplug)
6437 		schedule_delayed_work(&rdev->hotplug_work, 0);
6438 	if (queue_thermal && rdev->pm.dpm_enabled)
6439 		schedule_work(&rdev->pm.dpm.thermal.work);
6440 	rdev->ih.rptr = rptr;
6441 	atomic_set(&rdev->ih.lock, 0);
6442 
6443 	/* make sure wptr hasn't changed while processing */
6444 	wptr = si_get_ih_wptr(rdev);
6445 	if (wptr != rptr)
6446 		goto restart_ih;
6447 
6448 	return IRQ_HANDLED;
6449 }
6450 
6451 /*
6452  * startup/shutdown callbacks
6453  */
6454 static void si_uvd_init(struct radeon_device *rdev)
6455 {
6456 	int r;
6457 
6458 	if (!rdev->has_uvd)
6459 		return;
6460 
6461 	r = radeon_uvd_init(rdev);
6462 	if (r) {
6463 		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
6464 		/*
6465 		 * At this point rdev->uvd.vcpu_bo is NULL, which makes
6466 		 * uvd_v2_2_resume() fail early, so nothing happens there.
6467 		 * It is therefore pointless to go through that code, hence
6468 		 * we disable UVD here.
6469 		 */
6470 		rdev->has_uvd = false;
6471 		return;
6472 	}
6473 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
6474 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
6475 }
6476 
6477 static void si_uvd_start(struct radeon_device *rdev)
6478 {
6479 	int r;
6480 
6481 	if (!rdev->has_uvd)
6482 		return;
6483 
6484 	r = uvd_v2_2_resume(rdev);
6485 	if (r) {
6486 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
6487 		goto error;
6488 	}
6489 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
6490 	if (r) {
6491 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
6492 		goto error;
6493 	}
6494 	return;
6495 
6496 error:
6497 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6498 }
6499 
6500 static void si_uvd_resume(struct radeon_device *rdev)
6501 {
6502 	struct radeon_ring *ring;
6503 	int r;
6504 
6505 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
6506 		return;
6507 
6508 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6509 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
6510 	if (r) {
6511 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
6512 		return;
6513 	}
6514 	r = uvd_v1_0_init(rdev);
6515 	if (r) {
6516 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
6517 		return;
6518 	}
6519 }
6520 
6521 static void si_vce_init(struct radeon_device *rdev)
6522 {
6523 	int r;
6524 
6525 	if (!rdev->has_vce)
6526 		return;
6527 
6528 	r = radeon_vce_init(rdev);
6529 	if (r) {
6530 		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
6531 		/*
6532 		 * At this point rdev->vce.vcpu_bo is NULL, which makes
6533 		 * si_vce_start() fail early, so nothing happens there.
6534 		 * It is therefore pointless to go through that code, hence
6535 		 * we disable VCE here.
6536 		 */
6537 		rdev->has_vce = false;
6538 		return;
6539 	}
6540 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
6541 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
6542 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
6543 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
6544 }
6545 
6546 static void si_vce_start(struct radeon_device *rdev)
6547 {
6548 	int r;
6549 
6550 	if (!rdev->has_vce)
6551 		return;
6552 
6553 	r = radeon_vce_resume(rdev);
6554 	if (r) {
6555 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6556 		goto error;
6557 	}
6558 	r = vce_v1_0_resume(rdev);
6559 	if (r) {
6560 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6561 		goto error;
6562 	}
6563 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
6564 	if (r) {
6565 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
6566 		goto error;
6567 	}
6568 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
6569 	if (r) {
6570 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
6571 		goto error;
6572 	}
6573 	return;
6574 
6575 error:
6576 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
6577 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
6578 }
6579 
6580 static void si_vce_resume(struct radeon_device *rdev)
6581 {
6582 	struct radeon_ring *ring;
6583 	int r;
6584 
6585 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
6586 		return;
6587 
6588 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
6589 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6590 	if (r) {
6591 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6592 		return;
6593 	}
6594 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
6595 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6596 	if (r) {
6597 		dev_err(rdev->dev, "failed initializing VCE2 ring (%d).\n", r);
6598 		return;
6599 	}
6600 	r = vce_v1_0_init(rdev);
6601 	if (r) {
6602 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
6603 		return;
6604 	}
6605 }
6606 
6607 static int si_startup(struct radeon_device *rdev)
6608 {
6609 	struct radeon_ring *ring;
6610 	int r;
6611 
6612 	/* enable pcie gen2/3 link */
6613 	si_pcie_gen3_enable(rdev);
6614 	/* enable aspm */
6615 	si_program_aspm(rdev);
6616 
6617 	/* scratch needs to be initialized before MC */
6618 	r = r600_vram_scratch_init(rdev);
6619 	if (r)
6620 		return r;
6621 
6622 	si_mc_program(rdev);
6623 
6624 	if (!rdev->pm.dpm_enabled) {
6625 		r = si_mc_load_microcode(rdev);
6626 		if (r) {
6627 			DRM_ERROR("Failed to load MC firmware!\n");
6628 			return r;
6629 		}
6630 	}
6631 
6632 	r = si_pcie_gart_enable(rdev);
6633 	if (r)
6634 		return r;
6635 	si_gpu_init(rdev);
6636 
6637 	/* allocate rlc buffers */
6638 	if (rdev->family == CHIP_VERDE) {
6639 		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
6640 		rdev->rlc.reg_list_size =
6641 			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
6642 	}
6643 	rdev->rlc.cs_data = si_cs_data;
6644 	r = sumo_rlc_init(rdev);
6645 	if (r) {
6646 		DRM_ERROR("Failed to init rlc BOs!\n");
6647 		return r;
6648 	}
6649 
6650 	/* allocate wb buffer */
6651 	r = radeon_wb_init(rdev);
6652 	if (r)
6653 		return r;
6654 
6655 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6656 	if (r) {
6657 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6658 		return r;
6659 	}
6660 
6661 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6662 	if (r) {
6663 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6664 		return r;
6665 	}
6666 
6667 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6668 	if (r) {
6669 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6670 		return r;
6671 	}
6672 
6673 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6674 	if (r) {
6675 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6676 		return r;
6677 	}
6678 
6679 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6680 	if (r) {
6681 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6682 		return r;
6683 	}
6684 
6685 	si_uvd_start(rdev);
6686 	si_vce_start(rdev);
6687 
6688 	/* Enable IRQ */
6689 	if (!rdev->irq.installed) {
6690 		r = radeon_irq_kms_init(rdev);
6691 		if (r)
6692 			return r;
6693 	}
6694 
6695 	r = si_irq_init(rdev);
6696 	if (r) {
6697 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
6698 		radeon_irq_kms_fini(rdev);
6699 		return r;
6700 	}
6701 	si_irq_set(rdev);
6702 
6703 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6704 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6705 			     RADEON_CP_PACKET2);
6706 	if (r)
6707 		return r;
6708 
6709 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6710 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6711 			     RADEON_CP_PACKET2);
6712 	if (r)
6713 		return r;
6714 
6715 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6716 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6717 			     RADEON_CP_PACKET2);
6718 	if (r)
6719 		return r;
6720 
6721 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6722 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6723 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6724 	if (r)
6725 		return r;
6726 
6727 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6728 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6729 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6730 	if (r)
6731 		return r;
6732 
6733 	r = si_cp_load_microcode(rdev);
6734 	if (r)
6735 		return r;
6736 	r = si_cp_resume(rdev);
6737 	if (r)
6738 		return r;
6739 
6740 	r = cayman_dma_resume(rdev);
6741 	if (r)
6742 		return r;
6743 
6744 	si_uvd_resume(rdev);
6745 	si_vce_resume(rdev);
6746 
6747 	r = radeon_ib_pool_init(rdev);
6748 	if (r) {
6749 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6750 		return r;
6751 	}
6752 
6753 	r = radeon_vm_manager_init(rdev);
6754 	if (r) {
6755 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6756 		return r;
6757 	}
6758 
6759 	r = radeon_audio_init(rdev);
6760 	if (r)
6761 		return r;
6762 
6763 	return 0;
6764 }
6765 
6766 int si_resume(struct radeon_device *rdev)
6767 {
6768 	int r;
6769 
6770 	/* Do not reset the GPU before posting; on rv770 hw, unlike on r500
6771 	 * hw, posting will perform the tasks necessary to bring the GPU
6772 	 * back into good shape.
6773 	 */
6774 	/* post card */
6775 	atom_asic_init(rdev->mode_info.atom_context);
6776 
6777 	/* init golden registers */
6778 	si_init_golden_registers(rdev);
6779 
6780 	if (rdev->pm.pm_method == PM_METHOD_DPM)
6781 		radeon_pm_resume(rdev);
6782 
6783 	rdev->accel_working = true;
6784 	r = si_startup(rdev);
6785 	if (r) {
6786 		DRM_ERROR("si startup failed on resume\n");
6787 		rdev->accel_working = false;
6788 		return r;
6789 	}
6790 
6791 	return r;
6792 
6793 }
6794 
6795 int si_suspend(struct radeon_device *rdev)
6796 {
6797 	radeon_pm_suspend(rdev);
6798 	radeon_audio_fini(rdev);
6799 	radeon_vm_manager_fini(rdev);
6800 	si_cp_enable(rdev, false);
6801 	cayman_dma_stop(rdev);
6802 	if (rdev->has_uvd) {
6803 		radeon_uvd_suspend(rdev);
6804 		uvd_v1_0_fini(rdev);
6805 	}
6806 	if (rdev->has_vce)
6807 		radeon_vce_suspend(rdev);
6808 	si_fini_pg(rdev);
6809 	si_fini_cg(rdev);
6810 	si_irq_suspend(rdev);
6811 	radeon_wb_disable(rdev);
6812 	si_pcie_gart_disable(rdev);
6813 	return 0;
6814 }
6815 
6816 /* The plan is to move initialization into this function and use
6817  * helper functions so that radeon_device_init() does pretty much
6818  * nothing more than call asic-specific functions. This should
6819  * also allow us to remove a bunch of callback functions,
6820  * vram_info for example.
6821  */
6822 int si_init(struct radeon_device *rdev)
6823 {
6824 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6825 	int r;
6826 
6827 	/* Read BIOS */
6828 	if (!radeon_get_bios(rdev)) {
6829 		if (ASIC_IS_AVIVO(rdev))
6830 			return -EINVAL;
6831 	}
6832 	/* Must be an ATOMBIOS */
6833 	if (!rdev->is_atom_bios) {
6834 		dev_err(rdev->dev, "Expecting atombios for SI GPU\n");
6835 		return -EINVAL;
6836 	}
6837 	r = radeon_atombios_init(rdev);
6838 	if (r)
6839 		return r;
6840 
6841 	/* Post card if necessary */
6842 	if (!radeon_card_posted(rdev)) {
6843 		if (!rdev->bios) {
6844 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6845 			return -EINVAL;
6846 		}
6847 		DRM_INFO("GPU not posted. posting now...\n");
6848 		atom_asic_init(rdev->mode_info.atom_context);
6849 	}
6850 	/* init golden registers */
6851 	si_init_golden_registers(rdev);
6852 	/* Initialize scratch registers */
6853 	si_scratch_init(rdev);
6854 	/* Initialize surface registers */
6855 	radeon_surface_init(rdev);
6856 	/* Initialize clocks */
6857 	radeon_get_clock_info(rdev->ddev);
6858 
6859 	/* Fence driver */
6860 	radeon_fence_driver_init(rdev);
6861 
6862 	/* initialize memory controller */
6863 	r = si_mc_init(rdev);
6864 	if (r)
6865 		return r;
6866 	/* Memory manager */
6867 	r = radeon_bo_init(rdev);
6868 	if (r)
6869 		return r;
6870 
6871 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6872 	    !rdev->rlc_fw || !rdev->mc_fw) {
6873 		r = si_init_microcode(rdev);
6874 		if (r) {
6875 			DRM_ERROR("Failed to load firmware!\n");
6876 			return r;
6877 		}
6878 	}
6879 
6880 	/* Initialize power management */
6881 	radeon_pm_init(rdev);
6882 
6883 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6884 	ring->ring_obj = NULL;
6885 	r600_ring_init(rdev, ring, 1024 * 1024);
6886 
6887 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6888 	ring->ring_obj = NULL;
6889 	r600_ring_init(rdev, ring, 1024 * 1024);
6890 
6891 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6892 	ring->ring_obj = NULL;
6893 	r600_ring_init(rdev, ring, 1024 * 1024);
6894 
6895 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6896 	ring->ring_obj = NULL;
6897 	r600_ring_init(rdev, ring, 64 * 1024);
6898 
6899 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6900 	ring->ring_obj = NULL;
6901 	r600_ring_init(rdev, ring, 64 * 1024);
6902 
6903 	si_uvd_init(rdev);
6904 	si_vce_init(rdev);
6905 
6906 	rdev->ih.ring_obj = NULL;
6907 	r600_ih_ring_init(rdev, 64 * 1024);
6908 
6909 	r = r600_pcie_gart_init(rdev);
6910 	if (r)
6911 		return r;
6912 
6913 	rdev->accel_working = true;
6914 	r = si_startup(rdev);
6915 	if (r) {
6916 		dev_err(rdev->dev, "disabling GPU acceleration\n");
6917 		si_cp_fini(rdev);
6918 		cayman_dma_fini(rdev);
6919 		si_irq_fini(rdev);
6920 		sumo_rlc_fini(rdev);
6921 		radeon_wb_fini(rdev);
6922 		radeon_ib_pool_fini(rdev);
6923 		radeon_vm_manager_fini(rdev);
6924 		radeon_irq_kms_fini(rdev);
6925 		si_pcie_gart_fini(rdev);
6926 		rdev->accel_working = false;
6927 	}
6928 
6929 	/* Don't start up if the MC ucode is missing.
6930 	 * The default clocks and voltages before the MC ucode
6931 	 * is loaded are not sufficient for advanced operations.
6932 	 */
6933 	if (!rdev->mc_fw) {
6934 		DRM_ERROR("radeon: MC ucode required for SI+.\n");
6935 		return -EINVAL;
6936 	}
6937 
6938 	return 0;
6939 }
6940 
6941 void si_fini(struct radeon_device *rdev)
6942 {
6943 	radeon_pm_fini(rdev);
6944 	si_cp_fini(rdev);
6945 	cayman_dma_fini(rdev);
6946 	si_fini_pg(rdev);
6947 	si_fini_cg(rdev);
6948 	si_irq_fini(rdev);
6949 	sumo_rlc_fini(rdev);
6950 	radeon_wb_fini(rdev);
6951 	radeon_vm_manager_fini(rdev);
6952 	radeon_ib_pool_fini(rdev);
6953 	radeon_irq_kms_fini(rdev);
6954 	if (rdev->has_uvd) {
6955 		uvd_v1_0_fini(rdev);
6956 		radeon_uvd_fini(rdev);
6957 	}
6958 	if (rdev->has_vce)
6959 		radeon_vce_fini(rdev);
6960 	si_pcie_gart_fini(rdev);
6961 	r600_vram_scratch_fini(rdev);
6962 	radeon_gem_fini(rdev);
6963 	radeon_fence_driver_fini(rdev);
6964 	radeon_bo_fini(rdev);
6965 	radeon_atombios_fini(rdev);
6966 	kfree(rdev->bios);
6967 	rdev->bios = NULL;
6968 }
6969 
6970 /**
6971  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6972  *
6973  * @rdev: radeon_device pointer
6974  *
6975  * Fetches a GPU clock counter snapshot (SI).
6976  * Returns the 64-bit clock counter snapshot.
6977  */
6978 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6979 {
6980 	uint64_t clock;
6981 
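	/* Writing the capture register latches the free-running counter
	 * so that the LSB/MSB halves read back as one consistent 64-bit
	 * value; the mutex serializes captures between callers.
	 */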
6982 	mutex_lock(&rdev->gpu_clock_mutex);
6983 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6984 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6985 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6986 	mutex_unlock(&rdev->gpu_clock_mutex);
6987 	return clock;
6988 }
6989 
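/**
 * si_set_uvd_clocks - program the UVD PLL (SI)
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock
 * @dclk: requested UVD decode clock
 *
 * Switches VCLK/DCLK to the bypass clock, computes the UPLL dividers
 * for the requested frequencies, programs the PLL and then switches
 * the clocks back to the PLL outputs.  Passing 0 for either clock
 * leaves the PLL in bypass mode.
 * Returns 0 on success, error code on failure.
 */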
6990 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6991 {
6992 	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
6993 	int r;
6994 
6995 	/* bypass vclk and dclk with bclk */
6996 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
6997 		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
6998 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
6999 
7000 	/* put PLL in bypass mode */
7001 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
7002 
7003 	if (!vclk || !dclk) {
7004 		/* keep the bypass mode */
7005 		return 0;
7006 	}
7007 
7008 	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
7009 					  16384, 0x03FFFFFF, 0, 128, 5,
7010 					  &fb_div, &vclk_div, &dclk_div);
7011 	if (r)
7012 		return r;
7013 
7014 	/* set RESET_ANTI_MUX to 0 */
7015 	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7016 
7017 	/* set VCO_MODE to 1 */
7018 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
7019 
7020 	/* disable sleep mode */
7021 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
7022 
7023 	/* deassert UPLL_RESET */
7024 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7025 
7026 	mdelay(1);
7027 
7028 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7029 	if (r)
7030 		return r;
7031 
7032 	/* assert UPLL_RESET again */
7033 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
7034 
7035 	/* disable spread spectrum. */
7036 	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7037 
7038 	/* set feedback divider */
7039 	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
7040 
7041 	/* set ref divider to 0 */
7042 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
7043 
7044 	if (fb_div < 307200)
7045 		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
7046 	else
7047 		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
7048 
7049 	/* set PDIV_A and PDIV_B */
7050 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7051 		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
7052 		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
7053 
7054 	/* give the PLL some time to settle */
7055 	mdelay(15);
7056 
7057 	/* deassert PLL_RESET */
7058 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7059 
7060 	mdelay(15);
7061 
7062 	/* switch from bypass mode to normal mode */
7063 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
7064 
7065 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7066 	if (r)
7067 		return r;
7068 
7069 	/* switch VCLK and DCLK selection */
7070 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7071 		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
7072 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7073 
7074 	mdelay(100);
7075 
7076 	return 0;
7077 }
7078 
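/**
 * si_pcie_gen3_enable - raise the PCIE link speed if possible (SI)
 *
 * @rdev: radeon_device pointer
 *
 * If the root port is gen2 or gen3 capable and the link is not
 * already running at that rate, retrains the link at the higher
 * speed, redoing gen3 equalization when necessary.  Skipped on IGP
 * parts and when the radeon.pcie_gen2 module parameter is 0.
 */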
7079 static void si_pcie_gen3_enable(struct radeon_device *rdev)
7080 {
7081 	struct pci_dev *root = rdev->pdev->bus->self;
7082 	enum pci_bus_speed speed_cap;
7083 	u32 speed_cntl, current_data_rate;
7084 	int i;
7085 	u16 tmp16;
7086 
7087 	if (pci_is_root_bus(rdev->pdev->bus))
7088 		return;
7089 
7090 	if (radeon_pcie_gen2 == 0)
7091 		return;
7092 
7093 	if (rdev->flags & RADEON_IS_IGP)
7094 		return;
7095 
7096 	if (!(rdev->flags & RADEON_IS_PCIE))
7097 		return;
7098 
7099 	speed_cap = pcie_get_speed_cap(root);
7100 	if (speed_cap == PCI_SPEED_UNKNOWN)
7101 		return;
7102 
7103 	if ((speed_cap != PCIE_SPEED_8_0GT) &&
7104 	    (speed_cap != PCIE_SPEED_5_0GT))
7105 		return;
7106 
7107 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7108 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7109 		LC_CURRENT_DATA_RATE_SHIFT;
7110 	if (speed_cap == PCIE_SPEED_8_0GT) {
7111 		if (current_data_rate == 2) {
7112 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7113 			return;
7114 		}
7115 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7116 	} else if (speed_cap == PCIE_SPEED_5_0GT) {
7117 		if (current_data_rate == 1) {
7118 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7119 			return;
7120 		}
7121 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7122 	}
7123 
7124 	if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
7125 		return;
7126 
7127 	if (speed_cap == PCIE_SPEED_8_0GT) {
7128 		/* retry equalization if gen3 is not already enabled */
7129 		if (current_data_rate != 2) {
7130 			u16 bridge_cfg, gpu_cfg;
7131 			u16 bridge_cfg2, gpu_cfg2;
7132 			u32 max_lw, current_lw, tmp;
7133 
7134 			pcie_capability_read_word(root, PCI_EXP_LNKCTL,
7135 						  &bridge_cfg);
7136 			pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL,
7137 						  &gpu_cfg);
7138 
7139 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7140 			pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
7141 
7142 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7143 			pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL,
7144 						   tmp16);
7145 
7146 			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7147 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7148 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7149 
7150 			if (current_lw < max_lw) {
7151 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7152 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
7153 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7154 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7155 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7156 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7157 				}
7158 			}
7159 
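			/* Retry equalization up to 10 times: quiesce the
			 * link, redo EQ, then restore the LNKCTL (HAWD) and
			 * LNKCTL2 (compliance/margin) state saved from both
			 * the bridge and the GPU on each pass.
			 */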
7160 			for (i = 0; i < 10; i++) {
7161 				/* check status */
7162 				pcie_capability_read_word(rdev->pdev,
7163 							  PCI_EXP_DEVSTA,
7164 							  &tmp16);
7165 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7166 					break;
7167 
7168 				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
7169 							  &bridge_cfg);
7170 				pcie_capability_read_word(rdev->pdev,
7171 							  PCI_EXP_LNKCTL,
7172 							  &gpu_cfg);
7173 
7174 				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
7175 							  &bridge_cfg2);
7176 				pcie_capability_read_word(rdev->pdev,
7177 							  PCI_EXP_LNKCTL2,
7178 							  &gpu_cfg2);
7179 
7180 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7181 				tmp |= LC_SET_QUIESCE;
7182 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7183 
7184 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7185 				tmp |= LC_REDO_EQ;
7186 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7187 
7188 				msleep(100);
7189 
7190 				/* linkctl */
7191 				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
7192 							  &tmp16);
7193 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7194 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7195 				pcie_capability_write_word(root,
7196 							   PCI_EXP_LNKCTL,
7197 							   tmp16);
7198 
7199 				pcie_capability_read_word(rdev->pdev,
7200 							  PCI_EXP_LNKCTL,
7201 							  &tmp16);
7202 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7203 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7204 				pcie_capability_write_word(rdev->pdev,
7205 							   PCI_EXP_LNKCTL,
7206 							   tmp16);
7207 
7208 				/* linkctl2 */
7209 				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
7210 							  &tmp16);
7211 				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
7212 					   PCI_EXP_LNKCTL2_TX_MARGIN);
7213 				tmp16 |= (bridge_cfg2 &
7214 					  (PCI_EXP_LNKCTL2_ENTER_COMP |
7215 					   PCI_EXP_LNKCTL2_TX_MARGIN));
7216 				pcie_capability_write_word(root,
7217 							   PCI_EXP_LNKCTL2,
7218 							   tmp16);
7219 
7220 				pcie_capability_read_word(rdev->pdev,
7221 							  PCI_EXP_LNKCTL2,
7222 							  &tmp16);
7223 				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
7224 					   PCI_EXP_LNKCTL2_TX_MARGIN);
7225 				tmp16 |= (gpu_cfg2 &
7226 					  (PCI_EXP_LNKCTL2_ENTER_COMP |
7227 					   PCI_EXP_LNKCTL2_TX_MARGIN));
7228 				pcie_capability_write_word(rdev->pdev,
7229 							   PCI_EXP_LNKCTL2,
7230 							   tmp16);
7231 
7232 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7233 				tmp &= ~LC_SET_QUIESCE;
7234 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7235 			}
7236 		}
7237 	}
7238 
7239 	/* set the link speed */
7240 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7241 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7242 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7243 
7244 	pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL2, &tmp16);
7245 	tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
7246 	if (speed_cap == PCIE_SPEED_8_0GT)
7247 		tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
7248 	else if (speed_cap == PCIE_SPEED_5_0GT)
7249 		tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
7250 	else
7251 		tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
7252 	pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL2, tmp16);
7253 
7254 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7255 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7256 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7257 
7258 	for (i = 0; i < rdev->usec_timeout; i++) {
7259 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7260 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7261 			break;
7262 		udelay(1);
7263 	}
7264 }
7265 
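/**
 * si_program_aspm - configure ASPM behaviour (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Programs the L0s/L1 inactivity timers, PLL powerdown in L1 and the
 * dynamic lane power states, and, when the upstream bridge advertises
 * clock power management, reroutes the bypass clocks so the reference
 * clock can be gated via CLKREQ#.  Skipped when the radeon.aspm
 * module parameter is 0.
 */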
7266 static void si_program_aspm(struct radeon_device *rdev)
7267 {
7268 	u32 data, orig;
7269 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7270 	bool disable_clkreq = false;
7271 
7272 	if (radeon_aspm == 0)
7273 		return;
7274 
7275 	if (!(rdev->flags & RADEON_IS_PCIE))
7276 		return;
7277 
7278 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7279 	data &= ~LC_XMIT_N_FTS_MASK;
7280 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7281 	if (orig != data)
7282 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7283 
7284 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7285 	data |= LC_GO_TO_RECOVERY;
7286 	if (orig != data)
7287 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7288 
7289 	orig = data = RREG32_PCIE(PCIE_P_CNTL);
7290 	data |= P_IGNORE_EDB_ERR;
7291 	if (orig != data)
7292 		WREG32_PCIE(PCIE_P_CNTL, data);
7293 
7294 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7295 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7296 	data |= LC_PMI_TO_L1_DIS;
7297 	if (!disable_l0s)
7298 		data |= LC_L0S_INACTIVITY(7);
7299 
7300 	if (!disable_l1) {
7301 		data |= LC_L1_INACTIVITY(7);
7302 		data &= ~LC_PMI_TO_L1_DIS;
7303 		if (orig != data)
7304 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7305 
7306 		if (!disable_plloff_in_l1) {
7307 			bool clk_req_support;
7308 
7309 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7310 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7311 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7312 			if (orig != data)
7313 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7314 
7315 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7316 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7317 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7318 			if (orig != data)
7319 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7320 
7321 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7322 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7323 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7324 			if (orig != data)
7325 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7326 
7327 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7328 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7329 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7330 			if (orig != data)
7331 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7332 
7333 			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7334 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7335 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7336 				if (orig != data)
7337 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7338 
7339 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7340 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7341 				if (orig != data)
7342 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7343 
7344 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7345 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7346 				if (orig != data)
7347 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7348 
7349 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7350 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7351 				if (orig != data)
7352 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7353 
7354 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7355 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7356 				if (orig != data)
7357 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7358 
7359 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7360 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7361 				if (orig != data)
7362 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7363 
7364 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7365 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7366 				if (orig != data)
7367 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7368 
7369 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7370 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7371 				if (orig != data)
7372 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7373 			}
7374 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7375 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7376 			data |= LC_DYN_LANES_PWR_STATE(3);
7377 			if (orig != data)
7378 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7379 
7380 			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7381 			data &= ~LS2_EXIT_TIME_MASK;
7382 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7383 				data |= LS2_EXIT_TIME(5);
7384 			if (orig != data)
7385 				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7386 
7387 			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7388 			data &= ~LS2_EXIT_TIME_MASK;
7389 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7390 				data |= LS2_EXIT_TIME(5);
7391 			if (orig != data)
7392 				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7393 
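			/* Deeper clock powerdown is only safe when the
			 * upstream bridge supports clock power management
			 * (CLKREQ#), so check its link capabilities first.
			 */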
7394 			if (!disable_clkreq &&
7395 			    !pci_is_root_bus(rdev->pdev->bus)) {
7396 				struct pci_dev *root = rdev->pdev->bus->self;
7397 				u32 lnkcap;
7398 
7399 				clk_req_support = false;
7400 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7401 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7402 					clk_req_support = true;
7403 			} else {
7404 				clk_req_support = false;
7405 			}
7406 
7407 			if (clk_req_support) {
7408 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7409 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7410 				if (orig != data)
7411 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7412 
7413 				orig = data = RREG32(THM_CLK_CNTL);
7414 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7415 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7416 				if (orig != data)
7417 					WREG32(THM_CLK_CNTL, data);
7418 
7419 				orig = data = RREG32(MISC_CLK_CNTL);
7420 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7421 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7422 				if (orig != data)
7423 					WREG32(MISC_CLK_CNTL, data);
7424 
7425 				orig = data = RREG32(CG_CLKPIN_CNTL);
7426 				data &= ~BCLK_AS_XCLK;
7427 				if (orig != data)
7428 					WREG32(CG_CLKPIN_CNTL, data);
7429 
7430 				orig = data = RREG32(CG_CLKPIN_CNTL_2);
7431 				data &= ~FORCE_BIF_REFCLK_EN;
7432 				if (orig != data)
7433 					WREG32(CG_CLKPIN_CNTL_2, data);
7434 
7435 				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7436 				data &= ~MPLL_CLKOUT_SEL_MASK;
7437 				data |= MPLL_CLKOUT_SEL(4);
7438 				if (orig != data)
7439 					WREG32(MPLL_BYPASSCLK_SEL, data);
7440 
7441 				orig = data = RREG32(SPLL_CNTL_MODE);
7442 				data &= ~SPLL_REFCLK_SEL_MASK;
7443 				if (orig != data)
7444 					WREG32(SPLL_CNTL_MODE, data);
7445 			}
7446 		}
7447 	} else {
7448 		if (orig != data)
7449 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7450 	}
7451 
7452 	orig = data = RREG32_PCIE(PCIE_CNTL2);
7453 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7454 	if (orig != data)
7455 		WREG32_PCIE(PCIE_CNTL2, data);
7456 
7457 	if (!disable_l0s) {
7458 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7459 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7460 			data = RREG32_PCIE(PCIE_LC_STATUS1);
7461 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7462 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7463 				data &= ~LC_L0S_INACTIVITY_MASK;
7464 				if (orig != data)
7465 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7466 			}
7467 		}
7468 	}
7469 }
7470 
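/**
 * si_vce_send_vcepll_ctlreq - wait for the VCE PLL to ack a request
 *
 * @rdev: radeon_device pointer
 *
 * Pulses CTLREQ in CG_VCEPLL_FUNC_CNTL and polls for both CTLACK
 * bits, giving the PLL up to a second to lock in the new settings.
 * Returns 0 on success, -ETIMEDOUT if the PLL never acknowledges.
 */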
7471 static int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
7472 {
7473 	unsigned i;
7474 
7475 	/* make sure VCEPLL_CTLREQ is deasserted */
7476 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7477 
7478 	mdelay(10);
7479 
7480 	/* assert VCEPLL_CTLREQ */
7481 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
7482 
7483 	/* wait for CTLACK and CTLACK2 to get asserted */
7484 	for (i = 0; i < 100; ++i) {
7485 		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
7486 		if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
7487 			break;
7488 		mdelay(10);
7489 	}
7490 
7491 	/* deassert VCEPLL_CTLREQ */
7492 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7493 
7494 	if (i == 100) {
7495 		DRM_ERROR("Timeout setting VCE clocks!\n");
7496 		return -ETIMEDOUT;
7497 	}
7498 
7499 	return 0;
7500 }
7501 
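/**
 * si_set_vce_clocks - program the VCE PLL (SI)
 *
 * @rdev: radeon_device pointer
 * @evclk: requested VCE video encode clock
 * @ecclk: requested VCE encode core clock
 *
 * Mirrors si_set_uvd_clocks() for the VCE PLL: switches EVCLK/ECCLK
 * to the bypass clock, programs the PLL dividers, then switches the
 * clocks back to the PLL outputs.  Passing 0 for either clock keeps
 * the PLL bypassed and puts it to sleep.
 * Returns 0 on success, error code on failure.
 */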
7502 int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
7503 {
7504 	unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
7505 	int r;
7506 
7507 	/* bypass evclk and ecclk with bclk */
7508 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7509 		     EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
7510 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7511 
7512 	/* put PLL in bypass mode */
7513 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
7514 		     ~VCEPLL_BYPASS_EN_MASK);
7515 
7516 	if (!evclk || !ecclk) {
7517 		/* keep the bypass mode, put PLL to sleep */
7518 		WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7519 			     ~VCEPLL_SLEEP_MASK);
7520 		return 0;
7521 	}
7522 
7523 	r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
7524 					  16384, 0x03FFFFFF, 0, 128, 5,
7525 					  &fb_div, &evclk_div, &ecclk_div);
7526 	if (r)
7527 		return r;
7528 
7529 	/* set RESET_ANTI_MUX to 0 */
7530 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7531 
7532 	/* set VCO_MODE to 1 */
7533 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
7534 		     ~VCEPLL_VCO_MODE_MASK);
7535 
7536 	/* toggle VCEPLL_SLEEP to 1 then back to 0 */
7537 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7538 		     ~VCEPLL_SLEEP_MASK);
7539 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);
7540 
7541 	/* deassert VCEPLL_RESET */
7542 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7543 
7544 	mdelay(1);
7545 
7546 	r = si_vce_send_vcepll_ctlreq(rdev);
7547 	if (r)
7548 		return r;
7549 
7550 	/* assert VCEPLL_RESET again */
7551 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);
7552 
7553 	/* disable spread spectrum. */
7554 	WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7555 
7556 	/* set feedback divider */
7557 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);
7558 
7559 	/* set ref divider to 0 */
7560 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);
7561 
7562 	/* set PDIV_A and PDIV_B */
7563 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7564 		     VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
7565 		     ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));
7566 
7567 	/* give the PLL some time to settle */
7568 	mdelay(15);
7569 
7570 	/* deassert PLL_RESET */
7571 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7572 
7573 	mdelay(15);
7574 
7575 	/* switch from bypass mode to normal mode */
7576 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);
7577 
7578 	r = si_vce_send_vcepll_ctlreq(rdev);
7579 	if (r)
7580 		return r;
7581 
7582 	/* switch EVCLK and ECCLK selection */
7583 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7584 		     EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
7585 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7586 
7587 	mdelay(100);
7588 
7589 	return 0;
7590 }
7591