xref: /openbmc/linux/drivers/gpu/drm/radeon/si.c (revision 6dfcd296)
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include <drm/radeon_drm.h>
32 #include "sid.h"
33 #include "atom.h"
34 #include "si_blit_shaders.h"
35 #include "clearstate_si.h"
36 #include "radeon_ucode.h"
37 
38 
39 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
40 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
41 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
42 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
43 MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
44 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
45 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
46 
47 MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
48 MODULE_FIRMWARE("radeon/tahiti_me.bin");
49 MODULE_FIRMWARE("radeon/tahiti_ce.bin");
50 MODULE_FIRMWARE("radeon/tahiti_mc.bin");
51 MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
52 MODULE_FIRMWARE("radeon/tahiti_smc.bin");
53 MODULE_FIRMWARE("radeon/tahiti_k_smc.bin");
54 
55 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
56 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
57 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
58 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
59 MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
60 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
61 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
62 
63 MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
64 MODULE_FIRMWARE("radeon/pitcairn_me.bin");
65 MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
66 MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
67 MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
68 MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
69 MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");
70 
71 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
72 MODULE_FIRMWARE("radeon/VERDE_me.bin");
73 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
74 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
75 MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
76 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
77 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
78 
79 MODULE_FIRMWARE("radeon/verde_pfp.bin");
80 MODULE_FIRMWARE("radeon/verde_me.bin");
81 MODULE_FIRMWARE("radeon/verde_ce.bin");
82 MODULE_FIRMWARE("radeon/verde_mc.bin");
83 MODULE_FIRMWARE("radeon/verde_rlc.bin");
84 MODULE_FIRMWARE("radeon/verde_smc.bin");
85 MODULE_FIRMWARE("radeon/verde_k_smc.bin");
86 
87 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
88 MODULE_FIRMWARE("radeon/OLAND_me.bin");
89 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
90 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
91 MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
92 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
93 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
94 
95 MODULE_FIRMWARE("radeon/oland_pfp.bin");
96 MODULE_FIRMWARE("radeon/oland_me.bin");
97 MODULE_FIRMWARE("radeon/oland_ce.bin");
98 MODULE_FIRMWARE("radeon/oland_mc.bin");
99 MODULE_FIRMWARE("radeon/oland_rlc.bin");
100 MODULE_FIRMWARE("radeon/oland_smc.bin");
101 MODULE_FIRMWARE("radeon/oland_k_smc.bin");
102 
103 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
104 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
105 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
106 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
107 MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
108 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
109 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
110 
111 MODULE_FIRMWARE("radeon/hainan_pfp.bin");
112 MODULE_FIRMWARE("radeon/hainan_me.bin");
113 MODULE_FIRMWARE("radeon/hainan_ce.bin");
114 MODULE_FIRMWARE("radeon/hainan_mc.bin");
115 MODULE_FIRMWARE("radeon/hainan_rlc.bin");
116 MODULE_FIRMWARE("radeon/hainan_smc.bin");
117 MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
118 
119 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
120 static void si_pcie_gen3_enable(struct radeon_device *rdev);
121 static void si_program_aspm(struct radeon_device *rdev);
122 extern void sumo_rlc_fini(struct radeon_device *rdev);
123 extern int sumo_rlc_init(struct radeon_device *rdev);
124 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
125 extern void r600_ih_ring_fini(struct radeon_device *rdev);
126 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
127 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
128 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
129 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
130 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
131 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
132 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
133 					 bool enable);
134 static void si_init_pg(struct radeon_device *rdev);
135 static void si_init_cg(struct radeon_device *rdev);
136 static void si_fini_pg(struct radeon_device *rdev);
137 static void si_fini_cg(struct radeon_device *rdev);
138 static void si_rlc_stop(struct radeon_device *rdev);
139 
140 static const u32 verde_rlc_save_restore_register_list[] =
141 {
142 	(0x8000 << 16) | (0x98f4 >> 2),
143 	0x00000000,
144 	(0x8040 << 16) | (0x98f4 >> 2),
145 	0x00000000,
146 	(0x8000 << 16) | (0xe80 >> 2),
147 	0x00000000,
148 	(0x8040 << 16) | (0xe80 >> 2),
149 	0x00000000,
150 	(0x8000 << 16) | (0x89bc >> 2),
151 	0x00000000,
152 	(0x8040 << 16) | (0x89bc >> 2),
153 	0x00000000,
154 	(0x8000 << 16) | (0x8c1c >> 2),
155 	0x00000000,
156 	(0x8040 << 16) | (0x8c1c >> 2),
157 	0x00000000,
158 	(0x9c00 << 16) | (0x98f0 >> 2),
159 	0x00000000,
160 	(0x9c00 << 16) | (0xe7c >> 2),
161 	0x00000000,
162 	(0x8000 << 16) | (0x9148 >> 2),
163 	0x00000000,
164 	(0x8040 << 16) | (0x9148 >> 2),
165 	0x00000000,
166 	(0x9c00 << 16) | (0x9150 >> 2),
167 	0x00000000,
168 	(0x9c00 << 16) | (0x897c >> 2),
169 	0x00000000,
170 	(0x9c00 << 16) | (0x8d8c >> 2),
171 	0x00000000,
172 	(0x9c00 << 16) | (0xac54 >> 2),
173 	0X00000000,
174 	0x3,
175 	(0x9c00 << 16) | (0x98f8 >> 2),
176 	0x00000000,
177 	(0x9c00 << 16) | (0x9910 >> 2),
178 	0x00000000,
179 	(0x9c00 << 16) | (0x9914 >> 2),
180 	0x00000000,
181 	(0x9c00 << 16) | (0x9918 >> 2),
182 	0x00000000,
183 	(0x9c00 << 16) | (0x991c >> 2),
184 	0x00000000,
185 	(0x9c00 << 16) | (0x9920 >> 2),
186 	0x00000000,
187 	(0x9c00 << 16) | (0x9924 >> 2),
188 	0x00000000,
189 	(0x9c00 << 16) | (0x9928 >> 2),
190 	0x00000000,
191 	(0x9c00 << 16) | (0x992c >> 2),
192 	0x00000000,
193 	(0x9c00 << 16) | (0x9930 >> 2),
194 	0x00000000,
195 	(0x9c00 << 16) | (0x9934 >> 2),
196 	0x00000000,
197 	(0x9c00 << 16) | (0x9938 >> 2),
198 	0x00000000,
199 	(0x9c00 << 16) | (0x993c >> 2),
200 	0x00000000,
201 	(0x9c00 << 16) | (0x9940 >> 2),
202 	0x00000000,
203 	(0x9c00 << 16) | (0x9944 >> 2),
204 	0x00000000,
205 	(0x9c00 << 16) | (0x9948 >> 2),
206 	0x00000000,
207 	(0x9c00 << 16) | (0x994c >> 2),
208 	0x00000000,
209 	(0x9c00 << 16) | (0x9950 >> 2),
210 	0x00000000,
211 	(0x9c00 << 16) | (0x9954 >> 2),
212 	0x00000000,
213 	(0x9c00 << 16) | (0x9958 >> 2),
214 	0x00000000,
215 	(0x9c00 << 16) | (0x995c >> 2),
216 	0x00000000,
217 	(0x9c00 << 16) | (0x9960 >> 2),
218 	0x00000000,
219 	(0x9c00 << 16) | (0x9964 >> 2),
220 	0x00000000,
221 	(0x9c00 << 16) | (0x9968 >> 2),
222 	0x00000000,
223 	(0x9c00 << 16) | (0x996c >> 2),
224 	0x00000000,
225 	(0x9c00 << 16) | (0x9970 >> 2),
226 	0x00000000,
227 	(0x9c00 << 16) | (0x9974 >> 2),
228 	0x00000000,
229 	(0x9c00 << 16) | (0x9978 >> 2),
230 	0x00000000,
231 	(0x9c00 << 16) | (0x997c >> 2),
232 	0x00000000,
233 	(0x9c00 << 16) | (0x9980 >> 2),
234 	0x00000000,
235 	(0x9c00 << 16) | (0x9984 >> 2),
236 	0x00000000,
237 	(0x9c00 << 16) | (0x9988 >> 2),
238 	0x00000000,
239 	(0x9c00 << 16) | (0x998c >> 2),
240 	0x00000000,
241 	(0x9c00 << 16) | (0x8c00 >> 2),
242 	0x00000000,
243 	(0x9c00 << 16) | (0x8c14 >> 2),
244 	0x00000000,
245 	(0x9c00 << 16) | (0x8c04 >> 2),
246 	0x00000000,
247 	(0x9c00 << 16) | (0x8c08 >> 2),
248 	0x00000000,
249 	(0x8000 << 16) | (0x9b7c >> 2),
250 	0x00000000,
251 	(0x8040 << 16) | (0x9b7c >> 2),
252 	0x00000000,
253 	(0x8000 << 16) | (0xe84 >> 2),
254 	0x00000000,
255 	(0x8040 << 16) | (0xe84 >> 2),
256 	0x00000000,
257 	(0x8000 << 16) | (0x89c0 >> 2),
258 	0x00000000,
259 	(0x8040 << 16) | (0x89c0 >> 2),
260 	0x00000000,
261 	(0x8000 << 16) | (0x914c >> 2),
262 	0x00000000,
263 	(0x8040 << 16) | (0x914c >> 2),
264 	0x00000000,
265 	(0x8000 << 16) | (0x8c20 >> 2),
266 	0x00000000,
267 	(0x8040 << 16) | (0x8c20 >> 2),
268 	0x00000000,
269 	(0x8000 << 16) | (0x9354 >> 2),
270 	0x00000000,
271 	(0x8040 << 16) | (0x9354 >> 2),
272 	0x00000000,
273 	(0x9c00 << 16) | (0x9060 >> 2),
274 	0x00000000,
275 	(0x9c00 << 16) | (0x9364 >> 2),
276 	0x00000000,
277 	(0x9c00 << 16) | (0x9100 >> 2),
278 	0x00000000,
279 	(0x9c00 << 16) | (0x913c >> 2),
280 	0x00000000,
281 	(0x8000 << 16) | (0x90e0 >> 2),
282 	0x00000000,
283 	(0x8000 << 16) | (0x90e4 >> 2),
284 	0x00000000,
285 	(0x8000 << 16) | (0x90e8 >> 2),
286 	0x00000000,
287 	(0x8040 << 16) | (0x90e0 >> 2),
288 	0x00000000,
289 	(0x8040 << 16) | (0x90e4 >> 2),
290 	0x00000000,
291 	(0x8040 << 16) | (0x90e8 >> 2),
292 	0x00000000,
293 	(0x9c00 << 16) | (0x8bcc >> 2),
294 	0x00000000,
295 	(0x9c00 << 16) | (0x8b24 >> 2),
296 	0x00000000,
297 	(0x9c00 << 16) | (0x88c4 >> 2),
298 	0x00000000,
299 	(0x9c00 << 16) | (0x8e50 >> 2),
300 	0x00000000,
301 	(0x9c00 << 16) | (0x8c0c >> 2),
302 	0x00000000,
303 	(0x9c00 << 16) | (0x8e58 >> 2),
304 	0x00000000,
305 	(0x9c00 << 16) | (0x8e5c >> 2),
306 	0x00000000,
307 	(0x9c00 << 16) | (0x9508 >> 2),
308 	0x00000000,
309 	(0x9c00 << 16) | (0x950c >> 2),
310 	0x00000000,
311 	(0x9c00 << 16) | (0x9494 >> 2),
312 	0x00000000,
313 	(0x9c00 << 16) | (0xac0c >> 2),
314 	0x00000000,
315 	(0x9c00 << 16) | (0xac10 >> 2),
316 	0x00000000,
317 	(0x9c00 << 16) | (0xac14 >> 2),
318 	0x00000000,
319 	(0x9c00 << 16) | (0xae00 >> 2),
320 	0x00000000,
321 	(0x9c00 << 16) | (0xac08 >> 2),
322 	0x00000000,
323 	(0x9c00 << 16) | (0x88d4 >> 2),
324 	0x00000000,
325 	(0x9c00 << 16) | (0x88c8 >> 2),
326 	0x00000000,
327 	(0x9c00 << 16) | (0x88cc >> 2),
328 	0x00000000,
329 	(0x9c00 << 16) | (0x89b0 >> 2),
330 	0x00000000,
331 	(0x9c00 << 16) | (0x8b10 >> 2),
332 	0x00000000,
333 	(0x9c00 << 16) | (0x8a14 >> 2),
334 	0x00000000,
335 	(0x9c00 << 16) | (0x9830 >> 2),
336 	0x00000000,
337 	(0x9c00 << 16) | (0x9834 >> 2),
338 	0x00000000,
339 	(0x9c00 << 16) | (0x9838 >> 2),
340 	0x00000000,
341 	(0x9c00 << 16) | (0x9a10 >> 2),
342 	0x00000000,
343 	(0x8000 << 16) | (0x9870 >> 2),
344 	0x00000000,
345 	(0x8000 << 16) | (0x9874 >> 2),
346 	0x00000000,
347 	(0x8001 << 16) | (0x9870 >> 2),
348 	0x00000000,
349 	(0x8001 << 16) | (0x9874 >> 2),
350 	0x00000000,
351 	(0x8040 << 16) | (0x9870 >> 2),
352 	0x00000000,
353 	(0x8040 << 16) | (0x9874 >> 2),
354 	0x00000000,
355 	(0x8041 << 16) | (0x9870 >> 2),
356 	0x00000000,
357 	(0x8041 << 16) | (0x9874 >> 2),
358 	0x00000000,
359 	0x00000000
360 };
361 
362 static const u32 tahiti_golden_rlc_registers[] =
363 {
364 	0xc424, 0xffffffff, 0x00601005,
365 	0xc47c, 0xffffffff, 0x10104040,
366 	0xc488, 0xffffffff, 0x0100000a,
367 	0xc314, 0xffffffff, 0x00000800,
368 	0xc30c, 0xffffffff, 0x800000f4,
369 	0xf4a8, 0xffffffff, 0x00000000
370 };
371 
372 static const u32 tahiti_golden_registers[] =
373 {
374 	0x9a10, 0x00010000, 0x00018208,
375 	0x9830, 0xffffffff, 0x00000000,
376 	0x9834, 0xf00fffff, 0x00000400,
377 	0x9838, 0x0002021c, 0x00020200,
378 	0xc78, 0x00000080, 0x00000000,
379 	0xd030, 0x000300c0, 0x00800040,
380 	0xd830, 0x000300c0, 0x00800040,
381 	0x5bb0, 0x000000f0, 0x00000070,
382 	0x5bc0, 0x00200000, 0x50100000,
383 	0x7030, 0x31000311, 0x00000011,
384 	0x277c, 0x00000003, 0x000007ff,
385 	0x240c, 0x000007ff, 0x00000000,
386 	0x8a14, 0xf000001f, 0x00000007,
387 	0x8b24, 0xffffffff, 0x00ffffff,
388 	0x8b10, 0x0000ff0f, 0x00000000,
389 	0x28a4c, 0x07ffffff, 0x4e000000,
390 	0x28350, 0x3f3f3fff, 0x2a00126a,
391 	0x30, 0x000000ff, 0x0040,
392 	0x34, 0x00000040, 0x00004040,
393 	0x9100, 0x07ffffff, 0x03000000,
394 	0x8e88, 0x01ff1f3f, 0x00000000,
395 	0x8e84, 0x01ff1f3f, 0x00000000,
396 	0x9060, 0x0000007f, 0x00000020,
397 	0x9508, 0x00010000, 0x00010000,
398 	0xac14, 0x00000200, 0x000002fb,
399 	0xac10, 0xffffffff, 0x0000543b,
400 	0xac0c, 0xffffffff, 0xa9210876,
401 	0x88d0, 0xffffffff, 0x000fff40,
402 	0x88d4, 0x0000001f, 0x00000010,
403 	0x1410, 0x20000000, 0x20fffed8,
404 	0x15c0, 0x000c0fc0, 0x000c0400
405 };
406 
407 static const u32 tahiti_golden_registers2[] =
408 {
409 	0xc64, 0x00000001, 0x00000001
410 };
411 
412 static const u32 pitcairn_golden_rlc_registers[] =
413 {
414 	0xc424, 0xffffffff, 0x00601004,
415 	0xc47c, 0xffffffff, 0x10102020,
416 	0xc488, 0xffffffff, 0x01000020,
417 	0xc314, 0xffffffff, 0x00000800,
418 	0xc30c, 0xffffffff, 0x800000a4
419 };
420 
421 static const u32 pitcairn_golden_registers[] =
422 {
423 	0x9a10, 0x00010000, 0x00018208,
424 	0x9830, 0xffffffff, 0x00000000,
425 	0x9834, 0xf00fffff, 0x00000400,
426 	0x9838, 0x0002021c, 0x00020200,
427 	0xc78, 0x00000080, 0x00000000,
428 	0xd030, 0x000300c0, 0x00800040,
429 	0xd830, 0x000300c0, 0x00800040,
430 	0x5bb0, 0x000000f0, 0x00000070,
431 	0x5bc0, 0x00200000, 0x50100000,
432 	0x7030, 0x31000311, 0x00000011,
433 	0x2ae4, 0x00073ffe, 0x000022a2,
434 	0x240c, 0x000007ff, 0x00000000,
435 	0x8a14, 0xf000001f, 0x00000007,
436 	0x8b24, 0xffffffff, 0x00ffffff,
437 	0x8b10, 0x0000ff0f, 0x00000000,
438 	0x28a4c, 0x07ffffff, 0x4e000000,
439 	0x28350, 0x3f3f3fff, 0x2a00126a,
440 	0x30, 0x000000ff, 0x0040,
441 	0x34, 0x00000040, 0x00004040,
442 	0x9100, 0x07ffffff, 0x03000000,
443 	0x9060, 0x0000007f, 0x00000020,
444 	0x9508, 0x00010000, 0x00010000,
445 	0xac14, 0x000003ff, 0x000000f7,
446 	0xac10, 0xffffffff, 0x00000000,
447 	0xac0c, 0xffffffff, 0x32761054,
448 	0x88d4, 0x0000001f, 0x00000010,
449 	0x15c0, 0x000c0fc0, 0x000c0400
450 };
451 
452 static const u32 verde_golden_rlc_registers[] =
453 {
454 	0xc424, 0xffffffff, 0x033f1005,
455 	0xc47c, 0xffffffff, 0x10808020,
456 	0xc488, 0xffffffff, 0x00800008,
457 	0xc314, 0xffffffff, 0x00001000,
458 	0xc30c, 0xffffffff, 0x80010014
459 };
460 
461 static const u32 verde_golden_registers[] =
462 {
463 	0x9a10, 0x00010000, 0x00018208,
464 	0x9830, 0xffffffff, 0x00000000,
465 	0x9834, 0xf00fffff, 0x00000400,
466 	0x9838, 0x0002021c, 0x00020200,
467 	0xc78, 0x00000080, 0x00000000,
468 	0xd030, 0x000300c0, 0x00800040,
469 	0xd030, 0x000300c0, 0x00800040,
470 	0xd830, 0x000300c0, 0x00800040,
471 	0xd830, 0x000300c0, 0x00800040,
472 	0x5bb0, 0x000000f0, 0x00000070,
473 	0x5bc0, 0x00200000, 0x50100000,
474 	0x7030, 0x31000311, 0x00000011,
475 	0x2ae4, 0x00073ffe, 0x000022a2,
476 	0x2ae4, 0x00073ffe, 0x000022a2,
477 	0x2ae4, 0x00073ffe, 0x000022a2,
478 	0x240c, 0x000007ff, 0x00000000,
479 	0x240c, 0x000007ff, 0x00000000,
480 	0x240c, 0x000007ff, 0x00000000,
481 	0x8a14, 0xf000001f, 0x00000007,
482 	0x8a14, 0xf000001f, 0x00000007,
483 	0x8a14, 0xf000001f, 0x00000007,
484 	0x8b24, 0xffffffff, 0x00ffffff,
485 	0x8b10, 0x0000ff0f, 0x00000000,
486 	0x28a4c, 0x07ffffff, 0x4e000000,
487 	0x28350, 0x3f3f3fff, 0x0000124a,
488 	0x28350, 0x3f3f3fff, 0x0000124a,
489 	0x28350, 0x3f3f3fff, 0x0000124a,
490 	0x30, 0x000000ff, 0x0040,
491 	0x34, 0x00000040, 0x00004040,
492 	0x9100, 0x07ffffff, 0x03000000,
493 	0x9100, 0x07ffffff, 0x03000000,
494 	0x8e88, 0x01ff1f3f, 0x00000000,
495 	0x8e88, 0x01ff1f3f, 0x00000000,
496 	0x8e88, 0x01ff1f3f, 0x00000000,
497 	0x8e84, 0x01ff1f3f, 0x00000000,
498 	0x8e84, 0x01ff1f3f, 0x00000000,
499 	0x8e84, 0x01ff1f3f, 0x00000000,
500 	0x9060, 0x0000007f, 0x00000020,
501 	0x9508, 0x00010000, 0x00010000,
502 	0xac14, 0x000003ff, 0x00000003,
503 	0xac14, 0x000003ff, 0x00000003,
504 	0xac14, 0x000003ff, 0x00000003,
505 	0xac10, 0xffffffff, 0x00000000,
506 	0xac10, 0xffffffff, 0x00000000,
507 	0xac10, 0xffffffff, 0x00000000,
508 	0xac0c, 0xffffffff, 0x00001032,
509 	0xac0c, 0xffffffff, 0x00001032,
510 	0xac0c, 0xffffffff, 0x00001032,
511 	0x88d4, 0x0000001f, 0x00000010,
512 	0x88d4, 0x0000001f, 0x00000010,
513 	0x88d4, 0x0000001f, 0x00000010,
514 	0x15c0, 0x000c0fc0, 0x000c0400
515 };
516 
517 static const u32 oland_golden_rlc_registers[] =
518 {
519 	0xc424, 0xffffffff, 0x00601005,
520 	0xc47c, 0xffffffff, 0x10104040,
521 	0xc488, 0xffffffff, 0x0100000a,
522 	0xc314, 0xffffffff, 0x00000800,
523 	0xc30c, 0xffffffff, 0x800000f4
524 };
525 
526 static const u32 oland_golden_registers[] =
527 {
528 	0x9a10, 0x00010000, 0x00018208,
529 	0x9830, 0xffffffff, 0x00000000,
530 	0x9834, 0xf00fffff, 0x00000400,
531 	0x9838, 0x0002021c, 0x00020200,
532 	0xc78, 0x00000080, 0x00000000,
533 	0xd030, 0x000300c0, 0x00800040,
534 	0xd830, 0x000300c0, 0x00800040,
535 	0x5bb0, 0x000000f0, 0x00000070,
536 	0x5bc0, 0x00200000, 0x50100000,
537 	0x7030, 0x31000311, 0x00000011,
538 	0x2ae4, 0x00073ffe, 0x000022a2,
539 	0x240c, 0x000007ff, 0x00000000,
540 	0x8a14, 0xf000001f, 0x00000007,
541 	0x8b24, 0xffffffff, 0x00ffffff,
542 	0x8b10, 0x0000ff0f, 0x00000000,
543 	0x28a4c, 0x07ffffff, 0x4e000000,
544 	0x28350, 0x3f3f3fff, 0x00000082,
545 	0x30, 0x000000ff, 0x0040,
546 	0x34, 0x00000040, 0x00004040,
547 	0x9100, 0x07ffffff, 0x03000000,
548 	0x9060, 0x0000007f, 0x00000020,
549 	0x9508, 0x00010000, 0x00010000,
550 	0xac14, 0x000003ff, 0x000000f3,
551 	0xac10, 0xffffffff, 0x00000000,
552 	0xac0c, 0xffffffff, 0x00003210,
553 	0x88d4, 0x0000001f, 0x00000010,
554 	0x15c0, 0x000c0fc0, 0x000c0400
555 };
556 
557 static const u32 hainan_golden_registers[] =
558 {
559 	0x9a10, 0x00010000, 0x00018208,
560 	0x9830, 0xffffffff, 0x00000000,
561 	0x9834, 0xf00fffff, 0x00000400,
562 	0x9838, 0x0002021c, 0x00020200,
563 	0xd0c0, 0xff000fff, 0x00000100,
564 	0xd030, 0x000300c0, 0x00800040,
565 	0xd8c0, 0xff000fff, 0x00000100,
566 	0xd830, 0x000300c0, 0x00800040,
567 	0x2ae4, 0x00073ffe, 0x000022a2,
568 	0x240c, 0x000007ff, 0x00000000,
569 	0x8a14, 0xf000001f, 0x00000007,
570 	0x8b24, 0xffffffff, 0x00ffffff,
571 	0x8b10, 0x0000ff0f, 0x00000000,
572 	0x28a4c, 0x07ffffff, 0x4e000000,
573 	0x28350, 0x3f3f3fff, 0x00000000,
574 	0x30, 0x000000ff, 0x0040,
575 	0x34, 0x00000040, 0x00004040,
576 	0x9100, 0x03e00000, 0x03600000,
577 	0x9060, 0x0000007f, 0x00000020,
578 	0x9508, 0x00010000, 0x00010000,
579 	0xac14, 0x000003ff, 0x000000f1,
580 	0xac10, 0xffffffff, 0x00000000,
581 	0xac0c, 0xffffffff, 0x00003210,
582 	0x88d4, 0x0000001f, 0x00000010,
583 	0x15c0, 0x000c0fc0, 0x000c0400
584 };
585 
586 static const u32 hainan_golden_registers2[] =
587 {
588 	0x98f8, 0xffffffff, 0x02010001
589 };
590 
591 static const u32 tahiti_mgcg_cgcg_init[] =
592 {
593 	0xc400, 0xffffffff, 0xfffffffc,
594 	0x802c, 0xffffffff, 0xe0000000,
595 	0x9a60, 0xffffffff, 0x00000100,
596 	0x92a4, 0xffffffff, 0x00000100,
597 	0xc164, 0xffffffff, 0x00000100,
598 	0x9774, 0xffffffff, 0x00000100,
599 	0x8984, 0xffffffff, 0x06000100,
600 	0x8a18, 0xffffffff, 0x00000100,
601 	0x92a0, 0xffffffff, 0x00000100,
602 	0xc380, 0xffffffff, 0x00000100,
603 	0x8b28, 0xffffffff, 0x00000100,
604 	0x9144, 0xffffffff, 0x00000100,
605 	0x8d88, 0xffffffff, 0x00000100,
606 	0x8d8c, 0xffffffff, 0x00000100,
607 	0x9030, 0xffffffff, 0x00000100,
608 	0x9034, 0xffffffff, 0x00000100,
609 	0x9038, 0xffffffff, 0x00000100,
610 	0x903c, 0xffffffff, 0x00000100,
611 	0xad80, 0xffffffff, 0x00000100,
612 	0xac54, 0xffffffff, 0x00000100,
613 	0x897c, 0xffffffff, 0x06000100,
614 	0x9868, 0xffffffff, 0x00000100,
615 	0x9510, 0xffffffff, 0x00000100,
616 	0xaf04, 0xffffffff, 0x00000100,
617 	0xae04, 0xffffffff, 0x00000100,
618 	0x949c, 0xffffffff, 0x00000100,
619 	0x802c, 0xffffffff, 0xe0000000,
620 	0x9160, 0xffffffff, 0x00010000,
621 	0x9164, 0xffffffff, 0x00030002,
622 	0x9168, 0xffffffff, 0x00040007,
623 	0x916c, 0xffffffff, 0x00060005,
624 	0x9170, 0xffffffff, 0x00090008,
625 	0x9174, 0xffffffff, 0x00020001,
626 	0x9178, 0xffffffff, 0x00040003,
627 	0x917c, 0xffffffff, 0x00000007,
628 	0x9180, 0xffffffff, 0x00060005,
629 	0x9184, 0xffffffff, 0x00090008,
630 	0x9188, 0xffffffff, 0x00030002,
631 	0x918c, 0xffffffff, 0x00050004,
632 	0x9190, 0xffffffff, 0x00000008,
633 	0x9194, 0xffffffff, 0x00070006,
634 	0x9198, 0xffffffff, 0x000a0009,
635 	0x919c, 0xffffffff, 0x00040003,
636 	0x91a0, 0xffffffff, 0x00060005,
637 	0x91a4, 0xffffffff, 0x00000009,
638 	0x91a8, 0xffffffff, 0x00080007,
639 	0x91ac, 0xffffffff, 0x000b000a,
640 	0x91b0, 0xffffffff, 0x00050004,
641 	0x91b4, 0xffffffff, 0x00070006,
642 	0x91b8, 0xffffffff, 0x0008000b,
643 	0x91bc, 0xffffffff, 0x000a0009,
644 	0x91c0, 0xffffffff, 0x000d000c,
645 	0x91c4, 0xffffffff, 0x00060005,
646 	0x91c8, 0xffffffff, 0x00080007,
647 	0x91cc, 0xffffffff, 0x0000000b,
648 	0x91d0, 0xffffffff, 0x000a0009,
649 	0x91d4, 0xffffffff, 0x000d000c,
650 	0x91d8, 0xffffffff, 0x00070006,
651 	0x91dc, 0xffffffff, 0x00090008,
652 	0x91e0, 0xffffffff, 0x0000000c,
653 	0x91e4, 0xffffffff, 0x000b000a,
654 	0x91e8, 0xffffffff, 0x000e000d,
655 	0x91ec, 0xffffffff, 0x00080007,
656 	0x91f0, 0xffffffff, 0x000a0009,
657 	0x91f4, 0xffffffff, 0x0000000d,
658 	0x91f8, 0xffffffff, 0x000c000b,
659 	0x91fc, 0xffffffff, 0x000f000e,
660 	0x9200, 0xffffffff, 0x00090008,
661 	0x9204, 0xffffffff, 0x000b000a,
662 	0x9208, 0xffffffff, 0x000c000f,
663 	0x920c, 0xffffffff, 0x000e000d,
664 	0x9210, 0xffffffff, 0x00110010,
665 	0x9214, 0xffffffff, 0x000a0009,
666 	0x9218, 0xffffffff, 0x000c000b,
667 	0x921c, 0xffffffff, 0x0000000f,
668 	0x9220, 0xffffffff, 0x000e000d,
669 	0x9224, 0xffffffff, 0x00110010,
670 	0x9228, 0xffffffff, 0x000b000a,
671 	0x922c, 0xffffffff, 0x000d000c,
672 	0x9230, 0xffffffff, 0x00000010,
673 	0x9234, 0xffffffff, 0x000f000e,
674 	0x9238, 0xffffffff, 0x00120011,
675 	0x923c, 0xffffffff, 0x000c000b,
676 	0x9240, 0xffffffff, 0x000e000d,
677 	0x9244, 0xffffffff, 0x00000011,
678 	0x9248, 0xffffffff, 0x0010000f,
679 	0x924c, 0xffffffff, 0x00130012,
680 	0x9250, 0xffffffff, 0x000d000c,
681 	0x9254, 0xffffffff, 0x000f000e,
682 	0x9258, 0xffffffff, 0x00100013,
683 	0x925c, 0xffffffff, 0x00120011,
684 	0x9260, 0xffffffff, 0x00150014,
685 	0x9264, 0xffffffff, 0x000e000d,
686 	0x9268, 0xffffffff, 0x0010000f,
687 	0x926c, 0xffffffff, 0x00000013,
688 	0x9270, 0xffffffff, 0x00120011,
689 	0x9274, 0xffffffff, 0x00150014,
690 	0x9278, 0xffffffff, 0x000f000e,
691 	0x927c, 0xffffffff, 0x00110010,
692 	0x9280, 0xffffffff, 0x00000014,
693 	0x9284, 0xffffffff, 0x00130012,
694 	0x9288, 0xffffffff, 0x00160015,
695 	0x928c, 0xffffffff, 0x0010000f,
696 	0x9290, 0xffffffff, 0x00120011,
697 	0x9294, 0xffffffff, 0x00000015,
698 	0x9298, 0xffffffff, 0x00140013,
699 	0x929c, 0xffffffff, 0x00170016,
700 	0x9150, 0xffffffff, 0x96940200,
701 	0x8708, 0xffffffff, 0x00900100,
702 	0xc478, 0xffffffff, 0x00000080,
703 	0xc404, 0xffffffff, 0x0020003f,
704 	0x30, 0xffffffff, 0x0000001c,
705 	0x34, 0x000f0000, 0x000f0000,
706 	0x160c, 0xffffffff, 0x00000100,
707 	0x1024, 0xffffffff, 0x00000100,
708 	0x102c, 0x00000101, 0x00000000,
709 	0x20a8, 0xffffffff, 0x00000104,
710 	0x264c, 0x000c0000, 0x000c0000,
711 	0x2648, 0x000c0000, 0x000c0000,
712 	0x55e4, 0xff000fff, 0x00000100,
713 	0x55e8, 0x00000001, 0x00000001,
714 	0x2f50, 0x00000001, 0x00000001,
715 	0x30cc, 0xc0000fff, 0x00000104,
716 	0xc1e4, 0x00000001, 0x00000001,
717 	0xd0c0, 0xfffffff0, 0x00000100,
718 	0xd8c0, 0xfffffff0, 0x00000100
719 };
720 
721 static const u32 pitcairn_mgcg_cgcg_init[] =
722 {
723 	0xc400, 0xffffffff, 0xfffffffc,
724 	0x802c, 0xffffffff, 0xe0000000,
725 	0x9a60, 0xffffffff, 0x00000100,
726 	0x92a4, 0xffffffff, 0x00000100,
727 	0xc164, 0xffffffff, 0x00000100,
728 	0x9774, 0xffffffff, 0x00000100,
729 	0x8984, 0xffffffff, 0x06000100,
730 	0x8a18, 0xffffffff, 0x00000100,
731 	0x92a0, 0xffffffff, 0x00000100,
732 	0xc380, 0xffffffff, 0x00000100,
733 	0x8b28, 0xffffffff, 0x00000100,
734 	0x9144, 0xffffffff, 0x00000100,
735 	0x8d88, 0xffffffff, 0x00000100,
736 	0x8d8c, 0xffffffff, 0x00000100,
737 	0x9030, 0xffffffff, 0x00000100,
738 	0x9034, 0xffffffff, 0x00000100,
739 	0x9038, 0xffffffff, 0x00000100,
740 	0x903c, 0xffffffff, 0x00000100,
741 	0xad80, 0xffffffff, 0x00000100,
742 	0xac54, 0xffffffff, 0x00000100,
743 	0x897c, 0xffffffff, 0x06000100,
744 	0x9868, 0xffffffff, 0x00000100,
745 	0x9510, 0xffffffff, 0x00000100,
746 	0xaf04, 0xffffffff, 0x00000100,
747 	0xae04, 0xffffffff, 0x00000100,
748 	0x949c, 0xffffffff, 0x00000100,
749 	0x802c, 0xffffffff, 0xe0000000,
750 	0x9160, 0xffffffff, 0x00010000,
751 	0x9164, 0xffffffff, 0x00030002,
752 	0x9168, 0xffffffff, 0x00040007,
753 	0x916c, 0xffffffff, 0x00060005,
754 	0x9170, 0xffffffff, 0x00090008,
755 	0x9174, 0xffffffff, 0x00020001,
756 	0x9178, 0xffffffff, 0x00040003,
757 	0x917c, 0xffffffff, 0x00000007,
758 	0x9180, 0xffffffff, 0x00060005,
759 	0x9184, 0xffffffff, 0x00090008,
760 	0x9188, 0xffffffff, 0x00030002,
761 	0x918c, 0xffffffff, 0x00050004,
762 	0x9190, 0xffffffff, 0x00000008,
763 	0x9194, 0xffffffff, 0x00070006,
764 	0x9198, 0xffffffff, 0x000a0009,
765 	0x919c, 0xffffffff, 0x00040003,
766 	0x91a0, 0xffffffff, 0x00060005,
767 	0x91a4, 0xffffffff, 0x00000009,
768 	0x91a8, 0xffffffff, 0x00080007,
769 	0x91ac, 0xffffffff, 0x000b000a,
770 	0x91b0, 0xffffffff, 0x00050004,
771 	0x91b4, 0xffffffff, 0x00070006,
772 	0x91b8, 0xffffffff, 0x0008000b,
773 	0x91bc, 0xffffffff, 0x000a0009,
774 	0x91c0, 0xffffffff, 0x000d000c,
775 	0x9200, 0xffffffff, 0x00090008,
776 	0x9204, 0xffffffff, 0x000b000a,
777 	0x9208, 0xffffffff, 0x000c000f,
778 	0x920c, 0xffffffff, 0x000e000d,
779 	0x9210, 0xffffffff, 0x00110010,
780 	0x9214, 0xffffffff, 0x000a0009,
781 	0x9218, 0xffffffff, 0x000c000b,
782 	0x921c, 0xffffffff, 0x0000000f,
783 	0x9220, 0xffffffff, 0x000e000d,
784 	0x9224, 0xffffffff, 0x00110010,
785 	0x9228, 0xffffffff, 0x000b000a,
786 	0x922c, 0xffffffff, 0x000d000c,
787 	0x9230, 0xffffffff, 0x00000010,
788 	0x9234, 0xffffffff, 0x000f000e,
789 	0x9238, 0xffffffff, 0x00120011,
790 	0x923c, 0xffffffff, 0x000c000b,
791 	0x9240, 0xffffffff, 0x000e000d,
792 	0x9244, 0xffffffff, 0x00000011,
793 	0x9248, 0xffffffff, 0x0010000f,
794 	0x924c, 0xffffffff, 0x00130012,
795 	0x9250, 0xffffffff, 0x000d000c,
796 	0x9254, 0xffffffff, 0x000f000e,
797 	0x9258, 0xffffffff, 0x00100013,
798 	0x925c, 0xffffffff, 0x00120011,
799 	0x9260, 0xffffffff, 0x00150014,
800 	0x9150, 0xffffffff, 0x96940200,
801 	0x8708, 0xffffffff, 0x00900100,
802 	0xc478, 0xffffffff, 0x00000080,
803 	0xc404, 0xffffffff, 0x0020003f,
804 	0x30, 0xffffffff, 0x0000001c,
805 	0x34, 0x000f0000, 0x000f0000,
806 	0x160c, 0xffffffff, 0x00000100,
807 	0x1024, 0xffffffff, 0x00000100,
808 	0x102c, 0x00000101, 0x00000000,
809 	0x20a8, 0xffffffff, 0x00000104,
810 	0x55e4, 0xff000fff, 0x00000100,
811 	0x55e8, 0x00000001, 0x00000001,
812 	0x2f50, 0x00000001, 0x00000001,
813 	0x30cc, 0xc0000fff, 0x00000104,
814 	0xc1e4, 0x00000001, 0x00000001,
815 	0xd0c0, 0xfffffff0, 0x00000100,
816 	0xd8c0, 0xfffffff0, 0x00000100
817 };
818 
819 static const u32 verde_mgcg_cgcg_init[] =
820 {
821 	0xc400, 0xffffffff, 0xfffffffc,
822 	0x802c, 0xffffffff, 0xe0000000,
823 	0x9a60, 0xffffffff, 0x00000100,
824 	0x92a4, 0xffffffff, 0x00000100,
825 	0xc164, 0xffffffff, 0x00000100,
826 	0x9774, 0xffffffff, 0x00000100,
827 	0x8984, 0xffffffff, 0x06000100,
828 	0x8a18, 0xffffffff, 0x00000100,
829 	0x92a0, 0xffffffff, 0x00000100,
830 	0xc380, 0xffffffff, 0x00000100,
831 	0x8b28, 0xffffffff, 0x00000100,
832 	0x9144, 0xffffffff, 0x00000100,
833 	0x8d88, 0xffffffff, 0x00000100,
834 	0x8d8c, 0xffffffff, 0x00000100,
835 	0x9030, 0xffffffff, 0x00000100,
836 	0x9034, 0xffffffff, 0x00000100,
837 	0x9038, 0xffffffff, 0x00000100,
838 	0x903c, 0xffffffff, 0x00000100,
839 	0xad80, 0xffffffff, 0x00000100,
840 	0xac54, 0xffffffff, 0x00000100,
841 	0x897c, 0xffffffff, 0x06000100,
842 	0x9868, 0xffffffff, 0x00000100,
843 	0x9510, 0xffffffff, 0x00000100,
844 	0xaf04, 0xffffffff, 0x00000100,
845 	0xae04, 0xffffffff, 0x00000100,
846 	0x949c, 0xffffffff, 0x00000100,
847 	0x802c, 0xffffffff, 0xe0000000,
848 	0x9160, 0xffffffff, 0x00010000,
849 	0x9164, 0xffffffff, 0x00030002,
850 	0x9168, 0xffffffff, 0x00040007,
851 	0x916c, 0xffffffff, 0x00060005,
852 	0x9170, 0xffffffff, 0x00090008,
853 	0x9174, 0xffffffff, 0x00020001,
854 	0x9178, 0xffffffff, 0x00040003,
855 	0x917c, 0xffffffff, 0x00000007,
856 	0x9180, 0xffffffff, 0x00060005,
857 	0x9184, 0xffffffff, 0x00090008,
858 	0x9188, 0xffffffff, 0x00030002,
859 	0x918c, 0xffffffff, 0x00050004,
860 	0x9190, 0xffffffff, 0x00000008,
861 	0x9194, 0xffffffff, 0x00070006,
862 	0x9198, 0xffffffff, 0x000a0009,
863 	0x919c, 0xffffffff, 0x00040003,
864 	0x91a0, 0xffffffff, 0x00060005,
865 	0x91a4, 0xffffffff, 0x00000009,
866 	0x91a8, 0xffffffff, 0x00080007,
867 	0x91ac, 0xffffffff, 0x000b000a,
868 	0x91b0, 0xffffffff, 0x00050004,
869 	0x91b4, 0xffffffff, 0x00070006,
870 	0x91b8, 0xffffffff, 0x0008000b,
871 	0x91bc, 0xffffffff, 0x000a0009,
872 	0x91c0, 0xffffffff, 0x000d000c,
873 	0x9200, 0xffffffff, 0x00090008,
874 	0x9204, 0xffffffff, 0x000b000a,
875 	0x9208, 0xffffffff, 0x000c000f,
876 	0x920c, 0xffffffff, 0x000e000d,
877 	0x9210, 0xffffffff, 0x00110010,
878 	0x9214, 0xffffffff, 0x000a0009,
879 	0x9218, 0xffffffff, 0x000c000b,
880 	0x921c, 0xffffffff, 0x0000000f,
881 	0x9220, 0xffffffff, 0x000e000d,
882 	0x9224, 0xffffffff, 0x00110010,
883 	0x9228, 0xffffffff, 0x000b000a,
884 	0x922c, 0xffffffff, 0x000d000c,
885 	0x9230, 0xffffffff, 0x00000010,
886 	0x9234, 0xffffffff, 0x000f000e,
887 	0x9238, 0xffffffff, 0x00120011,
888 	0x923c, 0xffffffff, 0x000c000b,
889 	0x9240, 0xffffffff, 0x000e000d,
890 	0x9244, 0xffffffff, 0x00000011,
891 	0x9248, 0xffffffff, 0x0010000f,
892 	0x924c, 0xffffffff, 0x00130012,
893 	0x9250, 0xffffffff, 0x000d000c,
894 	0x9254, 0xffffffff, 0x000f000e,
895 	0x9258, 0xffffffff, 0x00100013,
896 	0x925c, 0xffffffff, 0x00120011,
897 	0x9260, 0xffffffff, 0x00150014,
898 	0x9150, 0xffffffff, 0x96940200,
899 	0x8708, 0xffffffff, 0x00900100,
900 	0xc478, 0xffffffff, 0x00000080,
901 	0xc404, 0xffffffff, 0x0020003f,
902 	0x30, 0xffffffff, 0x0000001c,
903 	0x34, 0x000f0000, 0x000f0000,
904 	0x160c, 0xffffffff, 0x00000100,
905 	0x1024, 0xffffffff, 0x00000100,
906 	0x102c, 0x00000101, 0x00000000,
907 	0x20a8, 0xffffffff, 0x00000104,
908 	0x264c, 0x000c0000, 0x000c0000,
909 	0x2648, 0x000c0000, 0x000c0000,
910 	0x55e4, 0xff000fff, 0x00000100,
911 	0x55e8, 0x00000001, 0x00000001,
912 	0x2f50, 0x00000001, 0x00000001,
913 	0x30cc, 0xc0000fff, 0x00000104,
914 	0xc1e4, 0x00000001, 0x00000001,
915 	0xd0c0, 0xfffffff0, 0x00000100,
916 	0xd8c0, 0xfffffff0, 0x00000100
917 };
918 
919 static const u32 oland_mgcg_cgcg_init[] =
920 {
921 	0xc400, 0xffffffff, 0xfffffffc,
922 	0x802c, 0xffffffff, 0xe0000000,
923 	0x9a60, 0xffffffff, 0x00000100,
924 	0x92a4, 0xffffffff, 0x00000100,
925 	0xc164, 0xffffffff, 0x00000100,
926 	0x9774, 0xffffffff, 0x00000100,
927 	0x8984, 0xffffffff, 0x06000100,
928 	0x8a18, 0xffffffff, 0x00000100,
929 	0x92a0, 0xffffffff, 0x00000100,
930 	0xc380, 0xffffffff, 0x00000100,
931 	0x8b28, 0xffffffff, 0x00000100,
932 	0x9144, 0xffffffff, 0x00000100,
933 	0x8d88, 0xffffffff, 0x00000100,
934 	0x8d8c, 0xffffffff, 0x00000100,
935 	0x9030, 0xffffffff, 0x00000100,
936 	0x9034, 0xffffffff, 0x00000100,
937 	0x9038, 0xffffffff, 0x00000100,
938 	0x903c, 0xffffffff, 0x00000100,
939 	0xad80, 0xffffffff, 0x00000100,
940 	0xac54, 0xffffffff, 0x00000100,
941 	0x897c, 0xffffffff, 0x06000100,
942 	0x9868, 0xffffffff, 0x00000100,
943 	0x9510, 0xffffffff, 0x00000100,
944 	0xaf04, 0xffffffff, 0x00000100,
945 	0xae04, 0xffffffff, 0x00000100,
946 	0x949c, 0xffffffff, 0x00000100,
947 	0x802c, 0xffffffff, 0xe0000000,
948 	0x9160, 0xffffffff, 0x00010000,
949 	0x9164, 0xffffffff, 0x00030002,
950 	0x9168, 0xffffffff, 0x00040007,
951 	0x916c, 0xffffffff, 0x00060005,
952 	0x9170, 0xffffffff, 0x00090008,
953 	0x9174, 0xffffffff, 0x00020001,
954 	0x9178, 0xffffffff, 0x00040003,
955 	0x917c, 0xffffffff, 0x00000007,
956 	0x9180, 0xffffffff, 0x00060005,
957 	0x9184, 0xffffffff, 0x00090008,
958 	0x9188, 0xffffffff, 0x00030002,
959 	0x918c, 0xffffffff, 0x00050004,
960 	0x9190, 0xffffffff, 0x00000008,
961 	0x9194, 0xffffffff, 0x00070006,
962 	0x9198, 0xffffffff, 0x000a0009,
963 	0x919c, 0xffffffff, 0x00040003,
964 	0x91a0, 0xffffffff, 0x00060005,
965 	0x91a4, 0xffffffff, 0x00000009,
966 	0x91a8, 0xffffffff, 0x00080007,
967 	0x91ac, 0xffffffff, 0x000b000a,
968 	0x91b0, 0xffffffff, 0x00050004,
969 	0x91b4, 0xffffffff, 0x00070006,
970 	0x91b8, 0xffffffff, 0x0008000b,
971 	0x91bc, 0xffffffff, 0x000a0009,
972 	0x91c0, 0xffffffff, 0x000d000c,
973 	0x91c4, 0xffffffff, 0x00060005,
974 	0x91c8, 0xffffffff, 0x00080007,
975 	0x91cc, 0xffffffff, 0x0000000b,
976 	0x91d0, 0xffffffff, 0x000a0009,
977 	0x91d4, 0xffffffff, 0x000d000c,
978 	0x9150, 0xffffffff, 0x96940200,
979 	0x8708, 0xffffffff, 0x00900100,
980 	0xc478, 0xffffffff, 0x00000080,
981 	0xc404, 0xffffffff, 0x0020003f,
982 	0x30, 0xffffffff, 0x0000001c,
983 	0x34, 0x000f0000, 0x000f0000,
984 	0x160c, 0xffffffff, 0x00000100,
985 	0x1024, 0xffffffff, 0x00000100,
986 	0x102c, 0x00000101, 0x00000000,
987 	0x20a8, 0xffffffff, 0x00000104,
988 	0x264c, 0x000c0000, 0x000c0000,
989 	0x2648, 0x000c0000, 0x000c0000,
990 	0x55e4, 0xff000fff, 0x00000100,
991 	0x55e8, 0x00000001, 0x00000001,
992 	0x2f50, 0x00000001, 0x00000001,
993 	0x30cc, 0xc0000fff, 0x00000104,
994 	0xc1e4, 0x00000001, 0x00000001,
995 	0xd0c0, 0xfffffff0, 0x00000100,
996 	0xd8c0, 0xfffffff0, 0x00000100
997 };
998 
999 static const u32 hainan_mgcg_cgcg_init[] =
1000 {
1001 	0xc400, 0xffffffff, 0xfffffffc,
1002 	0x802c, 0xffffffff, 0xe0000000,
1003 	0x9a60, 0xffffffff, 0x00000100,
1004 	0x92a4, 0xffffffff, 0x00000100,
1005 	0xc164, 0xffffffff, 0x00000100,
1006 	0x9774, 0xffffffff, 0x00000100,
1007 	0x8984, 0xffffffff, 0x06000100,
1008 	0x8a18, 0xffffffff, 0x00000100,
1009 	0x92a0, 0xffffffff, 0x00000100,
1010 	0xc380, 0xffffffff, 0x00000100,
1011 	0x8b28, 0xffffffff, 0x00000100,
1012 	0x9144, 0xffffffff, 0x00000100,
1013 	0x8d88, 0xffffffff, 0x00000100,
1014 	0x8d8c, 0xffffffff, 0x00000100,
1015 	0x9030, 0xffffffff, 0x00000100,
1016 	0x9034, 0xffffffff, 0x00000100,
1017 	0x9038, 0xffffffff, 0x00000100,
1018 	0x903c, 0xffffffff, 0x00000100,
1019 	0xad80, 0xffffffff, 0x00000100,
1020 	0xac54, 0xffffffff, 0x00000100,
1021 	0x897c, 0xffffffff, 0x06000100,
1022 	0x9868, 0xffffffff, 0x00000100,
1023 	0x9510, 0xffffffff, 0x00000100,
1024 	0xaf04, 0xffffffff, 0x00000100,
1025 	0xae04, 0xffffffff, 0x00000100,
1026 	0x949c, 0xffffffff, 0x00000100,
1027 	0x802c, 0xffffffff, 0xe0000000,
1028 	0x9160, 0xffffffff, 0x00010000,
1029 	0x9164, 0xffffffff, 0x00030002,
1030 	0x9168, 0xffffffff, 0x00040007,
1031 	0x916c, 0xffffffff, 0x00060005,
1032 	0x9170, 0xffffffff, 0x00090008,
1033 	0x9174, 0xffffffff, 0x00020001,
1034 	0x9178, 0xffffffff, 0x00040003,
1035 	0x917c, 0xffffffff, 0x00000007,
1036 	0x9180, 0xffffffff, 0x00060005,
1037 	0x9184, 0xffffffff, 0x00090008,
1038 	0x9188, 0xffffffff, 0x00030002,
1039 	0x918c, 0xffffffff, 0x00050004,
1040 	0x9190, 0xffffffff, 0x00000008,
1041 	0x9194, 0xffffffff, 0x00070006,
1042 	0x9198, 0xffffffff, 0x000a0009,
1043 	0x919c, 0xffffffff, 0x00040003,
1044 	0x91a0, 0xffffffff, 0x00060005,
1045 	0x91a4, 0xffffffff, 0x00000009,
1046 	0x91a8, 0xffffffff, 0x00080007,
1047 	0x91ac, 0xffffffff, 0x000b000a,
1048 	0x91b0, 0xffffffff, 0x00050004,
1049 	0x91b4, 0xffffffff, 0x00070006,
1050 	0x91b8, 0xffffffff, 0x0008000b,
1051 	0x91bc, 0xffffffff, 0x000a0009,
1052 	0x91c0, 0xffffffff, 0x000d000c,
1053 	0x91c4, 0xffffffff, 0x00060005,
1054 	0x91c8, 0xffffffff, 0x00080007,
1055 	0x91cc, 0xffffffff, 0x0000000b,
1056 	0x91d0, 0xffffffff, 0x000a0009,
1057 	0x91d4, 0xffffffff, 0x000d000c,
1058 	0x9150, 0xffffffff, 0x96940200,
1059 	0x8708, 0xffffffff, 0x00900100,
1060 	0xc478, 0xffffffff, 0x00000080,
1061 	0xc404, 0xffffffff, 0x0020003f,
1062 	0x30, 0xffffffff, 0x0000001c,
1063 	0x34, 0x000f0000, 0x000f0000,
1064 	0x160c, 0xffffffff, 0x00000100,
1065 	0x1024, 0xffffffff, 0x00000100,
1066 	0x20a8, 0xffffffff, 0x00000104,
1067 	0x264c, 0x000c0000, 0x000c0000,
1068 	0x2648, 0x000c0000, 0x000c0000,
1069 	0x2f50, 0x00000001, 0x00000001,
1070 	0x30cc, 0xc0000fff, 0x00000104,
1071 	0xc1e4, 0x00000001, 0x00000001,
1072 	0xd0c0, 0xfffffff0, 0x00000100,
1073 	0xd8c0, 0xfffffff0, 0x00000100
1074 };
1075 
1076 static u32 verde_pg_init[] =
1077 {
1078 	0x353c, 0xffffffff, 0x40000,
1079 	0x3538, 0xffffffff, 0x200010ff,
1080 	0x353c, 0xffffffff, 0x0,
1081 	0x353c, 0xffffffff, 0x0,
1082 	0x353c, 0xffffffff, 0x0,
1083 	0x353c, 0xffffffff, 0x0,
1084 	0x353c, 0xffffffff, 0x0,
1085 	0x353c, 0xffffffff, 0x7007,
1086 	0x3538, 0xffffffff, 0x300010ff,
1087 	0x353c, 0xffffffff, 0x0,
1088 	0x353c, 0xffffffff, 0x0,
1089 	0x353c, 0xffffffff, 0x0,
1090 	0x353c, 0xffffffff, 0x0,
1091 	0x353c, 0xffffffff, 0x0,
1092 	0x353c, 0xffffffff, 0x400000,
1093 	0x3538, 0xffffffff, 0x100010ff,
1094 	0x353c, 0xffffffff, 0x0,
1095 	0x353c, 0xffffffff, 0x0,
1096 	0x353c, 0xffffffff, 0x0,
1097 	0x353c, 0xffffffff, 0x0,
1098 	0x353c, 0xffffffff, 0x0,
1099 	0x353c, 0xffffffff, 0x120200,
1100 	0x3538, 0xffffffff, 0x500010ff,
1101 	0x353c, 0xffffffff, 0x0,
1102 	0x353c, 0xffffffff, 0x0,
1103 	0x353c, 0xffffffff, 0x0,
1104 	0x353c, 0xffffffff, 0x0,
1105 	0x353c, 0xffffffff, 0x0,
1106 	0x353c, 0xffffffff, 0x1e1e16,
1107 	0x3538, 0xffffffff, 0x600010ff,
1108 	0x353c, 0xffffffff, 0x0,
1109 	0x353c, 0xffffffff, 0x0,
1110 	0x353c, 0xffffffff, 0x0,
1111 	0x353c, 0xffffffff, 0x0,
1112 	0x353c, 0xffffffff, 0x0,
1113 	0x353c, 0xffffffff, 0x171f1e,
1114 	0x3538, 0xffffffff, 0x700010ff,
1115 	0x353c, 0xffffffff, 0x0,
1116 	0x353c, 0xffffffff, 0x0,
1117 	0x353c, 0xffffffff, 0x0,
1118 	0x353c, 0xffffffff, 0x0,
1119 	0x353c, 0xffffffff, 0x0,
1120 	0x353c, 0xffffffff, 0x0,
1121 	0x3538, 0xffffffff, 0x9ff,
1122 	0x3500, 0xffffffff, 0x0,
1123 	0x3504, 0xffffffff, 0x10000800,
1124 	0x3504, 0xffffffff, 0xf,
1125 	0x3504, 0xffffffff, 0xf,
1126 	0x3500, 0xffffffff, 0x4,
1127 	0x3504, 0xffffffff, 0x1000051e,
1128 	0x3504, 0xffffffff, 0xffff,
1129 	0x3504, 0xffffffff, 0xffff,
1130 	0x3500, 0xffffffff, 0x8,
1131 	0x3504, 0xffffffff, 0x80500,
1132 	0x3500, 0xffffffff, 0x12,
1133 	0x3504, 0xffffffff, 0x9050c,
1134 	0x3500, 0xffffffff, 0x1d,
1135 	0x3504, 0xffffffff, 0xb052c,
1136 	0x3500, 0xffffffff, 0x2a,
1137 	0x3504, 0xffffffff, 0x1053e,
1138 	0x3500, 0xffffffff, 0x2d,
1139 	0x3504, 0xffffffff, 0x10546,
1140 	0x3500, 0xffffffff, 0x30,
1141 	0x3504, 0xffffffff, 0xa054e,
1142 	0x3500, 0xffffffff, 0x3c,
1143 	0x3504, 0xffffffff, 0x1055f,
1144 	0x3500, 0xffffffff, 0x3f,
1145 	0x3504, 0xffffffff, 0x10567,
1146 	0x3500, 0xffffffff, 0x42,
1147 	0x3504, 0xffffffff, 0x1056f,
1148 	0x3500, 0xffffffff, 0x45,
1149 	0x3504, 0xffffffff, 0x10572,
1150 	0x3500, 0xffffffff, 0x48,
1151 	0x3504, 0xffffffff, 0x20575,
1152 	0x3500, 0xffffffff, 0x4c,
1153 	0x3504, 0xffffffff, 0x190801,
1154 	0x3500, 0xffffffff, 0x67,
1155 	0x3504, 0xffffffff, 0x1082a,
1156 	0x3500, 0xffffffff, 0x6a,
1157 	0x3504, 0xffffffff, 0x1b082d,
1158 	0x3500, 0xffffffff, 0x87,
1159 	0x3504, 0xffffffff, 0x310851,
1160 	0x3500, 0xffffffff, 0xba,
1161 	0x3504, 0xffffffff, 0x891,
1162 	0x3500, 0xffffffff, 0xbc,
1163 	0x3504, 0xffffffff, 0x893,
1164 	0x3500, 0xffffffff, 0xbe,
1165 	0x3504, 0xffffffff, 0x20895,
1166 	0x3500, 0xffffffff, 0xc2,
1167 	0x3504, 0xffffffff, 0x20899,
1168 	0x3500, 0xffffffff, 0xc6,
1169 	0x3504, 0xffffffff, 0x2089d,
1170 	0x3500, 0xffffffff, 0xca,
1171 	0x3504, 0xffffffff, 0x8a1,
1172 	0x3500, 0xffffffff, 0xcc,
1173 	0x3504, 0xffffffff, 0x8a3,
1174 	0x3500, 0xffffffff, 0xce,
1175 	0x3504, 0xffffffff, 0x308a5,
1176 	0x3500, 0xffffffff, 0xd3,
1177 	0x3504, 0xffffffff, 0x6d08cd,
1178 	0x3500, 0xffffffff, 0x142,
1179 	0x3504, 0xffffffff, 0x2000095a,
1180 	0x3504, 0xffffffff, 0x1,
1181 	0x3500, 0xffffffff, 0x144,
1182 	0x3504, 0xffffffff, 0x301f095b,
1183 	0x3500, 0xffffffff, 0x165,
1184 	0x3504, 0xffffffff, 0xc094d,
1185 	0x3500, 0xffffffff, 0x173,
1186 	0x3504, 0xffffffff, 0xf096d,
1187 	0x3500, 0xffffffff, 0x184,
1188 	0x3504, 0xffffffff, 0x15097f,
1189 	0x3500, 0xffffffff, 0x19b,
1190 	0x3504, 0xffffffff, 0xc0998,
1191 	0x3500, 0xffffffff, 0x1a9,
1192 	0x3504, 0xffffffff, 0x409a7,
1193 	0x3500, 0xffffffff, 0x1af,
1194 	0x3504, 0xffffffff, 0xcdc,
1195 	0x3500, 0xffffffff, 0x1b1,
1196 	0x3504, 0xffffffff, 0x800,
1197 	0x3508, 0xffffffff, 0x6c9b2000,
1198 	0x3510, 0xfc00, 0x2000,
1199 	0x3544, 0xffffffff, 0xfc0,
1200 	0x28d4, 0x00000100, 0x100
1201 };
1202 
1203 static void si_init_golden_registers(struct radeon_device *rdev)
1204 {
1205 	switch (rdev->family) {
1206 	case CHIP_TAHITI:
1207 		radeon_program_register_sequence(rdev,
1208 						 tahiti_golden_registers,
1209 						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
1210 		radeon_program_register_sequence(rdev,
1211 						 tahiti_golden_rlc_registers,
1212 						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1213 		radeon_program_register_sequence(rdev,
1214 						 tahiti_mgcg_cgcg_init,
1215 						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1216 		radeon_program_register_sequence(rdev,
1217 						 tahiti_golden_registers2,
1218 						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1219 		break;
1220 	case CHIP_PITCAIRN:
1221 		radeon_program_register_sequence(rdev,
1222 						 pitcairn_golden_registers,
1223 						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1224 		radeon_program_register_sequence(rdev,
1225 						 pitcairn_golden_rlc_registers,
1226 						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1227 		radeon_program_register_sequence(rdev,
1228 						 pitcairn_mgcg_cgcg_init,
1229 						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1230 		break;
1231 	case CHIP_VERDE:
1232 		radeon_program_register_sequence(rdev,
1233 						 verde_golden_registers,
1234 						 (const u32)ARRAY_SIZE(verde_golden_registers));
1235 		radeon_program_register_sequence(rdev,
1236 						 verde_golden_rlc_registers,
1237 						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1238 		radeon_program_register_sequence(rdev,
1239 						 verde_mgcg_cgcg_init,
1240 						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1241 		radeon_program_register_sequence(rdev,
1242 						 verde_pg_init,
1243 						 (const u32)ARRAY_SIZE(verde_pg_init));
1244 		break;
1245 	case CHIP_OLAND:
1246 		radeon_program_register_sequence(rdev,
1247 						 oland_golden_registers,
1248 						 (const u32)ARRAY_SIZE(oland_golden_registers));
1249 		radeon_program_register_sequence(rdev,
1250 						 oland_golden_rlc_registers,
1251 						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1252 		radeon_program_register_sequence(rdev,
1253 						 oland_mgcg_cgcg_init,
1254 						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1255 		break;
1256 	case CHIP_HAINAN:
1257 		radeon_program_register_sequence(rdev,
1258 						 hainan_golden_registers,
1259 						 (const u32)ARRAY_SIZE(hainan_golden_registers));
1260 		radeon_program_register_sequence(rdev,
1261 						 hainan_golden_registers2,
1262 						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
1263 		radeon_program_register_sequence(rdev,
1264 						 hainan_mgcg_cgcg_init,
1265 						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1266 		break;
1267 	default:
1268 		break;
1269 	}
1270 }
1271 
1272 /**
1273  * si_get_allowed_info_register - fetch the register for the info ioctl
1274  *
1275  * @rdev: radeon_device pointer
1276  * @reg: register offset in bytes
1277  * @val: register value
1278  *
1279  * Returns 0 for success or -EINVAL for an invalid register
1280  *
1281  */
1282 int si_get_allowed_info_register(struct radeon_device *rdev,
1283 				 u32 reg, u32 *val)
1284 {
1285 	switch (reg) {
1286 	case GRBM_STATUS:
1287 	case GRBM_STATUS2:
1288 	case GRBM_STATUS_SE0:
1289 	case GRBM_STATUS_SE1:
1290 	case SRBM_STATUS:
1291 	case SRBM_STATUS2:
1292 	case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
1293 	case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
1294 	case UVD_STATUS:
1295 		*val = RREG32(reg);
1296 		return 0;
1297 	default:
1298 		return -EINVAL;
1299 	}
1300 }
1301 
1302 #define PCIE_BUS_CLK                10000
1303 #define TCLK                        (PCIE_BUS_CLK / 10)
1304 
1305 /**
1306  * si_get_xclk - get the xclk
1307  *
1308  * @rdev: radeon_device pointer
1309  *
1310  * Returns the reference clock used by the gfx engine
1311  * (SI).
1312  */
1313 u32 si_get_xclk(struct radeon_device *rdev)
1314 {
1315 	u32 reference_clock = rdev->clock.spll.reference_freq;
1316 	u32 tmp;
1317 
1318 	tmp = RREG32(CG_CLKPIN_CNTL_2);
1319 	if (tmp & MUX_TCLK_TO_XCLK)
1320 		return TCLK;
1321 
1322 	tmp = RREG32(CG_CLKPIN_CNTL);
1323 	if (tmp & XTALIN_DIVIDE)
1324 		return reference_clock / 4;
1325 
1326 	return reference_clock;
1327 }
1328 
1329 /* get temperature in millidegrees */
1330 int si_get_temp(struct radeon_device *rdev)
1331 {
1332 	u32 temp;
1333 	int actual_temp = 0;
1334 
1335 	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1336 		CTF_TEMP_SHIFT;
1337 
1338 	if (temp & 0x200)
1339 		actual_temp = 255;
1340 	else
1341 		actual_temp = temp & 0x1ff;
1342 
1343 	actual_temp = (actual_temp * 1000);
1344 
1345 	return actual_temp;
1346 }
1347 
1348 #define TAHITI_IO_MC_REGS_SIZE 36
1349 
1350 static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1351 	{0x0000006f, 0x03044000},
1352 	{0x00000070, 0x0480c018},
1353 	{0x00000071, 0x00000040},
1354 	{0x00000072, 0x01000000},
1355 	{0x00000074, 0x000000ff},
1356 	{0x00000075, 0x00143400},
1357 	{0x00000076, 0x08ec0800},
1358 	{0x00000077, 0x040000cc},
1359 	{0x00000079, 0x00000000},
1360 	{0x0000007a, 0x21000409},
1361 	{0x0000007c, 0x00000000},
1362 	{0x0000007d, 0xe8000000},
1363 	{0x0000007e, 0x044408a8},
1364 	{0x0000007f, 0x00000003},
1365 	{0x00000080, 0x00000000},
1366 	{0x00000081, 0x01000000},
1367 	{0x00000082, 0x02000000},
1368 	{0x00000083, 0x00000000},
1369 	{0x00000084, 0xe3f3e4f4},
1370 	{0x00000085, 0x00052024},
1371 	{0x00000087, 0x00000000},
1372 	{0x00000088, 0x66036603},
1373 	{0x00000089, 0x01000000},
1374 	{0x0000008b, 0x1c0a0000},
1375 	{0x0000008c, 0xff010000},
1376 	{0x0000008e, 0xffffefff},
1377 	{0x0000008f, 0xfff3efff},
1378 	{0x00000090, 0xfff3efbf},
1379 	{0x00000094, 0x00101101},
1380 	{0x00000095, 0x00000fff},
1381 	{0x00000096, 0x00116fff},
1382 	{0x00000097, 0x60010000},
1383 	{0x00000098, 0x10010000},
1384 	{0x00000099, 0x00006000},
1385 	{0x0000009a, 0x00001000},
1386 	{0x0000009f, 0x00a77400}
1387 };
1388 
1389 static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1390 	{0x0000006f, 0x03044000},
1391 	{0x00000070, 0x0480c018},
1392 	{0x00000071, 0x00000040},
1393 	{0x00000072, 0x01000000},
1394 	{0x00000074, 0x000000ff},
1395 	{0x00000075, 0x00143400},
1396 	{0x00000076, 0x08ec0800},
1397 	{0x00000077, 0x040000cc},
1398 	{0x00000079, 0x00000000},
1399 	{0x0000007a, 0x21000409},
1400 	{0x0000007c, 0x00000000},
1401 	{0x0000007d, 0xe8000000},
1402 	{0x0000007e, 0x044408a8},
1403 	{0x0000007f, 0x00000003},
1404 	{0x00000080, 0x00000000},
1405 	{0x00000081, 0x01000000},
1406 	{0x00000082, 0x02000000},
1407 	{0x00000083, 0x00000000},
1408 	{0x00000084, 0xe3f3e4f4},
1409 	{0x00000085, 0x00052024},
1410 	{0x00000087, 0x00000000},
1411 	{0x00000088, 0x66036603},
1412 	{0x00000089, 0x01000000},
1413 	{0x0000008b, 0x1c0a0000},
1414 	{0x0000008c, 0xff010000},
1415 	{0x0000008e, 0xffffefff},
1416 	{0x0000008f, 0xfff3efff},
1417 	{0x00000090, 0xfff3efbf},
1418 	{0x00000094, 0x00101101},
1419 	{0x00000095, 0x00000fff},
1420 	{0x00000096, 0x00116fff},
1421 	{0x00000097, 0x60010000},
1422 	{0x00000098, 0x10010000},
1423 	{0x00000099, 0x00006000},
1424 	{0x0000009a, 0x00001000},
1425 	{0x0000009f, 0x00a47400}
1426 };
1427 
1428 static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1429 	{0x0000006f, 0x03044000},
1430 	{0x00000070, 0x0480c018},
1431 	{0x00000071, 0x00000040},
1432 	{0x00000072, 0x01000000},
1433 	{0x00000074, 0x000000ff},
1434 	{0x00000075, 0x00143400},
1435 	{0x00000076, 0x08ec0800},
1436 	{0x00000077, 0x040000cc},
1437 	{0x00000079, 0x00000000},
1438 	{0x0000007a, 0x21000409},
1439 	{0x0000007c, 0x00000000},
1440 	{0x0000007d, 0xe8000000},
1441 	{0x0000007e, 0x044408a8},
1442 	{0x0000007f, 0x00000003},
1443 	{0x00000080, 0x00000000},
1444 	{0x00000081, 0x01000000},
1445 	{0x00000082, 0x02000000},
1446 	{0x00000083, 0x00000000},
1447 	{0x00000084, 0xe3f3e4f4},
1448 	{0x00000085, 0x00052024},
1449 	{0x00000087, 0x00000000},
1450 	{0x00000088, 0x66036603},
1451 	{0x00000089, 0x01000000},
1452 	{0x0000008b, 0x1c0a0000},
1453 	{0x0000008c, 0xff010000},
1454 	{0x0000008e, 0xffffefff},
1455 	{0x0000008f, 0xfff3efff},
1456 	{0x00000090, 0xfff3efbf},
1457 	{0x00000094, 0x00101101},
1458 	{0x00000095, 0x00000fff},
1459 	{0x00000096, 0x00116fff},
1460 	{0x00000097, 0x60010000},
1461 	{0x00000098, 0x10010000},
1462 	{0x00000099, 0x00006000},
1463 	{0x0000009a, 0x00001000},
1464 	{0x0000009f, 0x00a37400}
1465 };
1466 
1467 static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1468 	{0x0000006f, 0x03044000},
1469 	{0x00000070, 0x0480c018},
1470 	{0x00000071, 0x00000040},
1471 	{0x00000072, 0x01000000},
1472 	{0x00000074, 0x000000ff},
1473 	{0x00000075, 0x00143400},
1474 	{0x00000076, 0x08ec0800},
1475 	{0x00000077, 0x040000cc},
1476 	{0x00000079, 0x00000000},
1477 	{0x0000007a, 0x21000409},
1478 	{0x0000007c, 0x00000000},
1479 	{0x0000007d, 0xe8000000},
1480 	{0x0000007e, 0x044408a8},
1481 	{0x0000007f, 0x00000003},
1482 	{0x00000080, 0x00000000},
1483 	{0x00000081, 0x01000000},
1484 	{0x00000082, 0x02000000},
1485 	{0x00000083, 0x00000000},
1486 	{0x00000084, 0xe3f3e4f4},
1487 	{0x00000085, 0x00052024},
1488 	{0x00000087, 0x00000000},
1489 	{0x00000088, 0x66036603},
1490 	{0x00000089, 0x01000000},
1491 	{0x0000008b, 0x1c0a0000},
1492 	{0x0000008c, 0xff010000},
1493 	{0x0000008e, 0xffffefff},
1494 	{0x0000008f, 0xfff3efff},
1495 	{0x00000090, 0xfff3efbf},
1496 	{0x00000094, 0x00101101},
1497 	{0x00000095, 0x00000fff},
1498 	{0x00000096, 0x00116fff},
1499 	{0x00000097, 0x60010000},
1500 	{0x00000098, 0x10010000},
1501 	{0x00000099, 0x00006000},
1502 	{0x0000009a, 0x00001000},
1503 	{0x0000009f, 0x00a17730}
1504 };
1505 
1506 static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1507 	{0x0000006f, 0x03044000},
1508 	{0x00000070, 0x0480c018},
1509 	{0x00000071, 0x00000040},
1510 	{0x00000072, 0x01000000},
1511 	{0x00000074, 0x000000ff},
1512 	{0x00000075, 0x00143400},
1513 	{0x00000076, 0x08ec0800},
1514 	{0x00000077, 0x040000cc},
1515 	{0x00000079, 0x00000000},
1516 	{0x0000007a, 0x21000409},
1517 	{0x0000007c, 0x00000000},
1518 	{0x0000007d, 0xe8000000},
1519 	{0x0000007e, 0x044408a8},
1520 	{0x0000007f, 0x00000003},
1521 	{0x00000080, 0x00000000},
1522 	{0x00000081, 0x01000000},
1523 	{0x00000082, 0x02000000},
1524 	{0x00000083, 0x00000000},
1525 	{0x00000084, 0xe3f3e4f4},
1526 	{0x00000085, 0x00052024},
1527 	{0x00000087, 0x00000000},
1528 	{0x00000088, 0x66036603},
1529 	{0x00000089, 0x01000000},
1530 	{0x0000008b, 0x1c0a0000},
1531 	{0x0000008c, 0xff010000},
1532 	{0x0000008e, 0xffffefff},
1533 	{0x0000008f, 0xfff3efff},
1534 	{0x00000090, 0xfff3efbf},
1535 	{0x00000094, 0x00101101},
1536 	{0x00000095, 0x00000fff},
1537 	{0x00000096, 0x00116fff},
1538 	{0x00000097, 0x60010000},
1539 	{0x00000098, 0x10010000},
1540 	{0x00000099, 0x00006000},
1541 	{0x0000009a, 0x00001000},
1542 	{0x0000009f, 0x00a07730}
1543 };
1544 
1545 /* ucode loading */
1546 int si_mc_load_microcode(struct radeon_device *rdev)
1547 {
1548 	const __be32 *fw_data = NULL;
1549 	const __le32 *new_fw_data = NULL;
1550 	u32 running;
1551 	u32 *io_mc_regs = NULL;
1552 	const __le32 *new_io_mc_regs = NULL;
1553 	int i, regs_size, ucode_size;
1554 
1555 	if (!rdev->mc_fw)
1556 		return -EINVAL;
1557 
1558 	if (rdev->new_fw) {
1559 		const struct mc_firmware_header_v1_0 *hdr =
1560 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1561 
1562 		radeon_ucode_print_mc_hdr(&hdr->header);
1563 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1564 		new_io_mc_regs = (const __le32 *)
1565 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1566 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1567 		new_fw_data = (const __le32 *)
1568 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1569 	} else {
1570 		ucode_size = rdev->mc_fw->size / 4;
1571 
1572 		switch (rdev->family) {
1573 		case CHIP_TAHITI:
1574 			io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1575 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1576 			break;
1577 		case CHIP_PITCAIRN:
1578 			io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1579 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1580 			break;
1581 		case CHIP_VERDE:
1582 		default:
1583 			io_mc_regs = (u32 *)&verde_io_mc_regs;
1584 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1585 			break;
1586 		case CHIP_OLAND:
1587 			io_mc_regs = (u32 *)&oland_io_mc_regs;
1588 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1589 			break;
1590 		case CHIP_HAINAN:
1591 			io_mc_regs = (u32 *)&hainan_io_mc_regs;
1592 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1593 			break;
1594 		}
1595 		fw_data = (const __be32 *)rdev->mc_fw->data;
1596 	}
1597 
1598 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1599 
1600 	if (running == 0) {
1601 		/* reset the engine and set to writable */
1602 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1603 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1604 
1605 		/* load mc io regs */
1606 		for (i = 0; i < regs_size; i++) {
1607 			if (rdev->new_fw) {
1608 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1609 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1610 			} else {
1611 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1612 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1613 			}
1614 		}
1615 		/* load the MC ucode */
1616 		for (i = 0; i < ucode_size; i++) {
1617 			if (rdev->new_fw)
1618 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1619 			else
1620 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1621 		}
1622 
1623 		/* put the engine back into the active state */
1624 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1625 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1626 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1627 
1628 		/* wait for training to complete */
1629 		for (i = 0; i < rdev->usec_timeout; i++) {
1630 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1631 				break;
1632 			udelay(1);
1633 		}
1634 		for (i = 0; i < rdev->usec_timeout; i++) {
1635 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1636 				break;
1637 			udelay(1);
1638 		}
1639 	}
1640 
1641 	return 0;
1642 }
1643 
1644 static int si_init_microcode(struct radeon_device *rdev)
1645 {
1646 	const char *chip_name;
1647 	const char *new_chip_name;
1648 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1649 	size_t smc_req_size, mc2_req_size;
1650 	char fw_name[30];
1651 	int err;
1652 	int new_fw = 0;
1653 	bool new_smc = false;
1654 
1655 	DRM_DEBUG("\n");
1656 
1657 	switch (rdev->family) {
1658 	case CHIP_TAHITI:
1659 		chip_name = "TAHITI";
1660 		/* XXX: figure out which Tahitis need the new ucode */
1661 		if (0)
1662 			new_smc = true;
1663 		new_chip_name = "tahiti";
1664 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1665 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1666 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1667 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1668 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1669 		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1670 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1671 		break;
1672 	case CHIP_PITCAIRN:
1673 		chip_name = "PITCAIRN";
1674 		if ((rdev->pdev->revision == 0x81) ||
1675 		    (rdev->pdev->device == 0x6810) ||
1676 		    (rdev->pdev->device == 0x6811) ||
1677 		    (rdev->pdev->device == 0x6816) ||
1678 		    (rdev->pdev->device == 0x6817) ||
1679 		    (rdev->pdev->device == 0x6806))
1680 			new_smc = true;
1681 		new_chip_name = "pitcairn";
1682 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1683 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1684 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1685 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1686 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1687 		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1688 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1689 		break;
1690 	case CHIP_VERDE:
1691 		chip_name = "VERDE";
1692 		if ((rdev->pdev->revision == 0x81) ||
1693 		    (rdev->pdev->revision == 0x83) ||
1694 		    (rdev->pdev->revision == 0x87) ||
1695 		    (rdev->pdev->device == 0x6820) ||
1696 		    (rdev->pdev->device == 0x6821) ||
1697 		    (rdev->pdev->device == 0x6822) ||
1698 		    (rdev->pdev->device == 0x6823) ||
1699 		    (rdev->pdev->device == 0x682A) ||
1700 		    (rdev->pdev->device == 0x682B))
1701 			new_smc = true;
1702 		new_chip_name = "verde";
1703 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1704 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1705 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1706 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1707 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1708 		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1709 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1710 		break;
1711 	case CHIP_OLAND:
1712 		chip_name = "OLAND";
1713 		if ((rdev->pdev->revision == 0xC7) ||
1714 		    (rdev->pdev->revision == 0x80) ||
1715 		    (rdev->pdev->revision == 0x81) ||
1716 		    (rdev->pdev->revision == 0x83) ||
1717 		    (rdev->pdev->device == 0x6604) ||
1718 		    (rdev->pdev->device == 0x6605))
1719 			new_smc = true;
1720 		new_chip_name = "oland";
1721 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1722 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1723 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1724 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1725 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1726 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1727 		break;
1728 	case CHIP_HAINAN:
1729 		chip_name = "HAINAN";
1730 		if ((rdev->pdev->revision == 0x81) ||
1731 		    (rdev->pdev->revision == 0x83) ||
1732 		    (rdev->pdev->revision == 0xC3) ||
1733 		    (rdev->pdev->device == 0x6664) ||
1734 		    (rdev->pdev->device == 0x6665) ||
1735 		    (rdev->pdev->device == 0x6667))
1736 			new_smc = true;
1737 		new_chip_name = "hainan";
1738 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1739 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1740 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1741 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1742 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1743 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1744 		break;
1745 	default: BUG();
1746 	}
1747 
1748 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
1749 
1750 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1751 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1752 	if (err) {
1753 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1754 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1755 		if (err)
1756 			goto out;
1757 		if (rdev->pfp_fw->size != pfp_req_size) {
1758 			printk(KERN_ERR
1759 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1760 			       rdev->pfp_fw->size, fw_name);
1761 			err = -EINVAL;
1762 			goto out;
1763 		}
1764 	} else {
1765 		err = radeon_ucode_validate(rdev->pfp_fw);
1766 		if (err) {
1767 			printk(KERN_ERR
1768 			       "si_cp: validation failed for firmware \"%s\"\n",
1769 			       fw_name);
1770 			goto out;
1771 		} else {
1772 			new_fw++;
1773 		}
1774 	}
1775 
1776 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
1777 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1778 	if (err) {
1779 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1780 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1781 		if (err)
1782 			goto out;
1783 		if (rdev->me_fw->size != me_req_size) {
1784 			printk(KERN_ERR
1785 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1786 			       rdev->me_fw->size, fw_name);
1787 			err = -EINVAL;
1788 		}
1789 	} else {
1790 		err = radeon_ucode_validate(rdev->me_fw);
1791 		if (err) {
1792 			printk(KERN_ERR
1793 			       "si_cp: validation failed for firmware \"%s\"\n",
1794 			       fw_name);
1795 			goto out;
1796 		} else {
1797 			new_fw++;
1798 		}
1799 	}
1800 
1801 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
1802 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1803 	if (err) {
1804 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1805 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1806 		if (err)
1807 			goto out;
1808 		if (rdev->ce_fw->size != ce_req_size) {
1809 			printk(KERN_ERR
1810 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1811 			       rdev->ce_fw->size, fw_name);
1812 			err = -EINVAL;
1813 		}
1814 	} else {
1815 		err = radeon_ucode_validate(rdev->ce_fw);
1816 		if (err) {
1817 			printk(KERN_ERR
1818 			       "si_cp: validation failed for firmware \"%s\"\n",
1819 			       fw_name);
1820 			goto out;
1821 		} else {
1822 			new_fw++;
1823 		}
1824 	}
1825 
1826 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
1827 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1828 	if (err) {
1829 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1830 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1831 		if (err)
1832 			goto out;
1833 		if (rdev->rlc_fw->size != rlc_req_size) {
1834 			printk(KERN_ERR
1835 			       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1836 			       rdev->rlc_fw->size, fw_name);
1837 			err = -EINVAL;
1838 		}
1839 	} else {
1840 		err = radeon_ucode_validate(rdev->rlc_fw);
1841 		if (err) {
1842 			printk(KERN_ERR
1843 			       "si_cp: validation failed for firmware \"%s\"\n",
1844 			       fw_name);
1845 			goto out;
1846 		} else {
1847 			new_fw++;
1848 		}
1849 	}
1850 
1851 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
1852 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1853 	if (err) {
1854 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1855 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1856 		if (err) {
1857 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1858 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1859 			if (err)
1860 				goto out;
1861 		}
1862 		if ((rdev->mc_fw->size != mc_req_size) &&
1863 		    (rdev->mc_fw->size != mc2_req_size)) {
1864 			printk(KERN_ERR
1865 			       "si_mc: Bogus length %zu in firmware \"%s\"\n",
1866 			       rdev->mc_fw->size, fw_name);
1867 			err = -EINVAL;
1868 		}
1869 		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1870 	} else {
1871 		err = radeon_ucode_validate(rdev->mc_fw);
1872 		if (err) {
1873 			printk(KERN_ERR
1874 			       "si_cp: validation failed for firmware \"%s\"\n",
1875 			       fw_name);
1876 			goto out;
1877 		} else {
1878 			new_fw++;
1879 		}
1880 	}
1881 
1882 	if (new_smc)
1883 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
1884 	else
1885 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
1886 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1887 	if (err) {
1888 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1889 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1890 		if (err) {
1891 			printk(KERN_ERR
1892 			       "smc: error loading firmware \"%s\"\n",
1893 			       fw_name);
1894 			release_firmware(rdev->smc_fw);
1895 			rdev->smc_fw = NULL;
1896 			err = 0;
1897 		} else if (rdev->smc_fw->size != smc_req_size) {
1898 			printk(KERN_ERR
1899 			       "si_smc: Bogus length %zu in firmware \"%s\"\n",
1900 			       rdev->smc_fw->size, fw_name);
1901 			err = -EINVAL;
1902 		}
1903 	} else {
1904 		err = radeon_ucode_validate(rdev->smc_fw);
1905 		if (err) {
1906 			printk(KERN_ERR
1907 			       "si_cp: validation failed for firmware \"%s\"\n",
1908 			       fw_name);
1909 			goto out;
1910 		} else {
1911 			new_fw++;
1912 		}
1913 	}
1914 
1915 	if (new_fw == 0) {
1916 		rdev->new_fw = false;
1917 	} else if (new_fw < 6) {
1918 		printk(KERN_ERR "si_fw: mixing new and old firmware!\n");
1919 		err = -EINVAL;
1920 	} else {
1921 		rdev->new_fw = true;
1922 	}
1923 out:
1924 	if (err) {
1925 		if (err != -EINVAL)
1926 			printk(KERN_ERR
1927 			       "si_cp: Failed to load firmware \"%s\"\n",
1928 			       fw_name);
1929 		release_firmware(rdev->pfp_fw);
1930 		rdev->pfp_fw = NULL;
1931 		release_firmware(rdev->me_fw);
1932 		rdev->me_fw = NULL;
1933 		release_firmware(rdev->ce_fw);
1934 		rdev->ce_fw = NULL;
1935 		release_firmware(rdev->rlc_fw);
1936 		rdev->rlc_fw = NULL;
1937 		release_firmware(rdev->mc_fw);
1938 		rdev->mc_fw = NULL;
1939 		release_firmware(rdev->smc_fw);
1940 		rdev->smc_fw = NULL;
1941 	}
1942 	return err;
1943 }
1944 
1945 /* watermark setup */
1946 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1947 				   struct radeon_crtc *radeon_crtc,
1948 				   struct drm_display_mode *mode,
1949 				   struct drm_display_mode *other_mode)
1950 {
1951 	u32 tmp, buffer_alloc, i;
1952 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1953 	/*
1954 	 * Line Buffer Setup
1955 	 * There are 3 line buffers, each one shared by 2 display controllers.
1956 	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1957 	 * the display controllers.  The paritioning is done via one of four
1958 	 * preset allocations specified in bits 21:20:
1959 	 *  0 - half lb
1960 	 *  2 - whole lb, other crtc must be disabled
1961 	 */
1962 	/* this can get tricky if we have two large displays on a paired group
1963 	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1964 	 * non-linked crtcs for maximum line buffer allocation.
1965 	 */
1966 	if (radeon_crtc->base.enabled && mode) {
1967 		if (other_mode) {
1968 			tmp = 0; /* 1/2 */
1969 			buffer_alloc = 1;
1970 		} else {
1971 			tmp = 2; /* whole */
1972 			buffer_alloc = 2;
1973 		}
1974 	} else {
1975 		tmp = 0;
1976 		buffer_alloc = 0;
1977 	}
1978 
1979 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1980 	       DC_LB_MEMORY_CONFIG(tmp));
1981 
1982 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1983 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1984 	for (i = 0; i < rdev->usec_timeout; i++) {
1985 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1986 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
1987 			break;
1988 		udelay(1);
1989 	}
1990 
1991 	if (radeon_crtc->base.enabled && mode) {
1992 		switch (tmp) {
1993 		case 0:
1994 		default:
1995 			return 4096 * 2;
1996 		case 2:
1997 			return 8192 * 2;
1998 		}
1999 	}
2000 
2001 	/* controller not enabled, so no lb used */
2002 	return 0;
2003 }
2004 
2005 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
2006 {
2007 	u32 tmp = RREG32(MC_SHARED_CHMAP);
2008 
2009 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2010 	case 0:
2011 	default:
2012 		return 1;
2013 	case 1:
2014 		return 2;
2015 	case 2:
2016 		return 4;
2017 	case 3:
2018 		return 8;
2019 	case 4:
2020 		return 3;
2021 	case 5:
2022 		return 6;
2023 	case 6:
2024 		return 10;
2025 	case 7:
2026 		return 12;
2027 	case 8:
2028 		return 16;
2029 	}
2030 }
2031 
2032 struct dce6_wm_params {
2033 	u32 dram_channels; /* number of dram channels */
2034 	u32 yclk;          /* bandwidth per dram data pin in kHz */
2035 	u32 sclk;          /* engine clock in kHz */
2036 	u32 disp_clk;      /* display clock in kHz */
2037 	u32 src_width;     /* viewport width */
2038 	u32 active_time;   /* active display time in ns */
2039 	u32 blank_time;    /* blank time in ns */
2040 	bool interlaced;    /* mode is interlaced */
2041 	fixed20_12 vsc;    /* vertical scale ratio */
2042 	u32 num_heads;     /* number of active crtcs */
2043 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
2044 	u32 lb_size;       /* line buffer allocated to pipe */
2045 	u32 vtaps;         /* vertical scaler taps */
2046 };
2047 
2048 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
2049 {
2050 	/* Calculate raw DRAM Bandwidth */
2051 	fixed20_12 dram_efficiency; /* 0.7 */
2052 	fixed20_12 yclk, dram_channels, bandwidth;
2053 	fixed20_12 a;
2054 
2055 	a.full = dfixed_const(1000);
2056 	yclk.full = dfixed_const(wm->yclk);
2057 	yclk.full = dfixed_div(yclk, a);
2058 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2059 	a.full = dfixed_const(10);
2060 	dram_efficiency.full = dfixed_const(7);
2061 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
2062 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2063 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2064 
2065 	return dfixed_trunc(bandwidth);
2066 }
2067 
2068 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2069 {
2070 	/* Calculate DRAM Bandwidth and the part allocated to display. */
2071 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2072 	fixed20_12 yclk, dram_channels, bandwidth;
2073 	fixed20_12 a;
2074 
2075 	a.full = dfixed_const(1000);
2076 	yclk.full = dfixed_const(wm->yclk);
2077 	yclk.full = dfixed_div(yclk, a);
2078 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2079 	a.full = dfixed_const(10);
2080 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
2081 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2082 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2083 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2084 
2085 	return dfixed_trunc(bandwidth);
2086 }
2087 
2088 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2089 {
2090 	/* Calculate the display Data return Bandwidth */
2091 	fixed20_12 return_efficiency; /* 0.8 */
2092 	fixed20_12 sclk, bandwidth;
2093 	fixed20_12 a;
2094 
2095 	a.full = dfixed_const(1000);
2096 	sclk.full = dfixed_const(wm->sclk);
2097 	sclk.full = dfixed_div(sclk, a);
2098 	a.full = dfixed_const(10);
2099 	return_efficiency.full = dfixed_const(8);
2100 	return_efficiency.full = dfixed_div(return_efficiency, a);
2101 	a.full = dfixed_const(32);
2102 	bandwidth.full = dfixed_mul(a, sclk);
2103 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2104 
2105 	return dfixed_trunc(bandwidth);
2106 }
2107 
2108 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2109 {
2110 	return 32;
2111 }
2112 
2113 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2114 {
2115 	/* Calculate the DMIF Request Bandwidth */
2116 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2117 	fixed20_12 disp_clk, sclk, bandwidth;
2118 	fixed20_12 a, b1, b2;
2119 	u32 min_bandwidth;
2120 
2121 	a.full = dfixed_const(1000);
2122 	disp_clk.full = dfixed_const(wm->disp_clk);
2123 	disp_clk.full = dfixed_div(disp_clk, a);
2124 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2125 	b1.full = dfixed_mul(a, disp_clk);
2126 
2127 	a.full = dfixed_const(1000);
2128 	sclk.full = dfixed_const(wm->sclk);
2129 	sclk.full = dfixed_div(sclk, a);
2130 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2131 	b2.full = dfixed_mul(a, sclk);
2132 
2133 	a.full = dfixed_const(10);
2134 	disp_clk_request_efficiency.full = dfixed_const(8);
2135 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2136 
2137 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2138 
2139 	a.full = dfixed_const(min_bandwidth);
2140 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2141 
2142 	return dfixed_trunc(bandwidth);
2143 }
2144 
2145 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2146 {
2147 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
2148 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2149 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2150 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2151 
2152 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2153 }
2154 
2155 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2156 {
2157 	/* Calculate the display mode Average Bandwidth
2158 	 * DisplayMode should contain the source and destination dimensions,
2159 	 * timing, etc.
2160 	 */
2161 	fixed20_12 bpp;
2162 	fixed20_12 line_time;
2163 	fixed20_12 src_width;
2164 	fixed20_12 bandwidth;
2165 	fixed20_12 a;
2166 
2167 	a.full = dfixed_const(1000);
2168 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2169 	line_time.full = dfixed_div(line_time, a);
2170 	bpp.full = dfixed_const(wm->bytes_per_pixel);
2171 	src_width.full = dfixed_const(wm->src_width);
2172 	bandwidth.full = dfixed_mul(src_width, bpp);
2173 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2174 	bandwidth.full = dfixed_div(bandwidth, line_time);
2175 
2176 	return dfixed_trunc(bandwidth);
2177 }
2178 
2179 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
2180 {
2181 	/* First calcualte the latency in ns */
2182 	u32 mc_latency = 2000; /* 2000 ns. */
2183 	u32 available_bandwidth = dce6_available_bandwidth(wm);
2184 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
2185 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
2186 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
2187 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
2188 		(wm->num_heads * cursor_line_pair_return_time);
2189 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
2190 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
2191 	u32 tmp, dmif_size = 12288;
2192 	fixed20_12 a, b, c;
2193 
2194 	if (wm->num_heads == 0)
2195 		return 0;
2196 
2197 	a.full = dfixed_const(2);
2198 	b.full = dfixed_const(1);
2199 	if ((wm->vsc.full > a.full) ||
2200 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
2201 	    (wm->vtaps >= 5) ||
2202 	    ((wm->vsc.full >= a.full) && wm->interlaced))
2203 		max_src_lines_per_dst_line = 4;
2204 	else
2205 		max_src_lines_per_dst_line = 2;
2206 
2207 	a.full = dfixed_const(available_bandwidth);
2208 	b.full = dfixed_const(wm->num_heads);
2209 	a.full = dfixed_div(a, b);
2210 
2211 	b.full = dfixed_const(mc_latency + 512);
2212 	c.full = dfixed_const(wm->disp_clk);
2213 	b.full = dfixed_div(b, c);
2214 
2215 	c.full = dfixed_const(dmif_size);
2216 	b.full = dfixed_div(c, b);
2217 
2218 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
2219 
2220 	b.full = dfixed_const(1000);
2221 	c.full = dfixed_const(wm->disp_clk);
2222 	b.full = dfixed_div(c, b);
2223 	c.full = dfixed_const(wm->bytes_per_pixel);
2224 	b.full = dfixed_mul(b, c);
2225 
2226 	lb_fill_bw = min(tmp, dfixed_trunc(b));
2227 
2228 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2229 	b.full = dfixed_const(1000);
2230 	c.full = dfixed_const(lb_fill_bw);
2231 	b.full = dfixed_div(c, b);
2232 	a.full = dfixed_div(a, b);
2233 	line_fill_time = dfixed_trunc(a);
2234 
2235 	if (line_fill_time < wm->active_time)
2236 		return latency;
2237 	else
2238 		return latency + (line_fill_time - wm->active_time);
2239 
2240 }
2241 
2242 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2243 {
2244 	if (dce6_average_bandwidth(wm) <=
2245 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2246 		return true;
2247 	else
2248 		return false;
2249 };
2250 
2251 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2252 {
2253 	if (dce6_average_bandwidth(wm) <=
2254 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2255 		return true;
2256 	else
2257 		return false;
2258 };
2259 
2260 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2261 {
2262 	u32 lb_partitions = wm->lb_size / wm->src_width;
2263 	u32 line_time = wm->active_time + wm->blank_time;
2264 	u32 latency_tolerant_lines;
2265 	u32 latency_hiding;
2266 	fixed20_12 a;
2267 
2268 	a.full = dfixed_const(1);
2269 	if (wm->vsc.full > a.full)
2270 		latency_tolerant_lines = 1;
2271 	else {
2272 		if (lb_partitions <= (wm->vtaps + 1))
2273 			latency_tolerant_lines = 1;
2274 		else
2275 			latency_tolerant_lines = 2;
2276 	}
2277 
2278 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2279 
2280 	if (dce6_latency_watermark(wm) <= latency_hiding)
2281 		return true;
2282 	else
2283 		return false;
2284 }
2285 
2286 static void dce6_program_watermarks(struct radeon_device *rdev,
2287 					 struct radeon_crtc *radeon_crtc,
2288 					 u32 lb_size, u32 num_heads)
2289 {
2290 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
2291 	struct dce6_wm_params wm_low, wm_high;
2292 	u32 dram_channels;
2293 	u32 pixel_period;
2294 	u32 line_time = 0;
2295 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
2296 	u32 priority_a_mark = 0, priority_b_mark = 0;
2297 	u32 priority_a_cnt = PRIORITY_OFF;
2298 	u32 priority_b_cnt = PRIORITY_OFF;
2299 	u32 tmp, arb_control3;
2300 	fixed20_12 a, b, c;
2301 
2302 	if (radeon_crtc->base.enabled && num_heads && mode) {
2303 		pixel_period = 1000000 / (u32)mode->clock;
2304 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
2305 		priority_a_cnt = 0;
2306 		priority_b_cnt = 0;
2307 
2308 		if (rdev->family == CHIP_ARUBA)
2309 			dram_channels = evergreen_get_number_of_dram_channels(rdev);
2310 		else
2311 			dram_channels = si_get_number_of_dram_channels(rdev);
2312 
2313 		/* watermark for high clocks */
2314 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2315 			wm_high.yclk =
2316 				radeon_dpm_get_mclk(rdev, false) * 10;
2317 			wm_high.sclk =
2318 				radeon_dpm_get_sclk(rdev, false) * 10;
2319 		} else {
2320 			wm_high.yclk = rdev->pm.current_mclk * 10;
2321 			wm_high.sclk = rdev->pm.current_sclk * 10;
2322 		}
2323 
2324 		wm_high.disp_clk = mode->clock;
2325 		wm_high.src_width = mode->crtc_hdisplay;
2326 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
2327 		wm_high.blank_time = line_time - wm_high.active_time;
2328 		wm_high.interlaced = false;
2329 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2330 			wm_high.interlaced = true;
2331 		wm_high.vsc = radeon_crtc->vsc;
2332 		wm_high.vtaps = 1;
2333 		if (radeon_crtc->rmx_type != RMX_OFF)
2334 			wm_high.vtaps = 2;
2335 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2336 		wm_high.lb_size = lb_size;
2337 		wm_high.dram_channels = dram_channels;
2338 		wm_high.num_heads = num_heads;
2339 
2340 		/* watermark for low clocks */
2341 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2342 			wm_low.yclk =
2343 				radeon_dpm_get_mclk(rdev, true) * 10;
2344 			wm_low.sclk =
2345 				radeon_dpm_get_sclk(rdev, true) * 10;
2346 		} else {
2347 			wm_low.yclk = rdev->pm.current_mclk * 10;
2348 			wm_low.sclk = rdev->pm.current_sclk * 10;
2349 		}
2350 
2351 		wm_low.disp_clk = mode->clock;
2352 		wm_low.src_width = mode->crtc_hdisplay;
2353 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
2354 		wm_low.blank_time = line_time - wm_low.active_time;
2355 		wm_low.interlaced = false;
2356 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2357 			wm_low.interlaced = true;
2358 		wm_low.vsc = radeon_crtc->vsc;
2359 		wm_low.vtaps = 1;
2360 		if (radeon_crtc->rmx_type != RMX_OFF)
2361 			wm_low.vtaps = 2;
2362 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2363 		wm_low.lb_size = lb_size;
2364 		wm_low.dram_channels = dram_channels;
2365 		wm_low.num_heads = num_heads;
2366 
2367 		/* set for high clocks */
2368 		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2369 		/* set for low clocks */
2370 		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2371 
2372 		/* possibly force display priority to high */
2373 		/* should really do this at mode validation time... */
2374 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2375 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2376 		    !dce6_check_latency_hiding(&wm_high) ||
2377 		    (rdev->disp_priority == 2)) {
2378 			DRM_DEBUG_KMS("force priority to high\n");
2379 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2380 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2381 		}
2382 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2383 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2384 		    !dce6_check_latency_hiding(&wm_low) ||
2385 		    (rdev->disp_priority == 2)) {
2386 			DRM_DEBUG_KMS("force priority to high\n");
2387 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2388 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2389 		}
2390 
2391 		a.full = dfixed_const(1000);
2392 		b.full = dfixed_const(mode->clock);
2393 		b.full = dfixed_div(b, a);
2394 		c.full = dfixed_const(latency_watermark_a);
2395 		c.full = dfixed_mul(c, b);
2396 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2397 		c.full = dfixed_div(c, a);
2398 		a.full = dfixed_const(16);
2399 		c.full = dfixed_div(c, a);
2400 		priority_a_mark = dfixed_trunc(c);
2401 		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2402 
2403 		a.full = dfixed_const(1000);
2404 		b.full = dfixed_const(mode->clock);
2405 		b.full = dfixed_div(b, a);
2406 		c.full = dfixed_const(latency_watermark_b);
2407 		c.full = dfixed_mul(c, b);
2408 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2409 		c.full = dfixed_div(c, a);
2410 		a.full = dfixed_const(16);
2411 		c.full = dfixed_div(c, a);
2412 		priority_b_mark = dfixed_trunc(c);
2413 		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2414 
2415 		/* Save number of lines the linebuffer leads before the scanout */
2416 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
2417 	}
2418 
2419 	/* select wm A */
2420 	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2421 	tmp = arb_control3;
2422 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2423 	tmp |= LATENCY_WATERMARK_MASK(1);
2424 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2425 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2426 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2427 		LATENCY_HIGH_WATERMARK(line_time)));
2428 	/* select wm B */
2429 	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2430 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2431 	tmp |= LATENCY_WATERMARK_MASK(2);
2432 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2433 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2434 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2435 		LATENCY_HIGH_WATERMARK(line_time)));
2436 	/* restore original selection */
2437 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2438 
2439 	/* write the priority marks */
2440 	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2441 	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2442 
2443 	/* save values for DPM */
2444 	radeon_crtc->line_time = line_time;
2445 	radeon_crtc->wm_high = latency_watermark_a;
2446 	radeon_crtc->wm_low = latency_watermark_b;
2447 }
2448 
2449 void dce6_bandwidth_update(struct radeon_device *rdev)
2450 {
2451 	struct drm_display_mode *mode0 = NULL;
2452 	struct drm_display_mode *mode1 = NULL;
2453 	u32 num_heads = 0, lb_size;
2454 	int i;
2455 
2456 	if (!rdev->mode_info.mode_config_initialized)
2457 		return;
2458 
2459 	radeon_update_display_priority(rdev);
2460 
2461 	for (i = 0; i < rdev->num_crtc; i++) {
2462 		if (rdev->mode_info.crtcs[i]->base.enabled)
2463 			num_heads++;
2464 	}
2465 	for (i = 0; i < rdev->num_crtc; i += 2) {
2466 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2467 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2468 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2469 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2470 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2471 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2472 	}
2473 }
2474 
2475 /*
2476  * Core functions
2477  */
2478 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2479 {
2480 	u32 *tile = rdev->config.si.tile_mode_array;
2481 	const u32 num_tile_mode_states =
2482 			ARRAY_SIZE(rdev->config.si.tile_mode_array);
2483 	u32 reg_offset, split_equal_to_row_size;
2484 
2485 	switch (rdev->config.si.mem_row_size_in_kb) {
2486 	case 1:
2487 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2488 		break;
2489 	case 2:
2490 	default:
2491 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2492 		break;
2493 	case 4:
2494 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2495 		break;
2496 	}
2497 
2498 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2499 		tile[reg_offset] = 0;
2500 
2501 	switch(rdev->family) {
2502 	case CHIP_TAHITI:
2503 	case CHIP_PITCAIRN:
2504 		/* non-AA compressed depth or any compressed stencil */
2505 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2506 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2507 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2508 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2509 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2510 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2512 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2513 		/* 2xAA/4xAA compressed depth only */
2514 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2515 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2516 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2517 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2518 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2519 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2520 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2521 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2522 		/* 8xAA compressed depth only */
2523 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2524 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2525 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2526 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2527 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2528 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2529 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2530 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2531 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2532 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2533 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2534 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2535 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2536 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2537 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2538 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2539 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2540 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2541 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2542 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2543 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2544 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2545 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2546 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2547 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2548 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2549 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2550 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2551 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2552 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2553 			   TILE_SPLIT(split_equal_to_row_size) |
2554 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2555 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2556 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2557 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2558 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2559 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2560 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2561 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2562 			   TILE_SPLIT(split_equal_to_row_size) |
2563 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2564 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2565 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2566 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2567 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2568 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2569 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2570 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2571 			   TILE_SPLIT(split_equal_to_row_size) |
2572 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2573 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2574 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2575 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2576 		/* 1D and 1D Array Surfaces */
2577 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2578 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2579 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2580 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2581 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2582 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2583 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2584 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2585 		/* Displayable maps. */
2586 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2587 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2588 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2589 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2590 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2591 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2592 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2593 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2594 		/* Display 8bpp. */
2595 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2596 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2597 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2598 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2599 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2600 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2602 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2603 		/* Display 16bpp. */
2604 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2605 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2606 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2607 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2608 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2609 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2610 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2611 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2612 		/* Display 32bpp. */
2613 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2614 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2615 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2616 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2617 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2618 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2619 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2620 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2621 		/* Thin. */
2622 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2623 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2624 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2625 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2626 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2627 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2629 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2630 		/* Thin 8 bpp. */
2631 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2632 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2633 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2634 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2635 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2636 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2638 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2639 		/* Thin 16 bpp. */
2640 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2641 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2642 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2643 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2644 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2645 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2646 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2647 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2648 		/* Thin 32 bpp. */
2649 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2650 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2651 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2652 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2653 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2654 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2655 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2656 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2657 		/* Thin 64 bpp. */
2658 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2659 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2660 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2661 			   TILE_SPLIT(split_equal_to_row_size) |
2662 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2663 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2664 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2665 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2666 		/* 8 bpp PRT. */
2667 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2668 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2669 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2670 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2671 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2672 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2673 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2674 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2675 		/* 16 bpp PRT */
2676 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2677 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2678 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2679 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2680 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2681 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2682 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2683 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2684 		/* 32 bpp PRT */
2685 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2687 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2688 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2689 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2690 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2692 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2693 		/* 64 bpp PRT */
2694 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2695 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2696 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2697 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2698 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2699 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2700 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2701 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2702 		/* 128 bpp PRT */
2703 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2704 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2705 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2706 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2707 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2708 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2709 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2710 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2711 
2712 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2713 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2714 		break;
2715 
2716 	case CHIP_VERDE:
2717 	case CHIP_OLAND:
2718 	case CHIP_HAINAN:
2719 		/* non-AA compressed depth or any compressed stencil */
2720 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2721 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2722 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2723 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2724 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2725 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2726 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2727 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2728 		/* 2xAA/4xAA compressed depth only */
2729 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2730 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2731 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2732 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2733 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2734 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2735 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2736 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2737 		/* 8xAA compressed depth only */
2738 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2739 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2740 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2741 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2742 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2743 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2744 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2745 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2746 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2747 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2748 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2749 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2750 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2751 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2752 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2753 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2754 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2755 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2756 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2757 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2758 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2760 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2761 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2762 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2763 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2764 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2765 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2766 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2767 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2768 			   TILE_SPLIT(split_equal_to_row_size) |
2769 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2770 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2771 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2772 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2773 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2774 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2775 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2776 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2777 			   TILE_SPLIT(split_equal_to_row_size) |
2778 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2779 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2780 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2781 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2782 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2783 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2784 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2785 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2786 			   TILE_SPLIT(split_equal_to_row_size) |
2787 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2788 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2789 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2790 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2791 		/* 1D and 1D Array Surfaces */
2792 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2793 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2794 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2796 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2797 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2798 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2799 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2800 		/* Displayable maps. */
2801 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2802 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2803 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2804 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2805 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2806 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2807 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2808 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2809 		/* Display 8bpp. */
2810 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2811 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2812 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2813 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2814 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2815 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2816 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2817 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2818 		/* Display 16bpp. */
2819 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2820 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2821 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2822 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2823 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2824 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2825 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2826 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2827 		/* Display 32bpp. */
2828 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2829 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2830 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2831 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2832 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2833 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2835 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2836 		/* Thin. */
2837 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2838 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2839 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2840 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2841 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2842 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2843 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2844 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2845 		/* Thin 8 bpp. */
2846 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2847 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2848 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2849 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2850 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2851 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2852 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2853 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2854 		/* Thin 16 bpp. */
2855 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2856 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2857 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2858 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2859 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2860 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2861 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2862 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2863 		/* Thin 32 bpp. */
2864 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2865 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2866 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2867 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2868 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2869 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2870 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2871 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2872 		/* Thin 64 bpp. */
2873 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2874 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2875 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2876 			   TILE_SPLIT(split_equal_to_row_size) |
2877 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2878 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2879 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2880 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2881 		/* 8 bpp PRT. */
2882 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2883 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2884 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2885 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2886 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2887 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2888 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2889 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2890 		/* 16 bpp PRT */
2891 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2892 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2893 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2894 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2895 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2896 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2897 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2898 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2899 		/* 32 bpp PRT */
2900 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2902 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2903 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2904 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2905 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2906 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2907 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2908 		/* 64 bpp PRT */
2909 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2910 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2911 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2912 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2913 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2914 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2915 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2916 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2917 		/* 128 bpp PRT */
2918 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2919 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2920 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2921 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2922 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2923 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2924 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2925 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2926 
2927 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2928 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2929 		break;
2930 
2931 	default:
2932 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2933 	}
2934 }
2935 
2936 static void si_select_se_sh(struct radeon_device *rdev,
2937 			    u32 se_num, u32 sh_num)
2938 {
2939 	u32 data = INSTANCE_BROADCAST_WRITES;
2940 
2941 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2942 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2943 	else if (se_num == 0xffffffff)
2944 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2945 	else if (sh_num == 0xffffffff)
2946 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2947 	else
2948 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2949 	WREG32(GRBM_GFX_INDEX, data);
2950 }
2951 
2952 static u32 si_create_bitmask(u32 bit_width)
2953 {
2954 	u32 i, mask = 0;
2955 
2956 	for (i = 0; i < bit_width; i++) {
2957 		mask <<= 1;
2958 		mask |= 1;
2959 	}
2960 	return mask;
2961 }
2962 
2963 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2964 {
2965 	u32 data, mask;
2966 
2967 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2968 	if (data & 1)
2969 		data &= INACTIVE_CUS_MASK;
2970 	else
2971 		data = 0;
2972 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2973 
2974 	data >>= INACTIVE_CUS_SHIFT;
2975 
2976 	mask = si_create_bitmask(cu_per_sh);
2977 
2978 	return ~data & mask;
2979 }
2980 
2981 static void si_setup_spi(struct radeon_device *rdev,
2982 			 u32 se_num, u32 sh_per_se,
2983 			 u32 cu_per_sh)
2984 {
2985 	int i, j, k;
2986 	u32 data, mask, active_cu;
2987 
2988 	for (i = 0; i < se_num; i++) {
2989 		for (j = 0; j < sh_per_se; j++) {
2990 			si_select_se_sh(rdev, i, j);
2991 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2992 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2993 
2994 			mask = 1;
2995 			for (k = 0; k < 16; k++) {
2996 				mask <<= k;
2997 				if (active_cu & mask) {
2998 					data &= ~mask;
2999 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
3000 					break;
3001 				}
3002 			}
3003 		}
3004 	}
3005 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3006 }
3007 
3008 static u32 si_get_rb_disabled(struct radeon_device *rdev,
3009 			      u32 max_rb_num_per_se,
3010 			      u32 sh_per_se)
3011 {
3012 	u32 data, mask;
3013 
3014 	data = RREG32(CC_RB_BACKEND_DISABLE);
3015 	if (data & 1)
3016 		data &= BACKEND_DISABLE_MASK;
3017 	else
3018 		data = 0;
3019 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3020 
3021 	data >>= BACKEND_DISABLE_SHIFT;
3022 
3023 	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3024 
3025 	return data & mask;
3026 }
3027 
3028 static void si_setup_rb(struct radeon_device *rdev,
3029 			u32 se_num, u32 sh_per_se,
3030 			u32 max_rb_num_per_se)
3031 {
3032 	int i, j;
3033 	u32 data, mask;
3034 	u32 disabled_rbs = 0;
3035 	u32 enabled_rbs = 0;
3036 
3037 	for (i = 0; i < se_num; i++) {
3038 		for (j = 0; j < sh_per_se; j++) {
3039 			si_select_se_sh(rdev, i, j);
3040 			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3041 			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
3042 		}
3043 	}
3044 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3045 
3046 	mask = 1;
3047 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3048 		if (!(disabled_rbs & mask))
3049 			enabled_rbs |= mask;
3050 		mask <<= 1;
3051 	}
3052 
3053 	rdev->config.si.backend_enable_mask = enabled_rbs;
3054 
3055 	for (i = 0; i < se_num; i++) {
3056 		si_select_se_sh(rdev, i, 0xffffffff);
3057 		data = 0;
3058 		for (j = 0; j < sh_per_se; j++) {
3059 			switch (enabled_rbs & 3) {
3060 			case 1:
3061 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3062 				break;
3063 			case 2:
3064 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3065 				break;
3066 			case 3:
3067 			default:
3068 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3069 				break;
3070 			}
3071 			enabled_rbs >>= 2;
3072 		}
3073 		WREG32(PA_SC_RASTER_CONFIG, data);
3074 	}
3075 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3076 }
3077 
3078 static void si_gpu_init(struct radeon_device *rdev)
3079 {
3080 	u32 gb_addr_config = 0;
3081 	u32 mc_shared_chmap, mc_arb_ramcfg;
3082 	u32 sx_debug_1;
3083 	u32 hdp_host_path_cntl;
3084 	u32 tmp;
3085 	int i, j;
3086 
3087 	switch (rdev->family) {
3088 	case CHIP_TAHITI:
3089 		rdev->config.si.max_shader_engines = 2;
3090 		rdev->config.si.max_tile_pipes = 12;
3091 		rdev->config.si.max_cu_per_sh = 8;
3092 		rdev->config.si.max_sh_per_se = 2;
3093 		rdev->config.si.max_backends_per_se = 4;
3094 		rdev->config.si.max_texture_channel_caches = 12;
3095 		rdev->config.si.max_gprs = 256;
3096 		rdev->config.si.max_gs_threads = 32;
3097 		rdev->config.si.max_hw_contexts = 8;
3098 
3099 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3100 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3101 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3102 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3103 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3104 		break;
3105 	case CHIP_PITCAIRN:
3106 		rdev->config.si.max_shader_engines = 2;
3107 		rdev->config.si.max_tile_pipes = 8;
3108 		rdev->config.si.max_cu_per_sh = 5;
3109 		rdev->config.si.max_sh_per_se = 2;
3110 		rdev->config.si.max_backends_per_se = 4;
3111 		rdev->config.si.max_texture_channel_caches = 8;
3112 		rdev->config.si.max_gprs = 256;
3113 		rdev->config.si.max_gs_threads = 32;
3114 		rdev->config.si.max_hw_contexts = 8;
3115 
3116 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3117 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3118 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3119 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3120 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3121 		break;
3122 	case CHIP_VERDE:
3123 	default:
3124 		rdev->config.si.max_shader_engines = 1;
3125 		rdev->config.si.max_tile_pipes = 4;
3126 		rdev->config.si.max_cu_per_sh = 5;
3127 		rdev->config.si.max_sh_per_se = 2;
3128 		rdev->config.si.max_backends_per_se = 4;
3129 		rdev->config.si.max_texture_channel_caches = 4;
3130 		rdev->config.si.max_gprs = 256;
3131 		rdev->config.si.max_gs_threads = 32;
3132 		rdev->config.si.max_hw_contexts = 8;
3133 
3134 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3135 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3136 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3137 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3138 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3139 		break;
3140 	case CHIP_OLAND:
3141 		rdev->config.si.max_shader_engines = 1;
3142 		rdev->config.si.max_tile_pipes = 4;
3143 		rdev->config.si.max_cu_per_sh = 6;
3144 		rdev->config.si.max_sh_per_se = 1;
3145 		rdev->config.si.max_backends_per_se = 2;
3146 		rdev->config.si.max_texture_channel_caches = 4;
3147 		rdev->config.si.max_gprs = 256;
3148 		rdev->config.si.max_gs_threads = 16;
3149 		rdev->config.si.max_hw_contexts = 8;
3150 
3151 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3152 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3153 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3154 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3155 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3156 		break;
3157 	case CHIP_HAINAN:
3158 		rdev->config.si.max_shader_engines = 1;
3159 		rdev->config.si.max_tile_pipes = 4;
3160 		rdev->config.si.max_cu_per_sh = 5;
3161 		rdev->config.si.max_sh_per_se = 1;
3162 		rdev->config.si.max_backends_per_se = 1;
3163 		rdev->config.si.max_texture_channel_caches = 2;
3164 		rdev->config.si.max_gprs = 256;
3165 		rdev->config.si.max_gs_threads = 16;
3166 		rdev->config.si.max_hw_contexts = 8;
3167 
3168 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3169 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3170 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3171 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3172 		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
3173 		break;
3174 	}
3175 
3176 	/* Initialize HDP */
3177 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3178 		WREG32((0x2c14 + j), 0x00000000);
3179 		WREG32((0x2c18 + j), 0x00000000);
3180 		WREG32((0x2c1c + j), 0x00000000);
3181 		WREG32((0x2c20 + j), 0x00000000);
3182 		WREG32((0x2c24 + j), 0x00000000);
3183 	}
3184 
3185 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3186 	WREG32(SRBM_INT_CNTL, 1);
3187 	WREG32(SRBM_INT_ACK, 1);
3188 
3189 	evergreen_fix_pci_max_read_req_size(rdev);
3190 
3191 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3192 
3193 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3194 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3195 
3196 	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3197 	rdev->config.si.mem_max_burst_length_bytes = 256;
3198 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3199 	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3200 	if (rdev->config.si.mem_row_size_in_kb > 4)
3201 		rdev->config.si.mem_row_size_in_kb = 4;
3202 	/* XXX use MC settings? */
3203 	rdev->config.si.shader_engine_tile_size = 32;
3204 	rdev->config.si.num_gpus = 1;
3205 	rdev->config.si.multi_gpu_tile_size = 64;
3206 
3207 	/* fix up row size */
3208 	gb_addr_config &= ~ROW_SIZE_MASK;
3209 	switch (rdev->config.si.mem_row_size_in_kb) {
3210 	case 1:
3211 	default:
3212 		gb_addr_config |= ROW_SIZE(0);
3213 		break;
3214 	case 2:
3215 		gb_addr_config |= ROW_SIZE(1);
3216 		break;
3217 	case 4:
3218 		gb_addr_config |= ROW_SIZE(2);
3219 		break;
3220 	}
3221 
3222 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3223 	 * not have bank info, so create a custom tiling dword.
3224 	 * bits 3:0   num_pipes
3225 	 * bits 7:4   num_banks
3226 	 * bits 11:8  group_size
3227 	 * bits 15:12 row_size
3228 	 */
3229 	rdev->config.si.tile_config = 0;
3230 	switch (rdev->config.si.num_tile_pipes) {
3231 	case 1:
3232 		rdev->config.si.tile_config |= (0 << 0);
3233 		break;
3234 	case 2:
3235 		rdev->config.si.tile_config |= (1 << 0);
3236 		break;
3237 	case 4:
3238 		rdev->config.si.tile_config |= (2 << 0);
3239 		break;
3240 	case 8:
3241 	default:
3242 		/* XXX what about 12? */
3243 		rdev->config.si.tile_config |= (3 << 0);
3244 		break;
3245 	}
3246 	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3247 	case 0: /* four banks */
3248 		rdev->config.si.tile_config |= 0 << 4;
3249 		break;
3250 	case 1: /* eight banks */
3251 		rdev->config.si.tile_config |= 1 << 4;
3252 		break;
3253 	case 2: /* sixteen banks */
3254 	default:
3255 		rdev->config.si.tile_config |= 2 << 4;
3256 		break;
3257 	}
3258 	rdev->config.si.tile_config |=
3259 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3260 	rdev->config.si.tile_config |=
3261 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3262 
3263 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3264 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3265 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3266 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3267 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3268 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3269 	if (rdev->has_uvd) {
3270 		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3271 		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3272 		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3273 	}
3274 
3275 	si_tiling_mode_table_init(rdev);
3276 
3277 	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3278 		    rdev->config.si.max_sh_per_se,
3279 		    rdev->config.si.max_backends_per_se);
3280 
3281 	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3282 		     rdev->config.si.max_sh_per_se,
3283 		     rdev->config.si.max_cu_per_sh);
3284 
3285 	rdev->config.si.active_cus = 0;
3286 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
3287 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
3288 			rdev->config.si.active_cus +=
3289 				hweight32(si_get_cu_active_bitmap(rdev, i, j));
3290 		}
3291 	}
3292 
3293 	/* set HW defaults for 3D engine */
3294 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3295 				     ROQ_IB2_START(0x2b)));
3296 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3297 
3298 	sx_debug_1 = RREG32(SX_DEBUG_1);
3299 	WREG32(SX_DEBUG_1, sx_debug_1);
3300 
3301 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3302 
3303 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3304 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3305 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3306 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3307 
3308 	WREG32(VGT_NUM_INSTANCES, 1);
3309 
3310 	WREG32(CP_PERFMON_CNTL, 0);
3311 
3312 	WREG32(SQ_CONFIG, 0);
3313 
3314 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3315 					  FORCE_EOV_MAX_REZ_CNT(255)));
3316 
3317 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3318 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3319 
3320 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3321 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3322 
3323 	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3324 	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3325 	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3326 	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3327 	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3328 	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3329 	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3330 	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3331 
3332 	tmp = RREG32(HDP_MISC_CNTL);
3333 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3334 	WREG32(HDP_MISC_CNTL, tmp);
3335 
3336 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3337 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3338 
3339 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3340 
3341 	udelay(50);
3342 }
3343 
3344 /*
3345  * GPU scratch registers helpers function.
3346  */
3347 static void si_scratch_init(struct radeon_device *rdev)
3348 {
3349 	int i;
3350 
3351 	rdev->scratch.num_reg = 7;
3352 	rdev->scratch.reg_base = SCRATCH_REG0;
3353 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3354 		rdev->scratch.free[i] = true;
3355 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3356 	}
3357 }
3358 
3359 void si_fence_ring_emit(struct radeon_device *rdev,
3360 			struct radeon_fence *fence)
3361 {
3362 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3363 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3364 
3365 	/* flush read cache over gart */
3366 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3367 	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3368 	radeon_ring_write(ring, 0);
3369 	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3370 	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3371 			  PACKET3_TC_ACTION_ENA |
3372 			  PACKET3_SH_KCACHE_ACTION_ENA |
3373 			  PACKET3_SH_ICACHE_ACTION_ENA);
3374 	radeon_ring_write(ring, 0xFFFFFFFF);
3375 	radeon_ring_write(ring, 0);
3376 	radeon_ring_write(ring, 10); /* poll interval */
3377 	/* EVENT_WRITE_EOP - flush caches, send int */
3378 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3379 	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3380 	radeon_ring_write(ring, lower_32_bits(addr));
3381 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3382 	radeon_ring_write(ring, fence->seq);
3383 	radeon_ring_write(ring, 0);
3384 }
3385 
3386 /*
3387  * IB stuff
3388  */
3389 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3390 {
3391 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3392 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3393 	u32 header;
3394 
3395 	if (ib->is_const_ib) {
3396 		/* set switch buffer packet before const IB */
3397 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3398 		radeon_ring_write(ring, 0);
3399 
3400 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3401 	} else {
3402 		u32 next_rptr;
3403 		if (ring->rptr_save_reg) {
3404 			next_rptr = ring->wptr + 3 + 4 + 8;
3405 			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3406 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3407 						  PACKET3_SET_CONFIG_REG_START) >> 2));
3408 			radeon_ring_write(ring, next_rptr);
3409 		} else if (rdev->wb.enabled) {
3410 			next_rptr = ring->wptr + 5 + 4 + 8;
3411 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3412 			radeon_ring_write(ring, (1 << 8));
3413 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3414 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3415 			radeon_ring_write(ring, next_rptr);
3416 		}
3417 
3418 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3419 	}
3420 
3421 	radeon_ring_write(ring, header);
3422 	radeon_ring_write(ring,
3423 #ifdef __BIG_ENDIAN
3424 			  (2 << 0) |
3425 #endif
3426 			  (ib->gpu_addr & 0xFFFFFFFC));
3427 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3428 	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
3429 
3430 	if (!ib->is_const_ib) {
3431 		/* flush read cache over gart for this vmid */
3432 		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3433 		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3434 		radeon_ring_write(ring, vm_id);
3435 		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3436 		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3437 				  PACKET3_TC_ACTION_ENA |
3438 				  PACKET3_SH_KCACHE_ACTION_ENA |
3439 				  PACKET3_SH_ICACHE_ACTION_ENA);
3440 		radeon_ring_write(ring, 0xFFFFFFFF);
3441 		radeon_ring_write(ring, 0);
3442 		radeon_ring_write(ring, 10); /* poll interval */
3443 	}
3444 }
3445 
3446 /*
3447  * CP.
3448  */
3449 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3450 {
3451 	if (enable)
3452 		WREG32(CP_ME_CNTL, 0);
3453 	else {
3454 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3455 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3456 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3457 		WREG32(SCRATCH_UMSK, 0);
3458 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3459 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3460 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3461 	}
3462 	udelay(50);
3463 }
3464 
3465 static int si_cp_load_microcode(struct radeon_device *rdev)
3466 {
3467 	int i;
3468 
3469 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3470 		return -EINVAL;
3471 
3472 	si_cp_enable(rdev, false);
3473 
3474 	if (rdev->new_fw) {
3475 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3476 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3477 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3478 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3479 		const struct gfx_firmware_header_v1_0 *me_hdr =
3480 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3481 		const __le32 *fw_data;
3482 		u32 fw_size;
3483 
3484 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3485 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3486 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3487 
3488 		/* PFP */
3489 		fw_data = (const __le32 *)
3490 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3491 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3492 		WREG32(CP_PFP_UCODE_ADDR, 0);
3493 		for (i = 0; i < fw_size; i++)
3494 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3495 		WREG32(CP_PFP_UCODE_ADDR, 0);
3496 
3497 		/* CE */
3498 		fw_data = (const __le32 *)
3499 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3500 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3501 		WREG32(CP_CE_UCODE_ADDR, 0);
3502 		for (i = 0; i < fw_size; i++)
3503 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3504 		WREG32(CP_CE_UCODE_ADDR, 0);
3505 
3506 		/* ME */
3507 		fw_data = (const __be32 *)
3508 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3509 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3510 		WREG32(CP_ME_RAM_WADDR, 0);
3511 		for (i = 0; i < fw_size; i++)
3512 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3513 		WREG32(CP_ME_RAM_WADDR, 0);
3514 	} else {
3515 		const __be32 *fw_data;
3516 
3517 		/* PFP */
3518 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3519 		WREG32(CP_PFP_UCODE_ADDR, 0);
3520 		for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3521 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3522 		WREG32(CP_PFP_UCODE_ADDR, 0);
3523 
3524 		/* CE */
3525 		fw_data = (const __be32 *)rdev->ce_fw->data;
3526 		WREG32(CP_CE_UCODE_ADDR, 0);
3527 		for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3528 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3529 		WREG32(CP_CE_UCODE_ADDR, 0);
3530 
3531 		/* ME */
3532 		fw_data = (const __be32 *)rdev->me_fw->data;
3533 		WREG32(CP_ME_RAM_WADDR, 0);
3534 		for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3535 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3536 		WREG32(CP_ME_RAM_WADDR, 0);
3537 	}
3538 
3539 	WREG32(CP_PFP_UCODE_ADDR, 0);
3540 	WREG32(CP_CE_UCODE_ADDR, 0);
3541 	WREG32(CP_ME_RAM_WADDR, 0);
3542 	WREG32(CP_ME_RAM_RADDR, 0);
3543 	return 0;
3544 }
3545 
3546 static int si_cp_start(struct radeon_device *rdev)
3547 {
3548 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3549 	int r, i;
3550 
3551 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3552 	if (r) {
3553 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3554 		return r;
3555 	}
3556 	/* init the CP */
3557 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3558 	radeon_ring_write(ring, 0x1);
3559 	radeon_ring_write(ring, 0x0);
3560 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3561 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3562 	radeon_ring_write(ring, 0);
3563 	radeon_ring_write(ring, 0);
3564 
3565 	/* init the CE partitions */
3566 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3567 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3568 	radeon_ring_write(ring, 0xc000);
3569 	radeon_ring_write(ring, 0xe000);
3570 	radeon_ring_unlock_commit(rdev, ring, false);
3571 
3572 	si_cp_enable(rdev, true);
3573 
3574 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3575 	if (r) {
3576 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3577 		return r;
3578 	}
3579 
3580 	/* setup clear context state */
3581 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3582 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3583 
3584 	for (i = 0; i < si_default_size; i++)
3585 		radeon_ring_write(ring, si_default_state[i]);
3586 
3587 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3588 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3589 
3590 	/* set clear context state */
3591 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3592 	radeon_ring_write(ring, 0);
3593 
3594 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3595 	radeon_ring_write(ring, 0x00000316);
3596 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3597 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3598 
3599 	radeon_ring_unlock_commit(rdev, ring, false);
3600 
3601 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3602 		ring = &rdev->ring[i];
3603 		r = radeon_ring_lock(rdev, ring, 2);
3604 
3605 		/* clear the compute context state */
3606 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3607 		radeon_ring_write(ring, 0);
3608 
3609 		radeon_ring_unlock_commit(rdev, ring, false);
3610 	}
3611 
3612 	return 0;
3613 }
3614 
3615 static void si_cp_fini(struct radeon_device *rdev)
3616 {
3617 	struct radeon_ring *ring;
3618 	si_cp_enable(rdev, false);
3619 
3620 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3621 	radeon_ring_fini(rdev, ring);
3622 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3623 
3624 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3625 	radeon_ring_fini(rdev, ring);
3626 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3627 
3628 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3629 	radeon_ring_fini(rdev, ring);
3630 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3631 }
3632 
3633 static int si_cp_resume(struct radeon_device *rdev)
3634 {
3635 	struct radeon_ring *ring;
3636 	u32 tmp;
3637 	u32 rb_bufsz;
3638 	int r;
3639 
3640 	si_enable_gui_idle_interrupt(rdev, false);
3641 
3642 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3643 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3644 
3645 	/* Set the write pointer delay */
3646 	WREG32(CP_RB_WPTR_DELAY, 0);
3647 
3648 	WREG32(CP_DEBUG, 0);
3649 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3650 
3651 	/* ring 0 - compute and gfx */
3652 	/* Set ring buffer size */
3653 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3654 	rb_bufsz = order_base_2(ring->ring_size / 8);
3655 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3656 #ifdef __BIG_ENDIAN
3657 	tmp |= BUF_SWAP_32BIT;
3658 #endif
3659 	WREG32(CP_RB0_CNTL, tmp);
3660 
3661 	/* Initialize the ring buffer's read and write pointers */
3662 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3663 	ring->wptr = 0;
3664 	WREG32(CP_RB0_WPTR, ring->wptr);
3665 
3666 	/* set the wb address whether it's enabled or not */
3667 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3668 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3669 
3670 	if (rdev->wb.enabled)
3671 		WREG32(SCRATCH_UMSK, 0xff);
3672 	else {
3673 		tmp |= RB_NO_UPDATE;
3674 		WREG32(SCRATCH_UMSK, 0);
3675 	}
3676 
3677 	mdelay(1);
3678 	WREG32(CP_RB0_CNTL, tmp);
3679 
3680 	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3681 
3682 	/* ring1  - compute only */
3683 	/* Set ring buffer size */
3684 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3685 	rb_bufsz = order_base_2(ring->ring_size / 8);
3686 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3687 #ifdef __BIG_ENDIAN
3688 	tmp |= BUF_SWAP_32BIT;
3689 #endif
3690 	WREG32(CP_RB1_CNTL, tmp);
3691 
3692 	/* Initialize the ring buffer's read and write pointers */
3693 	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3694 	ring->wptr = 0;
3695 	WREG32(CP_RB1_WPTR, ring->wptr);
3696 
3697 	/* set the wb address whether it's enabled or not */
3698 	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3699 	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3700 
3701 	mdelay(1);
3702 	WREG32(CP_RB1_CNTL, tmp);
3703 
3704 	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3705 
3706 	/* ring2 - compute only */
3707 	/* Set ring buffer size */
3708 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3709 	rb_bufsz = order_base_2(ring->ring_size / 8);
3710 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3711 #ifdef __BIG_ENDIAN
3712 	tmp |= BUF_SWAP_32BIT;
3713 #endif
3714 	WREG32(CP_RB2_CNTL, tmp);
3715 
3716 	/* Initialize the ring buffer's read and write pointers */
3717 	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3718 	ring->wptr = 0;
3719 	WREG32(CP_RB2_WPTR, ring->wptr);
3720 
3721 	/* set the wb address whether it's enabled or not */
3722 	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3723 	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3724 
3725 	mdelay(1);
3726 	WREG32(CP_RB2_CNTL, tmp);
3727 
3728 	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3729 
3730 	/* start the rings */
3731 	si_cp_start(rdev);
3732 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3733 	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3734 	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3735 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3736 	if (r) {
3737 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3738 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3739 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3740 		return r;
3741 	}
3742 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3743 	if (r) {
3744 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3745 	}
3746 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3747 	if (r) {
3748 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3749 	}
3750 
3751 	si_enable_gui_idle_interrupt(rdev, true);
3752 
3753 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3754 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3755 
3756 	return 0;
3757 }
3758 
3759 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3760 {
3761 	u32 reset_mask = 0;
3762 	u32 tmp;
3763 
3764 	/* GRBM_STATUS */
3765 	tmp = RREG32(GRBM_STATUS);
3766 	if (tmp & (PA_BUSY | SC_BUSY |
3767 		   BCI_BUSY | SX_BUSY |
3768 		   TA_BUSY | VGT_BUSY |
3769 		   DB_BUSY | CB_BUSY |
3770 		   GDS_BUSY | SPI_BUSY |
3771 		   IA_BUSY | IA_BUSY_NO_DMA))
3772 		reset_mask |= RADEON_RESET_GFX;
3773 
3774 	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3775 		   CP_BUSY | CP_COHERENCY_BUSY))
3776 		reset_mask |= RADEON_RESET_CP;
3777 
3778 	if (tmp & GRBM_EE_BUSY)
3779 		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3780 
3781 	/* GRBM_STATUS2 */
3782 	tmp = RREG32(GRBM_STATUS2);
3783 	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3784 		reset_mask |= RADEON_RESET_RLC;
3785 
3786 	/* DMA_STATUS_REG 0 */
3787 	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3788 	if (!(tmp & DMA_IDLE))
3789 		reset_mask |= RADEON_RESET_DMA;
3790 
3791 	/* DMA_STATUS_REG 1 */
3792 	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3793 	if (!(tmp & DMA_IDLE))
3794 		reset_mask |= RADEON_RESET_DMA1;
3795 
3796 	/* SRBM_STATUS2 */
3797 	tmp = RREG32(SRBM_STATUS2);
3798 	if (tmp & DMA_BUSY)
3799 		reset_mask |= RADEON_RESET_DMA;
3800 
3801 	if (tmp & DMA1_BUSY)
3802 		reset_mask |= RADEON_RESET_DMA1;
3803 
3804 	/* SRBM_STATUS */
3805 	tmp = RREG32(SRBM_STATUS);
3806 
3807 	if (tmp & IH_BUSY)
3808 		reset_mask |= RADEON_RESET_IH;
3809 
3810 	if (tmp & SEM_BUSY)
3811 		reset_mask |= RADEON_RESET_SEM;
3812 
3813 	if (tmp & GRBM_RQ_PENDING)
3814 		reset_mask |= RADEON_RESET_GRBM;
3815 
3816 	if (tmp & VMC_BUSY)
3817 		reset_mask |= RADEON_RESET_VMC;
3818 
3819 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3820 		   MCC_BUSY | MCD_BUSY))
3821 		reset_mask |= RADEON_RESET_MC;
3822 
3823 	if (evergreen_is_display_hung(rdev))
3824 		reset_mask |= RADEON_RESET_DISPLAY;
3825 
3826 	/* VM_L2_STATUS */
3827 	tmp = RREG32(VM_L2_STATUS);
3828 	if (tmp & L2_BUSY)
3829 		reset_mask |= RADEON_RESET_VMC;
3830 
3831 	/* Skip MC reset as it's mostly likely not hung, just busy */
3832 	if (reset_mask & RADEON_RESET_MC) {
3833 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3834 		reset_mask &= ~RADEON_RESET_MC;
3835 	}
3836 
3837 	return reset_mask;
3838 }
3839 
3840 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3841 {
3842 	struct evergreen_mc_save save;
3843 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3844 	u32 tmp;
3845 
3846 	if (reset_mask == 0)
3847 		return;
3848 
3849 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3850 
3851 	evergreen_print_gpu_status_regs(rdev);
3852 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3853 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3854 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3855 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3856 
3857 	/* disable PG/CG */
3858 	si_fini_pg(rdev);
3859 	si_fini_cg(rdev);
3860 
3861 	/* stop the rlc */
3862 	si_rlc_stop(rdev);
3863 
3864 	/* Disable CP parsing/prefetching */
3865 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3866 
3867 	if (reset_mask & RADEON_RESET_DMA) {
3868 		/* dma0 */
3869 		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3870 		tmp &= ~DMA_RB_ENABLE;
3871 		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3872 	}
3873 	if (reset_mask & RADEON_RESET_DMA1) {
3874 		/* dma1 */
3875 		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3876 		tmp &= ~DMA_RB_ENABLE;
3877 		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3878 	}
3879 
3880 	udelay(50);
3881 
3882 	evergreen_mc_stop(rdev, &save);
3883 	if (evergreen_mc_wait_for_idle(rdev)) {
3884 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3885 	}
3886 
3887 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3888 		grbm_soft_reset = SOFT_RESET_CB |
3889 			SOFT_RESET_DB |
3890 			SOFT_RESET_GDS |
3891 			SOFT_RESET_PA |
3892 			SOFT_RESET_SC |
3893 			SOFT_RESET_BCI |
3894 			SOFT_RESET_SPI |
3895 			SOFT_RESET_SX |
3896 			SOFT_RESET_TC |
3897 			SOFT_RESET_TA |
3898 			SOFT_RESET_VGT |
3899 			SOFT_RESET_IA;
3900 	}
3901 
3902 	if (reset_mask & RADEON_RESET_CP) {
3903 		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3904 
3905 		srbm_soft_reset |= SOFT_RESET_GRBM;
3906 	}
3907 
3908 	if (reset_mask & RADEON_RESET_DMA)
3909 		srbm_soft_reset |= SOFT_RESET_DMA;
3910 
3911 	if (reset_mask & RADEON_RESET_DMA1)
3912 		srbm_soft_reset |= SOFT_RESET_DMA1;
3913 
3914 	if (reset_mask & RADEON_RESET_DISPLAY)
3915 		srbm_soft_reset |= SOFT_RESET_DC;
3916 
3917 	if (reset_mask & RADEON_RESET_RLC)
3918 		grbm_soft_reset |= SOFT_RESET_RLC;
3919 
3920 	if (reset_mask & RADEON_RESET_SEM)
3921 		srbm_soft_reset |= SOFT_RESET_SEM;
3922 
3923 	if (reset_mask & RADEON_RESET_IH)
3924 		srbm_soft_reset |= SOFT_RESET_IH;
3925 
3926 	if (reset_mask & RADEON_RESET_GRBM)
3927 		srbm_soft_reset |= SOFT_RESET_GRBM;
3928 
3929 	if (reset_mask & RADEON_RESET_VMC)
3930 		srbm_soft_reset |= SOFT_RESET_VMC;
3931 
3932 	if (reset_mask & RADEON_RESET_MC)
3933 		srbm_soft_reset |= SOFT_RESET_MC;
3934 
3935 	if (grbm_soft_reset) {
3936 		tmp = RREG32(GRBM_SOFT_RESET);
3937 		tmp |= grbm_soft_reset;
3938 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3939 		WREG32(GRBM_SOFT_RESET, tmp);
3940 		tmp = RREG32(GRBM_SOFT_RESET);
3941 
3942 		udelay(50);
3943 
3944 		tmp &= ~grbm_soft_reset;
3945 		WREG32(GRBM_SOFT_RESET, tmp);
3946 		tmp = RREG32(GRBM_SOFT_RESET);
3947 	}
3948 
3949 	if (srbm_soft_reset) {
3950 		tmp = RREG32(SRBM_SOFT_RESET);
3951 		tmp |= srbm_soft_reset;
3952 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3953 		WREG32(SRBM_SOFT_RESET, tmp);
3954 		tmp = RREG32(SRBM_SOFT_RESET);
3955 
3956 		udelay(50);
3957 
3958 		tmp &= ~srbm_soft_reset;
3959 		WREG32(SRBM_SOFT_RESET, tmp);
3960 		tmp = RREG32(SRBM_SOFT_RESET);
3961 	}
3962 
3963 	/* Wait a little for things to settle down */
3964 	udelay(50);
3965 
3966 	evergreen_mc_resume(rdev, &save);
3967 	udelay(50);
3968 
3969 	evergreen_print_gpu_status_regs(rdev);
3970 }
3971 
3972 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3973 {
3974 	u32 tmp, i;
3975 
3976 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3977 	tmp |= SPLL_BYPASS_EN;
3978 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3979 
3980 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3981 	tmp |= SPLL_CTLREQ_CHG;
3982 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3983 
3984 	for (i = 0; i < rdev->usec_timeout; i++) {
3985 		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
3986 			break;
3987 		udelay(1);
3988 	}
3989 
3990 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3991 	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
3992 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3993 
3994 	tmp = RREG32(MPLL_CNTL_MODE);
3995 	tmp &= ~MPLL_MCLK_SEL;
3996 	WREG32(MPLL_CNTL_MODE, tmp);
3997 }
3998 
3999 static void si_spll_powerdown(struct radeon_device *rdev)
4000 {
4001 	u32 tmp;
4002 
4003 	tmp = RREG32(SPLL_CNTL_MODE);
4004 	tmp |= SPLL_SW_DIR_CONTROL;
4005 	WREG32(SPLL_CNTL_MODE, tmp);
4006 
4007 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4008 	tmp |= SPLL_RESET;
4009 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4010 
4011 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4012 	tmp |= SPLL_SLEEP;
4013 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4014 
4015 	tmp = RREG32(SPLL_CNTL_MODE);
4016 	tmp &= ~SPLL_SW_DIR_CONTROL;
4017 	WREG32(SPLL_CNTL_MODE, tmp);
4018 }
4019 
4020 static void si_gpu_pci_config_reset(struct radeon_device *rdev)
4021 {
4022 	struct evergreen_mc_save save;
4023 	u32 tmp, i;
4024 
4025 	dev_info(rdev->dev, "GPU pci config reset\n");
4026 
4027 	/* disable dpm? */
4028 
4029 	/* disable cg/pg */
4030 	si_fini_pg(rdev);
4031 	si_fini_cg(rdev);
4032 
4033 	/* Disable CP parsing/prefetching */
4034 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4035 	/* dma0 */
4036 	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
4037 	tmp &= ~DMA_RB_ENABLE;
4038 	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
4039 	/* dma1 */
4040 	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
4041 	tmp &= ~DMA_RB_ENABLE;
4042 	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
4043 	/* XXX other engines? */
4044 
4045 	/* halt the rlc, disable cp internal ints */
4046 	si_rlc_stop(rdev);
4047 
4048 	udelay(50);
4049 
4050 	/* disable mem access */
4051 	evergreen_mc_stop(rdev, &save);
4052 	if (evergreen_mc_wait_for_idle(rdev)) {
4053 		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
4054 	}
4055 
4056 	/* set mclk/sclk to bypass */
4057 	si_set_clk_bypass_mode(rdev);
4058 	/* powerdown spll */
4059 	si_spll_powerdown(rdev);
4060 	/* disable BM */
4061 	pci_clear_master(rdev->pdev);
4062 	/* reset */
4063 	radeon_pci_config_reset(rdev);
4064 	/* wait for asic to come out of reset */
4065 	for (i = 0; i < rdev->usec_timeout; i++) {
4066 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
4067 			break;
4068 		udelay(1);
4069 	}
4070 }
4071 
4072 int si_asic_reset(struct radeon_device *rdev, bool hard)
4073 {
4074 	u32 reset_mask;
4075 
4076 	if (hard) {
4077 		si_gpu_pci_config_reset(rdev);
4078 		return 0;
4079 	}
4080 
4081 	reset_mask = si_gpu_check_soft_reset(rdev);
4082 
4083 	if (reset_mask)
4084 		r600_set_bios_scratch_engine_hung(rdev, true);
4085 
4086 	/* try soft reset */
4087 	si_gpu_soft_reset(rdev, reset_mask);
4088 
4089 	reset_mask = si_gpu_check_soft_reset(rdev);
4090 
4091 	/* try pci config reset */
4092 	if (reset_mask && radeon_hard_reset)
4093 		si_gpu_pci_config_reset(rdev);
4094 
4095 	reset_mask = si_gpu_check_soft_reset(rdev);
4096 
4097 	if (!reset_mask)
4098 		r600_set_bios_scratch_engine_hung(rdev, false);
4099 
4100 	return 0;
4101 }
4102 
4103 /**
4104  * si_gfx_is_lockup - Check if the GFX engine is locked up
4105  *
4106  * @rdev: radeon_device pointer
4107  * @ring: radeon_ring structure holding ring information
4108  *
4109  * Check if the GFX engine is locked up.
4110  * Returns true if the engine appears to be locked up, false if not.
4111  */
4112 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4113 {
4114 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
4115 
4116 	if (!(reset_mask & (RADEON_RESET_GFX |
4117 			    RADEON_RESET_COMPUTE |
4118 			    RADEON_RESET_CP))) {
4119 		radeon_ring_lockup_update(rdev, ring);
4120 		return false;
4121 	}
4122 	return radeon_ring_test_lockup(rdev, ring);
4123 }
4124 
4125 /* MC */
4126 static void si_mc_program(struct radeon_device *rdev)
4127 {
4128 	struct evergreen_mc_save save;
4129 	u32 tmp;
4130 	int i, j;
4131 
4132 	/* Initialize HDP */
4133 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4134 		WREG32((0x2c14 + j), 0x00000000);
4135 		WREG32((0x2c18 + j), 0x00000000);
4136 		WREG32((0x2c1c + j), 0x00000000);
4137 		WREG32((0x2c20 + j), 0x00000000);
4138 		WREG32((0x2c24 + j), 0x00000000);
4139 	}
4140 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4141 
4142 	evergreen_mc_stop(rdev, &save);
4143 	if (radeon_mc_wait_for_idle(rdev)) {
4144 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4145 	}
4146 	if (!ASIC_IS_NODCE(rdev))
4147 		/* Lockout access through VGA aperture*/
4148 		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4149 	/* Update configuration */
4150 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4151 	       rdev->mc.vram_start >> 12);
4152 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4153 	       rdev->mc.vram_end >> 12);
4154 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4155 	       rdev->vram_scratch.gpu_addr >> 12);
4156 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4157 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4158 	WREG32(MC_VM_FB_LOCATION, tmp);
4159 	/* XXX double check these! */
4160 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4161 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4162 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4163 	WREG32(MC_VM_AGP_BASE, 0);
4164 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4165 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4166 	if (radeon_mc_wait_for_idle(rdev)) {
4167 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4168 	}
4169 	evergreen_mc_resume(rdev, &save);
4170 	if (!ASIC_IS_NODCE(rdev)) {
4171 		/* we need to own VRAM, so turn off the VGA renderer here
4172 		 * to stop it overwriting our objects */
4173 		rv515_vga_render_disable(rdev);
4174 	}
4175 }
4176 
4177 void si_vram_gtt_location(struct radeon_device *rdev,
4178 			  struct radeon_mc *mc)
4179 {
4180 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
4181 		/* leave room for at least 1024M GTT */
4182 		dev_warn(rdev->dev, "limiting VRAM\n");
4183 		mc->real_vram_size = 0xFFC0000000ULL;
4184 		mc->mc_vram_size = 0xFFC0000000ULL;
4185 	}
4186 	radeon_vram_location(rdev, &rdev->mc, 0);
4187 	rdev->mc.gtt_base_align = 0;
4188 	radeon_gtt_location(rdev, mc);
4189 }
4190 
4191 static int si_mc_init(struct radeon_device *rdev)
4192 {
4193 	u32 tmp;
4194 	int chansize, numchan;
4195 
4196 	/* Get VRAM informations */
4197 	rdev->mc.vram_is_ddr = true;
4198 	tmp = RREG32(MC_ARB_RAMCFG);
4199 	if (tmp & CHANSIZE_OVERRIDE) {
4200 		chansize = 16;
4201 	} else if (tmp & CHANSIZE_MASK) {
4202 		chansize = 64;
4203 	} else {
4204 		chansize = 32;
4205 	}
4206 	tmp = RREG32(MC_SHARED_CHMAP);
4207 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4208 	case 0:
4209 	default:
4210 		numchan = 1;
4211 		break;
4212 	case 1:
4213 		numchan = 2;
4214 		break;
4215 	case 2:
4216 		numchan = 4;
4217 		break;
4218 	case 3:
4219 		numchan = 8;
4220 		break;
4221 	case 4:
4222 		numchan = 3;
4223 		break;
4224 	case 5:
4225 		numchan = 6;
4226 		break;
4227 	case 6:
4228 		numchan = 10;
4229 		break;
4230 	case 7:
4231 		numchan = 12;
4232 		break;
4233 	case 8:
4234 		numchan = 16;
4235 		break;
4236 	}
4237 	rdev->mc.vram_width = numchan * chansize;
4238 	/* Could aper size report 0 ? */
4239 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4240 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4241 	/* size in MB on si */
4242 	tmp = RREG32(CONFIG_MEMSIZE);
4243 	/* some boards may have garbage in the upper 16 bits */
4244 	if (tmp & 0xffff0000) {
4245 		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4246 		if (tmp & 0xffff)
4247 			tmp &= 0xffff;
4248 	}
4249 	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4250 	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4251 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4252 	si_vram_gtt_location(rdev, &rdev->mc);
4253 	radeon_update_bandwidth_info(rdev);
4254 
4255 	return 0;
4256 }
4257 
4258 /*
4259  * GART
4260  */
4261 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4262 {
4263 	/* flush hdp cache */
4264 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4265 
4266 	/* bits 0-15 are the VM contexts0-15 */
4267 	WREG32(VM_INVALIDATE_REQUEST, 1);
4268 }
4269 
4270 static int si_pcie_gart_enable(struct radeon_device *rdev)
4271 {
4272 	int r, i;
4273 
4274 	if (rdev->gart.robj == NULL) {
4275 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4276 		return -EINVAL;
4277 	}
4278 	r = radeon_gart_table_vram_pin(rdev);
4279 	if (r)
4280 		return r;
4281 	/* Setup TLB control */
4282 	WREG32(MC_VM_MX_L1_TLB_CNTL,
4283 	       (0xA << 7) |
4284 	       ENABLE_L1_TLB |
4285 	       ENABLE_L1_FRAGMENT_PROCESSING |
4286 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4287 	       ENABLE_ADVANCED_DRIVER_MODEL |
4288 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4289 	/* Setup L2 cache */
4290 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4291 	       ENABLE_L2_FRAGMENT_PROCESSING |
4292 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4293 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4294 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4295 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4296 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4297 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4298 	       BANK_SELECT(4) |
4299 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4300 	/* setup context0 */
4301 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4302 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4303 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4304 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4305 			(u32)(rdev->dummy_page.addr >> 12));
4306 	WREG32(VM_CONTEXT0_CNTL2, 0);
4307 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4308 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4309 
4310 	WREG32(0x15D4, 0);
4311 	WREG32(0x15D8, 0);
4312 	WREG32(0x15DC, 0);
4313 
4314 	/* empty context1-15 */
4315 	/* set vm size, must be a multiple of 4 */
4316 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4317 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
4318 	/* Assign the pt base to something valid for now; the pts used for
4319 	 * the VMs are determined by the application and setup and assigned
4320 	 * on the fly in the vm part of radeon_gart.c
4321 	 */
4322 	for (i = 1; i < 16; i++) {
4323 		if (i < 8)
4324 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4325 			       rdev->vm_manager.saved_table_addr[i]);
4326 		else
4327 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4328 			       rdev->vm_manager.saved_table_addr[i]);
4329 	}
4330 
4331 	/* enable context1-15 */
4332 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4333 	       (u32)(rdev->dummy_page.addr >> 12));
4334 	WREG32(VM_CONTEXT1_CNTL2, 4);
4335 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4336 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
4337 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4338 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4339 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4340 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4341 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4342 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4343 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4344 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4345 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4346 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4347 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4348 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4349 
4350 	si_pcie_gart_tlb_flush(rdev);
4351 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4352 		 (unsigned)(rdev->mc.gtt_size >> 20),
4353 		 (unsigned long long)rdev->gart.table_addr);
4354 	rdev->gart.ready = true;
4355 	return 0;
4356 }
4357 
4358 static void si_pcie_gart_disable(struct radeon_device *rdev)
4359 {
4360 	unsigned i;
4361 
4362 	for (i = 1; i < 16; ++i) {
4363 		uint32_t reg;
4364 		if (i < 8)
4365 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4366 		else
4367 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4368 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4369 	}
4370 
4371 	/* Disable all tables */
4372 	WREG32(VM_CONTEXT0_CNTL, 0);
4373 	WREG32(VM_CONTEXT1_CNTL, 0);
4374 	/* Setup TLB control */
4375 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4376 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4377 	/* Setup L2 cache */
4378 	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4379 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4380 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4381 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4382 	WREG32(VM_L2_CNTL2, 0);
4383 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4384 	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4385 	radeon_gart_table_vram_unpin(rdev);
4386 }
4387 
4388 static void si_pcie_gart_fini(struct radeon_device *rdev)
4389 {
4390 	si_pcie_gart_disable(rdev);
4391 	radeon_gart_table_vram_free(rdev);
4392 	radeon_gart_fini(rdev);
4393 }
4394 
4395 /* vm parser */
4396 static bool si_vm_reg_valid(u32 reg)
4397 {
4398 	/* context regs are fine */
4399 	if (reg >= 0x28000)
4400 		return true;
4401 
4402 	/* shader regs are also fine */
4403 	if (reg >= 0xB000 && reg < 0xC000)
4404 		return true;
4405 
4406 	/* check config regs */
4407 	switch (reg) {
4408 	case GRBM_GFX_INDEX:
4409 	case CP_STRMOUT_CNTL:
4410 	case VGT_VTX_VECT_EJECT_REG:
4411 	case VGT_CACHE_INVALIDATION:
4412 	case VGT_ESGS_RING_SIZE:
4413 	case VGT_GSVS_RING_SIZE:
4414 	case VGT_GS_VERTEX_REUSE:
4415 	case VGT_PRIMITIVE_TYPE:
4416 	case VGT_INDEX_TYPE:
4417 	case VGT_NUM_INDICES:
4418 	case VGT_NUM_INSTANCES:
4419 	case VGT_TF_RING_SIZE:
4420 	case VGT_HS_OFFCHIP_PARAM:
4421 	case VGT_TF_MEMORY_BASE:
4422 	case PA_CL_ENHANCE:
4423 	case PA_SU_LINE_STIPPLE_VALUE:
4424 	case PA_SC_LINE_STIPPLE_STATE:
4425 	case PA_SC_ENHANCE:
4426 	case SQC_CACHES:
4427 	case SPI_STATIC_THREAD_MGMT_1:
4428 	case SPI_STATIC_THREAD_MGMT_2:
4429 	case SPI_STATIC_THREAD_MGMT_3:
4430 	case SPI_PS_MAX_WAVE_ID:
4431 	case SPI_CONFIG_CNTL:
4432 	case SPI_CONFIG_CNTL_1:
4433 	case TA_CNTL_AUX:
4434 		return true;
4435 	default:
4436 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4437 		return false;
4438 	}
4439 }
4440 
4441 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4442 				  u32 *ib, struct radeon_cs_packet *pkt)
4443 {
4444 	switch (pkt->opcode) {
4445 	case PACKET3_NOP:
4446 	case PACKET3_SET_BASE:
4447 	case PACKET3_SET_CE_DE_COUNTERS:
4448 	case PACKET3_LOAD_CONST_RAM:
4449 	case PACKET3_WRITE_CONST_RAM:
4450 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4451 	case PACKET3_DUMP_CONST_RAM:
4452 	case PACKET3_INCREMENT_CE_COUNTER:
4453 	case PACKET3_WAIT_ON_DE_COUNTER:
4454 	case PACKET3_CE_WRITE:
4455 		break;
4456 	default:
4457 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4458 		return -EINVAL;
4459 	}
4460 	return 0;
4461 }
4462 
4463 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4464 {
4465 	u32 start_reg, reg, i;
4466 	u32 command = ib[idx + 4];
4467 	u32 info = ib[idx + 1];
4468 	u32 idx_value = ib[idx];
4469 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4470 		/* src address space is register */
4471 		if (((info & 0x60000000) >> 29) == 0) {
4472 			start_reg = idx_value << 2;
4473 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4474 				reg = start_reg;
4475 				if (!si_vm_reg_valid(reg)) {
4476 					DRM_ERROR("CP DMA Bad SRC register\n");
4477 					return -EINVAL;
4478 				}
4479 			} else {
4480 				for (i = 0; i < (command & 0x1fffff); i++) {
4481 					reg = start_reg + (4 * i);
4482 					if (!si_vm_reg_valid(reg)) {
4483 						DRM_ERROR("CP DMA Bad SRC register\n");
4484 						return -EINVAL;
4485 					}
4486 				}
4487 			}
4488 		}
4489 	}
4490 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4491 		/* dst address space is register */
4492 		if (((info & 0x00300000) >> 20) == 0) {
4493 			start_reg = ib[idx + 2];
4494 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4495 				reg = start_reg;
4496 				if (!si_vm_reg_valid(reg)) {
4497 					DRM_ERROR("CP DMA Bad DST register\n");
4498 					return -EINVAL;
4499 				}
4500 			} else {
4501 				for (i = 0; i < (command & 0x1fffff); i++) {
4502 					reg = start_reg + (4 * i);
4503 				if (!si_vm_reg_valid(reg)) {
4504 						DRM_ERROR("CP DMA Bad DST register\n");
4505 						return -EINVAL;
4506 					}
4507 				}
4508 			}
4509 		}
4510 	}
4511 	return 0;
4512 }
4513 
4514 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4515 				   u32 *ib, struct radeon_cs_packet *pkt)
4516 {
4517 	int r;
4518 	u32 idx = pkt->idx + 1;
4519 	u32 idx_value = ib[idx];
4520 	u32 start_reg, end_reg, reg, i;
4521 
4522 	switch (pkt->opcode) {
4523 	case PACKET3_NOP:
4524 	case PACKET3_SET_BASE:
4525 	case PACKET3_CLEAR_STATE:
4526 	case PACKET3_INDEX_BUFFER_SIZE:
4527 	case PACKET3_DISPATCH_DIRECT:
4528 	case PACKET3_DISPATCH_INDIRECT:
4529 	case PACKET3_ALLOC_GDS:
4530 	case PACKET3_WRITE_GDS_RAM:
4531 	case PACKET3_ATOMIC_GDS:
4532 	case PACKET3_ATOMIC:
4533 	case PACKET3_OCCLUSION_QUERY:
4534 	case PACKET3_SET_PREDICATION:
4535 	case PACKET3_COND_EXEC:
4536 	case PACKET3_PRED_EXEC:
4537 	case PACKET3_DRAW_INDIRECT:
4538 	case PACKET3_DRAW_INDEX_INDIRECT:
4539 	case PACKET3_INDEX_BASE:
4540 	case PACKET3_DRAW_INDEX_2:
4541 	case PACKET3_CONTEXT_CONTROL:
4542 	case PACKET3_INDEX_TYPE:
4543 	case PACKET3_DRAW_INDIRECT_MULTI:
4544 	case PACKET3_DRAW_INDEX_AUTO:
4545 	case PACKET3_DRAW_INDEX_IMMD:
4546 	case PACKET3_NUM_INSTANCES:
4547 	case PACKET3_DRAW_INDEX_MULTI_AUTO:
4548 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4549 	case PACKET3_DRAW_INDEX_OFFSET_2:
4550 	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4551 	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4552 	case PACKET3_MPEG_INDEX:
4553 	case PACKET3_WAIT_REG_MEM:
4554 	case PACKET3_MEM_WRITE:
4555 	case PACKET3_PFP_SYNC_ME:
4556 	case PACKET3_SURFACE_SYNC:
4557 	case PACKET3_EVENT_WRITE:
4558 	case PACKET3_EVENT_WRITE_EOP:
4559 	case PACKET3_EVENT_WRITE_EOS:
4560 	case PACKET3_SET_CONTEXT_REG:
4561 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4562 	case PACKET3_SET_SH_REG:
4563 	case PACKET3_SET_SH_REG_OFFSET:
4564 	case PACKET3_INCREMENT_DE_COUNTER:
4565 	case PACKET3_WAIT_ON_CE_COUNTER:
4566 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4567 	case PACKET3_ME_WRITE:
4568 		break;
4569 	case PACKET3_COPY_DATA:
4570 		if ((idx_value & 0xf00) == 0) {
4571 			reg = ib[idx + 3] * 4;
4572 			if (!si_vm_reg_valid(reg))
4573 				return -EINVAL;
4574 		}
4575 		break;
4576 	case PACKET3_WRITE_DATA:
4577 		if ((idx_value & 0xf00) == 0) {
4578 			start_reg = ib[idx + 1] * 4;
4579 			if (idx_value & 0x10000) {
4580 				if (!si_vm_reg_valid(start_reg))
4581 					return -EINVAL;
4582 			} else {
4583 				for (i = 0; i < (pkt->count - 2); i++) {
4584 					reg = start_reg + (4 * i);
4585 					if (!si_vm_reg_valid(reg))
4586 						return -EINVAL;
4587 				}
4588 			}
4589 		}
4590 		break;
4591 	case PACKET3_COND_WRITE:
4592 		if (idx_value & 0x100) {
4593 			reg = ib[idx + 5] * 4;
4594 			if (!si_vm_reg_valid(reg))
4595 				return -EINVAL;
4596 		}
4597 		break;
4598 	case PACKET3_COPY_DW:
4599 		if (idx_value & 0x2) {
4600 			reg = ib[idx + 3] * 4;
4601 			if (!si_vm_reg_valid(reg))
4602 				return -EINVAL;
4603 		}
4604 		break;
4605 	case PACKET3_SET_CONFIG_REG:
4606 		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4607 		end_reg = 4 * pkt->count + start_reg - 4;
4608 		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4609 		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4610 		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4611 			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4612 			return -EINVAL;
4613 		}
4614 		for (i = 0; i < pkt->count; i++) {
4615 			reg = start_reg + (4 * i);
4616 			if (!si_vm_reg_valid(reg))
4617 				return -EINVAL;
4618 		}
4619 		break;
4620 	case PACKET3_CP_DMA:
4621 		r = si_vm_packet3_cp_dma_check(ib, idx);
4622 		if (r)
4623 			return r;
4624 		break;
4625 	default:
4626 		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4627 		return -EINVAL;
4628 	}
4629 	return 0;
4630 }
4631 
4632 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4633 				       u32 *ib, struct radeon_cs_packet *pkt)
4634 {
4635 	int r;
4636 	u32 idx = pkt->idx + 1;
4637 	u32 idx_value = ib[idx];
4638 	u32 start_reg, reg, i;
4639 
4640 	switch (pkt->opcode) {
4641 	case PACKET3_NOP:
4642 	case PACKET3_SET_BASE:
4643 	case PACKET3_CLEAR_STATE:
4644 	case PACKET3_DISPATCH_DIRECT:
4645 	case PACKET3_DISPATCH_INDIRECT:
4646 	case PACKET3_ALLOC_GDS:
4647 	case PACKET3_WRITE_GDS_RAM:
4648 	case PACKET3_ATOMIC_GDS:
4649 	case PACKET3_ATOMIC:
4650 	case PACKET3_OCCLUSION_QUERY:
4651 	case PACKET3_SET_PREDICATION:
4652 	case PACKET3_COND_EXEC:
4653 	case PACKET3_PRED_EXEC:
4654 	case PACKET3_CONTEXT_CONTROL:
4655 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4656 	case PACKET3_WAIT_REG_MEM:
4657 	case PACKET3_MEM_WRITE:
4658 	case PACKET3_PFP_SYNC_ME:
4659 	case PACKET3_SURFACE_SYNC:
4660 	case PACKET3_EVENT_WRITE:
4661 	case PACKET3_EVENT_WRITE_EOP:
4662 	case PACKET3_EVENT_WRITE_EOS:
4663 	case PACKET3_SET_CONTEXT_REG:
4664 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4665 	case PACKET3_SET_SH_REG:
4666 	case PACKET3_SET_SH_REG_OFFSET:
4667 	case PACKET3_INCREMENT_DE_COUNTER:
4668 	case PACKET3_WAIT_ON_CE_COUNTER:
4669 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4670 	case PACKET3_ME_WRITE:
4671 		break;
4672 	case PACKET3_COPY_DATA:
4673 		if ((idx_value & 0xf00) == 0) {
4674 			reg = ib[idx + 3] * 4;
4675 			if (!si_vm_reg_valid(reg))
4676 				return -EINVAL;
4677 		}
4678 		break;
4679 	case PACKET3_WRITE_DATA:
4680 		if ((idx_value & 0xf00) == 0) {
4681 			start_reg = ib[idx + 1] * 4;
4682 			if (idx_value & 0x10000) {
4683 				if (!si_vm_reg_valid(start_reg))
4684 					return -EINVAL;
4685 			} else {
4686 				for (i = 0; i < (pkt->count - 2); i++) {
4687 					reg = start_reg + (4 * i);
4688 					if (!si_vm_reg_valid(reg))
4689 						return -EINVAL;
4690 				}
4691 			}
4692 		}
4693 		break;
4694 	case PACKET3_COND_WRITE:
4695 		if (idx_value & 0x100) {
4696 			reg = ib[idx + 5] * 4;
4697 			if (!si_vm_reg_valid(reg))
4698 				return -EINVAL;
4699 		}
4700 		break;
4701 	case PACKET3_COPY_DW:
4702 		if (idx_value & 0x2) {
4703 			reg = ib[idx + 3] * 4;
4704 			if (!si_vm_reg_valid(reg))
4705 				return -EINVAL;
4706 		}
4707 		break;
4708 	case PACKET3_CP_DMA:
4709 		r = si_vm_packet3_cp_dma_check(ib, idx);
4710 		if (r)
4711 			return r;
4712 		break;
4713 	default:
4714 		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4715 		return -EINVAL;
4716 	}
4717 	return 0;
4718 }
4719 
4720 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4721 {
4722 	int ret = 0;
4723 	u32 idx = 0, i;
4724 	struct radeon_cs_packet pkt;
4725 
4726 	do {
4727 		pkt.idx = idx;
4728 		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4729 		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4730 		pkt.one_reg_wr = 0;
4731 		switch (pkt.type) {
4732 		case RADEON_PACKET_TYPE0:
4733 			dev_err(rdev->dev, "Packet0 not allowed!\n");
4734 			ret = -EINVAL;
4735 			break;
4736 		case RADEON_PACKET_TYPE2:
4737 			idx += 1;
4738 			break;
4739 		case RADEON_PACKET_TYPE3:
4740 			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4741 			if (ib->is_const_ib)
4742 				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4743 			else {
4744 				switch (ib->ring) {
4745 				case RADEON_RING_TYPE_GFX_INDEX:
4746 					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4747 					break;
4748 				case CAYMAN_RING_TYPE_CP1_INDEX:
4749 				case CAYMAN_RING_TYPE_CP2_INDEX:
4750 					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4751 					break;
4752 				default:
4753 					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4754 					ret = -EINVAL;
4755 					break;
4756 				}
4757 			}
4758 			idx += pkt.count + 2;
4759 			break;
4760 		default:
4761 			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4762 			ret = -EINVAL;
4763 			break;
4764 		}
4765 		if (ret) {
4766 			for (i = 0; i < ib->length_dw; i++) {
4767 				if (i == idx)
4768 					printk("\t0x%08x <---\n", ib->ptr[i]);
4769 				else
4770 					printk("\t0x%08x\n", ib->ptr[i]);
4771 			}
4772 			break;
4773 		}
4774 	} while (idx < ib->length_dw);
4775 
4776 	return ret;
4777 }
4778 
4779 /*
4780  * vm
4781  */
4782 int si_vm_init(struct radeon_device *rdev)
4783 {
4784 	/* number of VMs */
4785 	rdev->vm_manager.nvm = 16;
4786 	/* base offset of vram pages */
4787 	rdev->vm_manager.vram_base_offset = 0;
4788 
4789 	return 0;
4790 }
4791 
4792 void si_vm_fini(struct radeon_device *rdev)
4793 {
4794 }
4795 
4796 /**
4797  * si_vm_decode_fault - print human readable fault info
4798  *
4799  * @rdev: radeon_device pointer
4800  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4801  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4802  *
4803  * Print human readable fault information (SI).
4804  */
4805 static void si_vm_decode_fault(struct radeon_device *rdev,
4806 			       u32 status, u32 addr)
4807 {
4808 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4809 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4810 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4811 	char *block;
4812 
4813 	if (rdev->family == CHIP_TAHITI) {
4814 		switch (mc_id) {
4815 		case 160:
4816 		case 144:
4817 		case 96:
4818 		case 80:
4819 		case 224:
4820 		case 208:
4821 		case 32:
4822 		case 16:
4823 			block = "CB";
4824 			break;
4825 		case 161:
4826 		case 145:
4827 		case 97:
4828 		case 81:
4829 		case 225:
4830 		case 209:
4831 		case 33:
4832 		case 17:
4833 			block = "CB_FMASK";
4834 			break;
4835 		case 162:
4836 		case 146:
4837 		case 98:
4838 		case 82:
4839 		case 226:
4840 		case 210:
4841 		case 34:
4842 		case 18:
4843 			block = "CB_CMASK";
4844 			break;
4845 		case 163:
4846 		case 147:
4847 		case 99:
4848 		case 83:
4849 		case 227:
4850 		case 211:
4851 		case 35:
4852 		case 19:
4853 			block = "CB_IMMED";
4854 			break;
4855 		case 164:
4856 		case 148:
4857 		case 100:
4858 		case 84:
4859 		case 228:
4860 		case 212:
4861 		case 36:
4862 		case 20:
4863 			block = "DB";
4864 			break;
4865 		case 165:
4866 		case 149:
4867 		case 101:
4868 		case 85:
4869 		case 229:
4870 		case 213:
4871 		case 37:
4872 		case 21:
4873 			block = "DB_HTILE";
4874 			break;
4875 		case 167:
4876 		case 151:
4877 		case 103:
4878 		case 87:
4879 		case 231:
4880 		case 215:
4881 		case 39:
4882 		case 23:
4883 			block = "DB_STEN";
4884 			break;
4885 		case 72:
4886 		case 68:
4887 		case 64:
4888 		case 8:
4889 		case 4:
4890 		case 0:
4891 		case 136:
4892 		case 132:
4893 		case 128:
4894 		case 200:
4895 		case 196:
4896 		case 192:
4897 			block = "TC";
4898 			break;
4899 		case 112:
4900 		case 48:
4901 			block = "CP";
4902 			break;
4903 		case 49:
4904 		case 177:
4905 		case 50:
4906 		case 178:
4907 			block = "SH";
4908 			break;
4909 		case 53:
4910 		case 190:
4911 			block = "VGT";
4912 			break;
4913 		case 117:
4914 			block = "IH";
4915 			break;
4916 		case 51:
4917 		case 115:
4918 			block = "RLC";
4919 			break;
4920 		case 119:
4921 		case 183:
4922 			block = "DMA0";
4923 			break;
4924 		case 61:
4925 			block = "DMA1";
4926 			break;
4927 		case 248:
4928 		case 120:
4929 			block = "HDP";
4930 			break;
4931 		default:
4932 			block = "unknown";
4933 			break;
4934 		}
4935 	} else {
4936 		switch (mc_id) {
4937 		case 32:
4938 		case 16:
4939 		case 96:
4940 		case 80:
4941 		case 160:
4942 		case 144:
4943 		case 224:
4944 		case 208:
4945 			block = "CB";
4946 			break;
4947 		case 33:
4948 		case 17:
4949 		case 97:
4950 		case 81:
4951 		case 161:
4952 		case 145:
4953 		case 225:
4954 		case 209:
4955 			block = "CB_FMASK";
4956 			break;
4957 		case 34:
4958 		case 18:
4959 		case 98:
4960 		case 82:
4961 		case 162:
4962 		case 146:
4963 		case 226:
4964 		case 210:
4965 			block = "CB_CMASK";
4966 			break;
4967 		case 35:
4968 		case 19:
4969 		case 99:
4970 		case 83:
4971 		case 163:
4972 		case 147:
4973 		case 227:
4974 		case 211:
4975 			block = "CB_IMMED";
4976 			break;
4977 		case 36:
4978 		case 20:
4979 		case 100:
4980 		case 84:
4981 		case 164:
4982 		case 148:
4983 		case 228:
4984 		case 212:
4985 			block = "DB";
4986 			break;
4987 		case 37:
4988 		case 21:
4989 		case 101:
4990 		case 85:
4991 		case 165:
4992 		case 149:
4993 		case 229:
4994 		case 213:
4995 			block = "DB_HTILE";
4996 			break;
4997 		case 39:
4998 		case 23:
4999 		case 103:
5000 		case 87:
5001 		case 167:
5002 		case 151:
5003 		case 231:
5004 		case 215:
5005 			block = "DB_STEN";
5006 			break;
5007 		case 72:
5008 		case 68:
5009 		case 8:
5010 		case 4:
5011 		case 136:
5012 		case 132:
5013 		case 200:
5014 		case 196:
5015 			block = "TC";
5016 			break;
5017 		case 112:
5018 		case 48:
5019 			block = "CP";
5020 			break;
5021 		case 49:
5022 		case 177:
5023 		case 50:
5024 		case 178:
5025 			block = "SH";
5026 			break;
5027 		case 53:
5028 			block = "VGT";
5029 			break;
5030 		case 117:
5031 			block = "IH";
5032 			break;
5033 		case 51:
5034 		case 115:
5035 			block = "RLC";
5036 			break;
5037 		case 119:
5038 		case 183:
5039 			block = "DMA0";
5040 			break;
5041 		case 61:
5042 			block = "DMA1";
5043 			break;
5044 		case 248:
5045 		case 120:
5046 			block = "HDP";
5047 			break;
5048 		default:
5049 			block = "unknown";
5050 			break;
5051 		}
5052 	}
5053 
5054 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5055 	       protections, vmid, addr,
5056 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5057 	       block, mc_id);
5058 }
5059 
5060 void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5061 		 unsigned vm_id, uint64_t pd_addr)
5062 {
5063 	/* write new base address */
5064 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5065 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5066 				 WRITE_DATA_DST_SEL(0)));
5067 
5068 	if (vm_id < 8) {
5069 		radeon_ring_write(ring,
5070 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5071 	} else {
5072 		radeon_ring_write(ring,
5073 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5074 	}
5075 	radeon_ring_write(ring, 0);
5076 	radeon_ring_write(ring, pd_addr >> 12);
5077 
5078 	/* flush hdp cache */
5079 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5080 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5081 				 WRITE_DATA_DST_SEL(0)));
5082 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5083 	radeon_ring_write(ring, 0);
5084 	radeon_ring_write(ring, 0x1);
5085 
5086 	/* bits 0-15 are the VM contexts0-15 */
5087 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5088 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5089 				 WRITE_DATA_DST_SEL(0)));
5090 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5091 	radeon_ring_write(ring, 0);
5092 	radeon_ring_write(ring, 1 << vm_id);
5093 
5094 	/* wait for the invalidate to complete */
5095 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5096 	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
5097 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5098 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5099 	radeon_ring_write(ring, 0);
5100 	radeon_ring_write(ring, 0); /* ref */
5101 	radeon_ring_write(ring, 0); /* mask */
5102 	radeon_ring_write(ring, 0x20); /* poll interval */
5103 
5104 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
5105 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5106 	radeon_ring_write(ring, 0x0);
5107 }
5108 
5109 /*
5110  *  Power and clock gating
5111  */
5112 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5113 {
5114 	int i;
5115 
5116 	for (i = 0; i < rdev->usec_timeout; i++) {
5117 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5118 			break;
5119 		udelay(1);
5120 	}
5121 
5122 	for (i = 0; i < rdev->usec_timeout; i++) {
5123 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5124 			break;
5125 		udelay(1);
5126 	}
5127 }
5128 
5129 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5130 					 bool enable)
5131 {
5132 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5133 	u32 mask;
5134 	int i;
5135 
5136 	if (enable)
5137 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5138 	else
5139 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5140 	WREG32(CP_INT_CNTL_RING0, tmp);
5141 
5142 	if (!enable) {
5143 		/* read a gfx register */
5144 		tmp = RREG32(DB_DEPTH_INFO);
5145 
5146 		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5147 		for (i = 0; i < rdev->usec_timeout; i++) {
5148 			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5149 				break;
5150 			udelay(1);
5151 		}
5152 	}
5153 }
5154 
5155 static void si_set_uvd_dcm(struct radeon_device *rdev,
5156 			   bool sw_mode)
5157 {
5158 	u32 tmp, tmp2;
5159 
5160 	tmp = RREG32(UVD_CGC_CTRL);
5161 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5162 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
5163 
5164 	if (sw_mode) {
5165 		tmp &= ~0x7ffff800;
5166 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5167 	} else {
5168 		tmp |= 0x7ffff800;
5169 		tmp2 = 0;
5170 	}
5171 
5172 	WREG32(UVD_CGC_CTRL, tmp);
5173 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5174 }
5175 
5176 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5177 {
5178 	bool hw_mode = true;
5179 
5180 	if (hw_mode) {
5181 		si_set_uvd_dcm(rdev, false);
5182 	} else {
5183 		u32 tmp = RREG32(UVD_CGC_CTRL);
5184 		tmp &= ~DCM;
5185 		WREG32(UVD_CGC_CTRL, tmp);
5186 	}
5187 }
5188 
5189 static u32 si_halt_rlc(struct radeon_device *rdev)
5190 {
5191 	u32 data, orig;
5192 
5193 	orig = data = RREG32(RLC_CNTL);
5194 
5195 	if (data & RLC_ENABLE) {
5196 		data &= ~RLC_ENABLE;
5197 		WREG32(RLC_CNTL, data);
5198 
5199 		si_wait_for_rlc_serdes(rdev);
5200 	}
5201 
5202 	return orig;
5203 }
5204 
5205 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5206 {
5207 	u32 tmp;
5208 
5209 	tmp = RREG32(RLC_CNTL);
5210 	if (tmp != rlc)
5211 		WREG32(RLC_CNTL, rlc);
5212 }
5213 
5214 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5215 {
5216 	u32 data, orig;
5217 
5218 	orig = data = RREG32(DMA_PG);
5219 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5220 		data |= PG_CNTL_ENABLE;
5221 	else
5222 		data &= ~PG_CNTL_ENABLE;
5223 	if (orig != data)
5224 		WREG32(DMA_PG, data);
5225 }
5226 
5227 static void si_init_dma_pg(struct radeon_device *rdev)
5228 {
5229 	u32 tmp;
5230 
5231 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
5232 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5233 
5234 	for (tmp = 0; tmp < 5; tmp++)
5235 		WREG32(DMA_PGFSM_WRITE, 0);
5236 }
5237 
5238 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5239 			       bool enable)
5240 {
5241 	u32 tmp;
5242 
5243 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5244 		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5245 		WREG32(RLC_TTOP_D, tmp);
5246 
5247 		tmp = RREG32(RLC_PG_CNTL);
5248 		tmp |= GFX_PG_ENABLE;
5249 		WREG32(RLC_PG_CNTL, tmp);
5250 
5251 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5252 		tmp |= AUTO_PG_EN;
5253 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5254 	} else {
5255 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5256 		tmp &= ~AUTO_PG_EN;
5257 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5258 
5259 		tmp = RREG32(DB_RENDER_CONTROL);
5260 	}
5261 }
5262 
5263 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5264 {
5265 	u32 tmp;
5266 
5267 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5268 
5269 	tmp = RREG32(RLC_PG_CNTL);
5270 	tmp |= GFX_PG_SRC;
5271 	WREG32(RLC_PG_CNTL, tmp);
5272 
5273 	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5274 
5275 	tmp = RREG32(RLC_AUTO_PG_CTRL);
5276 
5277 	tmp &= ~GRBM_REG_SGIT_MASK;
5278 	tmp |= GRBM_REG_SGIT(0x700);
5279 	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5280 	WREG32(RLC_AUTO_PG_CTRL, tmp);
5281 }
5282 
5283 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5284 {
5285 	u32 mask = 0, tmp, tmp1;
5286 	int i;
5287 
5288 	si_select_se_sh(rdev, se, sh);
5289 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5290 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5291 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5292 
5293 	tmp &= 0xffff0000;
5294 
5295 	tmp |= tmp1;
5296 	tmp >>= 16;
5297 
5298 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5299 		mask <<= 1;
5300 		mask |= 1;
5301 	}
5302 
5303 	return (~tmp) & mask;
5304 }
5305 
5306 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5307 {
5308 	u32 i, j, k, active_cu_number = 0;
5309 	u32 mask, counter, cu_bitmap;
5310 	u32 tmp = 0;
5311 
5312 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5313 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5314 			mask = 1;
5315 			cu_bitmap = 0;
5316 			counter  = 0;
5317 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5318 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5319 					if (counter < 2)
5320 						cu_bitmap |= mask;
5321 					counter++;
5322 				}
5323 				mask <<= 1;
5324 			}
5325 
5326 			active_cu_number += counter;
5327 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5328 		}
5329 	}
5330 
5331 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5332 
5333 	tmp = RREG32(RLC_MAX_PG_CU);
5334 	tmp &= ~MAX_PU_CU_MASK;
5335 	tmp |= MAX_PU_CU(active_cu_number);
5336 	WREG32(RLC_MAX_PG_CU, tmp);
5337 }
5338 
5339 static void si_enable_cgcg(struct radeon_device *rdev,
5340 			   bool enable)
5341 {
5342 	u32 data, orig, tmp;
5343 
5344 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5345 
5346 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5347 		si_enable_gui_idle_interrupt(rdev, true);
5348 
5349 		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5350 
5351 		tmp = si_halt_rlc(rdev);
5352 
5353 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5354 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5355 		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5356 
5357 		si_wait_for_rlc_serdes(rdev);
5358 
5359 		si_update_rlc(rdev, tmp);
5360 
5361 		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5362 
5363 		data |= CGCG_EN | CGLS_EN;
5364 	} else {
5365 		si_enable_gui_idle_interrupt(rdev, false);
5366 
5367 		RREG32(CB_CGTT_SCLK_CTRL);
5368 		RREG32(CB_CGTT_SCLK_CTRL);
5369 		RREG32(CB_CGTT_SCLK_CTRL);
5370 		RREG32(CB_CGTT_SCLK_CTRL);
5371 
5372 		data &= ~(CGCG_EN | CGLS_EN);
5373 	}
5374 
5375 	if (orig != data)
5376 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5377 }
5378 
5379 static void si_enable_mgcg(struct radeon_device *rdev,
5380 			   bool enable)
5381 {
5382 	u32 data, orig, tmp = 0;
5383 
5384 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5385 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5386 		data = 0x96940200;
5387 		if (orig != data)
5388 			WREG32(CGTS_SM_CTRL_REG, data);
5389 
5390 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5391 			orig = data = RREG32(CP_MEM_SLP_CNTL);
5392 			data |= CP_MEM_LS_EN;
5393 			if (orig != data)
5394 				WREG32(CP_MEM_SLP_CNTL, data);
5395 		}
5396 
5397 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5398 		data &= 0xffffffc0;
5399 		if (orig != data)
5400 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5401 
5402 		tmp = si_halt_rlc(rdev);
5403 
5404 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5405 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5406 		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5407 
5408 		si_update_rlc(rdev, tmp);
5409 	} else {
5410 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5411 		data |= 0x00000003;
5412 		if (orig != data)
5413 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5414 
5415 		data = RREG32(CP_MEM_SLP_CNTL);
5416 		if (data & CP_MEM_LS_EN) {
5417 			data &= ~CP_MEM_LS_EN;
5418 			WREG32(CP_MEM_SLP_CNTL, data);
5419 		}
5420 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5421 		data |= LS_OVERRIDE | OVERRIDE;
5422 		if (orig != data)
5423 			WREG32(CGTS_SM_CTRL_REG, data);
5424 
5425 		tmp = si_halt_rlc(rdev);
5426 
5427 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5428 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5429 		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5430 
5431 		si_update_rlc(rdev, tmp);
5432 	}
5433 }
5434 
5435 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5436 			       bool enable)
5437 {
5438 	u32 orig, data, tmp;
5439 
5440 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5441 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5442 		tmp |= 0x3fff;
5443 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5444 
5445 		orig = data = RREG32(UVD_CGC_CTRL);
5446 		data |= DCM;
5447 		if (orig != data)
5448 			WREG32(UVD_CGC_CTRL, data);
5449 
5450 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5451 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5452 	} else {
5453 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5454 		tmp &= ~0x3fff;
5455 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5456 
5457 		orig = data = RREG32(UVD_CGC_CTRL);
5458 		data &= ~DCM;
5459 		if (orig != data)
5460 			WREG32(UVD_CGC_CTRL, data);
5461 
5462 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5463 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5464 	}
5465 }
5466 
5467 static const u32 mc_cg_registers[] =
5468 {
5469 	MC_HUB_MISC_HUB_CG,
5470 	MC_HUB_MISC_SIP_CG,
5471 	MC_HUB_MISC_VM_CG,
5472 	MC_XPB_CLK_GAT,
5473 	ATC_MISC_CG,
5474 	MC_CITF_MISC_WR_CG,
5475 	MC_CITF_MISC_RD_CG,
5476 	MC_CITF_MISC_VM_CG,
5477 	VM_L2_CG,
5478 };
5479 
5480 static void si_enable_mc_ls(struct radeon_device *rdev,
5481 			    bool enable)
5482 {
5483 	int i;
5484 	u32 orig, data;
5485 
5486 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5487 		orig = data = RREG32(mc_cg_registers[i]);
5488 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5489 			data |= MC_LS_ENABLE;
5490 		else
5491 			data &= ~MC_LS_ENABLE;
5492 		if (data != orig)
5493 			WREG32(mc_cg_registers[i], data);
5494 	}
5495 }
5496 
5497 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5498 			       bool enable)
5499 {
5500 	int i;
5501 	u32 orig, data;
5502 
5503 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5504 		orig = data = RREG32(mc_cg_registers[i]);
5505 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5506 			data |= MC_CG_ENABLE;
5507 		else
5508 			data &= ~MC_CG_ENABLE;
5509 		if (data != orig)
5510 			WREG32(mc_cg_registers[i], data);
5511 	}
5512 }
5513 
5514 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5515 			       bool enable)
5516 {
5517 	u32 orig, data, offset;
5518 	int i;
5519 
5520 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5521 		for (i = 0; i < 2; i++) {
5522 			if (i == 0)
5523 				offset = DMA0_REGISTER_OFFSET;
5524 			else
5525 				offset = DMA1_REGISTER_OFFSET;
5526 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5527 			data &= ~MEM_POWER_OVERRIDE;
5528 			if (data != orig)
5529 				WREG32(DMA_POWER_CNTL + offset, data);
5530 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5531 		}
5532 	} else {
5533 		for (i = 0; i < 2; i++) {
5534 			if (i == 0)
5535 				offset = DMA0_REGISTER_OFFSET;
5536 			else
5537 				offset = DMA1_REGISTER_OFFSET;
5538 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5539 			data |= MEM_POWER_OVERRIDE;
5540 			if (data != orig)
5541 				WREG32(DMA_POWER_CNTL + offset, data);
5542 
5543 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5544 			data = 0xff000000;
5545 			if (data != orig)
5546 				WREG32(DMA_CLK_CTRL + offset, data);
5547 		}
5548 	}
5549 }
5550 
5551 static void si_enable_bif_mgls(struct radeon_device *rdev,
5552 			       bool enable)
5553 {
5554 	u32 orig, data;
5555 
5556 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5557 
5558 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5559 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5560 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5561 	else
5562 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5563 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5564 
5565 	if (orig != data)
5566 		WREG32_PCIE(PCIE_CNTL2, data);
5567 }
5568 
5569 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5570 			       bool enable)
5571 {
5572 	u32 orig, data;
5573 
5574 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5575 
5576 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5577 		data &= ~CLOCK_GATING_DIS;
5578 	else
5579 		data |= CLOCK_GATING_DIS;
5580 
5581 	if (orig != data)
5582 		WREG32(HDP_HOST_PATH_CNTL, data);
5583 }
5584 
5585 static void si_enable_hdp_ls(struct radeon_device *rdev,
5586 			     bool enable)
5587 {
5588 	u32 orig, data;
5589 
5590 	orig = data = RREG32(HDP_MEM_POWER_LS);
5591 
5592 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5593 		data |= HDP_LS_ENABLE;
5594 	else
5595 		data &= ~HDP_LS_ENABLE;
5596 
5597 	if (orig != data)
5598 		WREG32(HDP_MEM_POWER_LS, data);
5599 }
5600 
5601 static void si_update_cg(struct radeon_device *rdev,
5602 			 u32 block, bool enable)
5603 {
5604 	if (block & RADEON_CG_BLOCK_GFX) {
5605 		si_enable_gui_idle_interrupt(rdev, false);
5606 		/* order matters! */
5607 		if (enable) {
5608 			si_enable_mgcg(rdev, true);
5609 			si_enable_cgcg(rdev, true);
5610 		} else {
5611 			si_enable_cgcg(rdev, false);
5612 			si_enable_mgcg(rdev, false);
5613 		}
5614 		si_enable_gui_idle_interrupt(rdev, true);
5615 	}
5616 
5617 	if (block & RADEON_CG_BLOCK_MC) {
5618 		si_enable_mc_mgcg(rdev, enable);
5619 		si_enable_mc_ls(rdev, enable);
5620 	}
5621 
5622 	if (block & RADEON_CG_BLOCK_SDMA) {
5623 		si_enable_dma_mgcg(rdev, enable);
5624 	}
5625 
5626 	if (block & RADEON_CG_BLOCK_BIF) {
5627 		si_enable_bif_mgls(rdev, enable);
5628 	}
5629 
5630 	if (block & RADEON_CG_BLOCK_UVD) {
5631 		if (rdev->has_uvd) {
5632 			si_enable_uvd_mgcg(rdev, enable);
5633 		}
5634 	}
5635 
5636 	if (block & RADEON_CG_BLOCK_HDP) {
5637 		si_enable_hdp_mgcg(rdev, enable);
5638 		si_enable_hdp_ls(rdev, enable);
5639 	}
5640 }
5641 
5642 static void si_init_cg(struct radeon_device *rdev)
5643 {
5644 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5645 			    RADEON_CG_BLOCK_MC |
5646 			    RADEON_CG_BLOCK_SDMA |
5647 			    RADEON_CG_BLOCK_BIF |
5648 			    RADEON_CG_BLOCK_HDP), true);
5649 	if (rdev->has_uvd) {
5650 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5651 		si_init_uvd_internal_cg(rdev);
5652 	}
5653 }
5654 
5655 static void si_fini_cg(struct radeon_device *rdev)
5656 {
5657 	if (rdev->has_uvd) {
5658 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5659 	}
5660 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5661 			    RADEON_CG_BLOCK_MC |
5662 			    RADEON_CG_BLOCK_SDMA |
5663 			    RADEON_CG_BLOCK_BIF |
5664 			    RADEON_CG_BLOCK_HDP), false);
5665 }
5666 
5667 u32 si_get_csb_size(struct radeon_device *rdev)
5668 {
5669 	u32 count = 0;
5670 	const struct cs_section_def *sect = NULL;
5671 	const struct cs_extent_def *ext = NULL;
5672 
5673 	if (rdev->rlc.cs_data == NULL)
5674 		return 0;
5675 
5676 	/* begin clear state */
5677 	count += 2;
5678 	/* context control state */
5679 	count += 3;
5680 
5681 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5682 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5683 			if (sect->id == SECT_CONTEXT)
5684 				count += 2 + ext->reg_count;
5685 			else
5686 				return 0;
5687 		}
5688 	}
5689 	/* pa_sc_raster_config */
5690 	count += 3;
5691 	/* end clear state */
5692 	count += 2;
5693 	/* clear state */
5694 	count += 2;
5695 
5696 	return count;
5697 }
5698 
5699 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5700 {
5701 	u32 count = 0, i;
5702 	const struct cs_section_def *sect = NULL;
5703 	const struct cs_extent_def *ext = NULL;
5704 
5705 	if (rdev->rlc.cs_data == NULL)
5706 		return;
5707 	if (buffer == NULL)
5708 		return;
5709 
5710 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5711 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5712 
5713 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5714 	buffer[count++] = cpu_to_le32(0x80000000);
5715 	buffer[count++] = cpu_to_le32(0x80000000);
5716 
5717 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5718 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5719 			if (sect->id == SECT_CONTEXT) {
5720 				buffer[count++] =
5721 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5722 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5723 				for (i = 0; i < ext->reg_count; i++)
5724 					buffer[count++] = cpu_to_le32(ext->extent[i]);
5725 			} else {
5726 				return;
5727 			}
5728 		}
5729 	}
5730 
5731 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5732 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5733 	switch (rdev->family) {
5734 	case CHIP_TAHITI:
5735 	case CHIP_PITCAIRN:
5736 		buffer[count++] = cpu_to_le32(0x2a00126a);
5737 		break;
5738 	case CHIP_VERDE:
5739 		buffer[count++] = cpu_to_le32(0x0000124a);
5740 		break;
5741 	case CHIP_OLAND:
5742 		buffer[count++] = cpu_to_le32(0x00000082);
5743 		break;
5744 	case CHIP_HAINAN:
5745 		buffer[count++] = cpu_to_le32(0x00000000);
5746 		break;
5747 	default:
5748 		buffer[count++] = cpu_to_le32(0x00000000);
5749 		break;
5750 	}
5751 
5752 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5753 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5754 
5755 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5756 	buffer[count++] = cpu_to_le32(0);
5757 }
5758 
5759 static void si_init_pg(struct radeon_device *rdev)
5760 {
5761 	if (rdev->pg_flags) {
5762 		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5763 			si_init_dma_pg(rdev);
5764 		}
5765 		si_init_ao_cu_mask(rdev);
5766 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5767 			si_init_gfx_cgpg(rdev);
5768 		} else {
5769 			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5770 			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5771 		}
5772 		si_enable_dma_pg(rdev, true);
5773 		si_enable_gfx_cgpg(rdev, true);
5774 	} else {
5775 		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5776 		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5777 	}
5778 }
5779 
5780 static void si_fini_pg(struct radeon_device *rdev)
5781 {
5782 	if (rdev->pg_flags) {
5783 		si_enable_dma_pg(rdev, false);
5784 		si_enable_gfx_cgpg(rdev, false);
5785 	}
5786 }
5787 
5788 /*
5789  * RLC
5790  */
5791 void si_rlc_reset(struct radeon_device *rdev)
5792 {
5793 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5794 
5795 	tmp |= SOFT_RESET_RLC;
5796 	WREG32(GRBM_SOFT_RESET, tmp);
5797 	udelay(50);
5798 	tmp &= ~SOFT_RESET_RLC;
5799 	WREG32(GRBM_SOFT_RESET, tmp);
5800 	udelay(50);
5801 }
5802 
5803 static void si_rlc_stop(struct radeon_device *rdev)
5804 {
5805 	WREG32(RLC_CNTL, 0);
5806 
5807 	si_enable_gui_idle_interrupt(rdev, false);
5808 
5809 	si_wait_for_rlc_serdes(rdev);
5810 }
5811 
5812 static void si_rlc_start(struct radeon_device *rdev)
5813 {
5814 	WREG32(RLC_CNTL, RLC_ENABLE);
5815 
5816 	si_enable_gui_idle_interrupt(rdev, true);
5817 
5818 	udelay(50);
5819 }
5820 
5821 static bool si_lbpw_supported(struct radeon_device *rdev)
5822 {
5823 	u32 tmp;
5824 
5825 	/* Enable LBPW only for DDR3 */
5826 	tmp = RREG32(MC_SEQ_MISC0);
5827 	if ((tmp & 0xF0000000) == 0xB0000000)
5828 		return true;
5829 	return false;
5830 }
5831 
5832 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5833 {
5834 	u32 tmp;
5835 
5836 	tmp = RREG32(RLC_LB_CNTL);
5837 	if (enable)
5838 		tmp |= LOAD_BALANCE_ENABLE;
5839 	else
5840 		tmp &= ~LOAD_BALANCE_ENABLE;
5841 	WREG32(RLC_LB_CNTL, tmp);
5842 
5843 	if (!enable) {
5844 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5845 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5846 	}
5847 }
5848 
5849 static int si_rlc_resume(struct radeon_device *rdev)
5850 {
5851 	u32 i;
5852 
5853 	if (!rdev->rlc_fw)
5854 		return -EINVAL;
5855 
5856 	si_rlc_stop(rdev);
5857 
5858 	si_rlc_reset(rdev);
5859 
5860 	si_init_pg(rdev);
5861 
5862 	si_init_cg(rdev);
5863 
5864 	WREG32(RLC_RL_BASE, 0);
5865 	WREG32(RLC_RL_SIZE, 0);
5866 	WREG32(RLC_LB_CNTL, 0);
5867 	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5868 	WREG32(RLC_LB_CNTR_INIT, 0);
5869 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5870 
5871 	WREG32(RLC_MC_CNTL, 0);
5872 	WREG32(RLC_UCODE_CNTL, 0);
5873 
5874 	if (rdev->new_fw) {
5875 		const struct rlc_firmware_header_v1_0 *hdr =
5876 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5877 		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5878 		const __le32 *fw_data = (const __le32 *)
5879 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5880 
5881 		radeon_ucode_print_rlc_hdr(&hdr->header);
5882 
5883 		for (i = 0; i < fw_size; i++) {
5884 			WREG32(RLC_UCODE_ADDR, i);
5885 			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
5886 		}
5887 	} else {
5888 		const __be32 *fw_data =
5889 			(const __be32 *)rdev->rlc_fw->data;
5890 		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5891 			WREG32(RLC_UCODE_ADDR, i);
5892 			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5893 		}
5894 	}
5895 	WREG32(RLC_UCODE_ADDR, 0);
5896 
5897 	si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5898 
5899 	si_rlc_start(rdev);
5900 
5901 	return 0;
5902 }
5903 
5904 static void si_enable_interrupts(struct radeon_device *rdev)
5905 {
5906 	u32 ih_cntl = RREG32(IH_CNTL);
5907 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5908 
5909 	ih_cntl |= ENABLE_INTR;
5910 	ih_rb_cntl |= IH_RB_ENABLE;
5911 	WREG32(IH_CNTL, ih_cntl);
5912 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5913 	rdev->ih.enabled = true;
5914 }
5915 
5916 static void si_disable_interrupts(struct radeon_device *rdev)
5917 {
5918 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5919 	u32 ih_cntl = RREG32(IH_CNTL);
5920 
5921 	ih_rb_cntl &= ~IH_RB_ENABLE;
5922 	ih_cntl &= ~ENABLE_INTR;
5923 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5924 	WREG32(IH_CNTL, ih_cntl);
5925 	/* set rptr, wptr to 0 */
5926 	WREG32(IH_RB_RPTR, 0);
5927 	WREG32(IH_RB_WPTR, 0);
5928 	rdev->ih.enabled = false;
5929 	rdev->ih.rptr = 0;
5930 }
5931 
5932 static void si_disable_interrupt_state(struct radeon_device *rdev)
5933 {
5934 	u32 tmp;
5935 
5936 	tmp = RREG32(CP_INT_CNTL_RING0) &
5937 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5938 	WREG32(CP_INT_CNTL_RING0, tmp);
5939 	WREG32(CP_INT_CNTL_RING1, 0);
5940 	WREG32(CP_INT_CNTL_RING2, 0);
5941 	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5942 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5943 	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5944 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5945 	WREG32(GRBM_INT_CNTL, 0);
5946 	WREG32(SRBM_INT_CNTL, 0);
5947 	if (rdev->num_crtc >= 2) {
5948 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5949 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5950 	}
5951 	if (rdev->num_crtc >= 4) {
5952 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5953 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5954 	}
5955 	if (rdev->num_crtc >= 6) {
5956 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5957 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5958 	}
5959 
5960 	if (rdev->num_crtc >= 2) {
5961 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5962 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5963 	}
5964 	if (rdev->num_crtc >= 4) {
5965 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5966 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5967 	}
5968 	if (rdev->num_crtc >= 6) {
5969 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5970 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5971 	}
5972 
5973 	if (!ASIC_IS_NODCE(rdev)) {
5974 		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5975 
5976 		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5977 		WREG32(DC_HPD1_INT_CONTROL, tmp);
5978 		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5979 		WREG32(DC_HPD2_INT_CONTROL, tmp);
5980 		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5981 		WREG32(DC_HPD3_INT_CONTROL, tmp);
5982 		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5983 		WREG32(DC_HPD4_INT_CONTROL, tmp);
5984 		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5985 		WREG32(DC_HPD5_INT_CONTROL, tmp);
5986 		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5987 		WREG32(DC_HPD6_INT_CONTROL, tmp);
5988 	}
5989 }
5990 
5991 static int si_irq_init(struct radeon_device *rdev)
5992 {
5993 	int ret = 0;
5994 	int rb_bufsz;
5995 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5996 
5997 	/* allocate ring */
5998 	ret = r600_ih_ring_alloc(rdev);
5999 	if (ret)
6000 		return ret;
6001 
6002 	/* disable irqs */
6003 	si_disable_interrupts(rdev);
6004 
6005 	/* init rlc */
6006 	ret = si_rlc_resume(rdev);
6007 	if (ret) {
6008 		r600_ih_ring_fini(rdev);
6009 		return ret;
6010 	}
6011 
6012 	/* setup interrupt control */
6013 	/* set dummy read address to ring address */
6014 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6015 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6016 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6017 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6018 	 */
6019 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6020 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6021 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6022 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6023 
6024 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6025 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6026 
6027 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6028 		      IH_WPTR_OVERFLOW_CLEAR |
6029 		      (rb_bufsz << 1));
6030 
6031 	if (rdev->wb.enabled)
6032 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6033 
6034 	/* set the writeback address whether it's enabled or not */
6035 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6036 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6037 
6038 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6039 
6040 	/* set rptr, wptr to 0 */
6041 	WREG32(IH_RB_RPTR, 0);
6042 	WREG32(IH_RB_WPTR, 0);
6043 
6044 	/* Default settings for IH_CNTL (disabled at first) */
6045 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6046 	/* RPTR_REARM only works if msi's are enabled */
6047 	if (rdev->msi_enabled)
6048 		ih_cntl |= RPTR_REARM;
6049 	WREG32(IH_CNTL, ih_cntl);
6050 
6051 	/* force the active interrupt state to all disabled */
6052 	si_disable_interrupt_state(rdev);
6053 
6054 	pci_set_master(rdev->pdev);
6055 
6056 	/* enable irqs */
6057 	si_enable_interrupts(rdev);
6058 
6059 	return ret;
6060 }
6061 
6062 int si_irq_set(struct radeon_device *rdev)
6063 {
6064 	u32 cp_int_cntl;
6065 	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
6066 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6067 	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
6068 	u32 grbm_int_cntl = 0;
6069 	u32 dma_cntl, dma_cntl1;
6070 	u32 thermal_int = 0;
6071 
6072 	if (!rdev->irq.installed) {
6073 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6074 		return -EINVAL;
6075 	}
6076 	/* don't enable anything if the ih is disabled */
6077 	if (!rdev->ih.enabled) {
6078 		si_disable_interrupts(rdev);
6079 		/* force the active interrupt state to all disabled */
6080 		si_disable_interrupt_state(rdev);
6081 		return 0;
6082 	}
6083 
6084 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6085 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6086 
6087 	if (!ASIC_IS_NODCE(rdev)) {
6088 		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6089 		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6090 		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6091 		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6092 		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6093 		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6094 	}
6095 
6096 	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6097 	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6098 
6099 	thermal_int = RREG32(CG_THERMAL_INT) &
6100 		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6101 
6102 	/* enable CP interrupts on all rings */
6103 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6104 		DRM_DEBUG("si_irq_set: sw int gfx\n");
6105 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6106 	}
6107 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6108 		DRM_DEBUG("si_irq_set: sw int cp1\n");
6109 		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
6110 	}
6111 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6112 		DRM_DEBUG("si_irq_set: sw int cp2\n");
6113 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
6114 	}
6115 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6116 		DRM_DEBUG("si_irq_set: sw int dma\n");
6117 		dma_cntl |= TRAP_ENABLE;
6118 	}
6119 
6120 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6121 		DRM_DEBUG("si_irq_set: sw int dma1\n");
6122 		dma_cntl1 |= TRAP_ENABLE;
6123 	}
6124 	if (rdev->irq.crtc_vblank_int[0] ||
6125 	    atomic_read(&rdev->irq.pflip[0])) {
6126 		DRM_DEBUG("si_irq_set: vblank 0\n");
6127 		crtc1 |= VBLANK_INT_MASK;
6128 	}
6129 	if (rdev->irq.crtc_vblank_int[1] ||
6130 	    atomic_read(&rdev->irq.pflip[1])) {
6131 		DRM_DEBUG("si_irq_set: vblank 1\n");
6132 		crtc2 |= VBLANK_INT_MASK;
6133 	}
6134 	if (rdev->irq.crtc_vblank_int[2] ||
6135 	    atomic_read(&rdev->irq.pflip[2])) {
6136 		DRM_DEBUG("si_irq_set: vblank 2\n");
6137 		crtc3 |= VBLANK_INT_MASK;
6138 	}
6139 	if (rdev->irq.crtc_vblank_int[3] ||
6140 	    atomic_read(&rdev->irq.pflip[3])) {
6141 		DRM_DEBUG("si_irq_set: vblank 3\n");
6142 		crtc4 |= VBLANK_INT_MASK;
6143 	}
6144 	if (rdev->irq.crtc_vblank_int[4] ||
6145 	    atomic_read(&rdev->irq.pflip[4])) {
6146 		DRM_DEBUG("si_irq_set: vblank 4\n");
6147 		crtc5 |= VBLANK_INT_MASK;
6148 	}
6149 	if (rdev->irq.crtc_vblank_int[5] ||
6150 	    atomic_read(&rdev->irq.pflip[5])) {
6151 		DRM_DEBUG("si_irq_set: vblank 5\n");
6152 		crtc6 |= VBLANK_INT_MASK;
6153 	}
6154 	if (rdev->irq.hpd[0]) {
6155 		DRM_DEBUG("si_irq_set: hpd 1\n");
6156 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6157 	}
6158 	if (rdev->irq.hpd[1]) {
6159 		DRM_DEBUG("si_irq_set: hpd 2\n");
6160 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6161 	}
6162 	if (rdev->irq.hpd[2]) {
6163 		DRM_DEBUG("si_irq_set: hpd 3\n");
6164 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6165 	}
6166 	if (rdev->irq.hpd[3]) {
6167 		DRM_DEBUG("si_irq_set: hpd 4\n");
6168 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6169 	}
6170 	if (rdev->irq.hpd[4]) {
6171 		DRM_DEBUG("si_irq_set: hpd 5\n");
6172 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6173 	}
6174 	if (rdev->irq.hpd[5]) {
6175 		DRM_DEBUG("si_irq_set: hpd 6\n");
6176 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6177 	}
6178 
6179 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6180 	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
6181 	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
6182 
6183 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
6184 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
6185 
6186 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6187 
6188 	if (rdev->irq.dpm_thermal) {
6189 		DRM_DEBUG("dpm thermal\n");
6190 		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6191 	}
6192 
6193 	if (rdev->num_crtc >= 2) {
6194 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6195 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6196 	}
6197 	if (rdev->num_crtc >= 4) {
6198 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6199 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6200 	}
6201 	if (rdev->num_crtc >= 6) {
6202 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6203 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6204 	}
6205 
6206 	if (rdev->num_crtc >= 2) {
6207 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
6208 		       GRPH_PFLIP_INT_MASK);
6209 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
6210 		       GRPH_PFLIP_INT_MASK);
6211 	}
6212 	if (rdev->num_crtc >= 4) {
6213 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
6214 		       GRPH_PFLIP_INT_MASK);
6215 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
6216 		       GRPH_PFLIP_INT_MASK);
6217 	}
6218 	if (rdev->num_crtc >= 6) {
6219 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
6220 		       GRPH_PFLIP_INT_MASK);
6221 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
6222 		       GRPH_PFLIP_INT_MASK);
6223 	}
6224 
6225 	if (!ASIC_IS_NODCE(rdev)) {
6226 		WREG32(DC_HPD1_INT_CONTROL, hpd1);
6227 		WREG32(DC_HPD2_INT_CONTROL, hpd2);
6228 		WREG32(DC_HPD3_INT_CONTROL, hpd3);
6229 		WREG32(DC_HPD4_INT_CONTROL, hpd4);
6230 		WREG32(DC_HPD5_INT_CONTROL, hpd5);
6231 		WREG32(DC_HPD6_INT_CONTROL, hpd6);
6232 	}
6233 
6234 	WREG32(CG_THERMAL_INT, thermal_int);
6235 
6236 	/* posting read */
6237 	RREG32(SRBM_STATUS);
6238 
6239 	return 0;
6240 }
6241 
6242 static inline void si_irq_ack(struct radeon_device *rdev)
6243 {
6244 	u32 tmp;
6245 
6246 	if (ASIC_IS_NODCE(rdev))
6247 		return;
6248 
6249 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6250 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6251 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6252 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6253 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6254 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6255 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
6256 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
6257 	if (rdev->num_crtc >= 4) {
6258 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
6259 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
6260 	}
6261 	if (rdev->num_crtc >= 6) {
6262 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
6263 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
6264 	}
6265 
6266 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
6267 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6268 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
6269 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6270 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
6271 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6272 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
6273 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6274 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6275 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6276 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6277 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6278 
6279 	if (rdev->num_crtc >= 4) {
6280 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
6281 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6282 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
6283 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6284 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6285 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6286 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6287 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6288 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6289 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6290 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6291 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6292 	}
6293 
6294 	if (rdev->num_crtc >= 6) {
6295 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6296 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6297 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6298 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6299 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6300 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6301 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6302 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6303 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6304 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6305 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6306 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6307 	}
6308 
6309 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6310 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6311 		tmp |= DC_HPDx_INT_ACK;
6312 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6313 	}
6314 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6315 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6316 		tmp |= DC_HPDx_INT_ACK;
6317 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6318 	}
6319 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6320 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6321 		tmp |= DC_HPDx_INT_ACK;
6322 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6323 	}
6324 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6325 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6326 		tmp |= DC_HPDx_INT_ACK;
6327 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6328 	}
6329 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6330 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6331 		tmp |= DC_HPDx_INT_ACK;
6332 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6333 	}
6334 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6335 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6336 		tmp |= DC_HPDx_INT_ACK;
6337 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6338 	}
6339 
6340 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT) {
6341 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6342 		tmp |= DC_HPDx_RX_INT_ACK;
6343 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6344 	}
6345 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
6346 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6347 		tmp |= DC_HPDx_RX_INT_ACK;
6348 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6349 	}
6350 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
6351 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6352 		tmp |= DC_HPDx_RX_INT_ACK;
6353 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6354 	}
6355 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
6356 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6357 		tmp |= DC_HPDx_RX_INT_ACK;
6358 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6359 	}
6360 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
6361 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6362 		tmp |= DC_HPDx_RX_INT_ACK;
6363 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6364 	}
6365 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
6366 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6367 		tmp |= DC_HPDx_RX_INT_ACK;
6368 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6369 	}
6370 }
6371 
6372 static void si_irq_disable(struct radeon_device *rdev)
6373 {
6374 	si_disable_interrupts(rdev);
6375 	/* Wait and acknowledge irq */
6376 	mdelay(1);
6377 	si_irq_ack(rdev);
6378 	si_disable_interrupt_state(rdev);
6379 }
6380 
6381 static void si_irq_suspend(struct radeon_device *rdev)
6382 {
6383 	si_irq_disable(rdev);
6384 	si_rlc_stop(rdev);
6385 }
6386 
6387 static void si_irq_fini(struct radeon_device *rdev)
6388 {
6389 	si_irq_suspend(rdev);
6390 	r600_ih_ring_fini(rdev);
6391 }
6392 
6393 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6394 {
6395 	u32 wptr, tmp;
6396 
6397 	if (rdev->wb.enabled)
6398 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6399 	else
6400 		wptr = RREG32(IH_RB_WPTR);
6401 
6402 	if (wptr & RB_OVERFLOW) {
6403 		wptr &= ~RB_OVERFLOW;
6404 		/* When a ring buffer overflow happen start parsing interrupt
6405 		 * from the last not overwritten vector (wptr + 16). Hopefully
6406 		 * this should allow us to catchup.
6407 		 */
6408 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6409 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6410 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6411 		tmp = RREG32(IH_RB_CNTL);
6412 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6413 		WREG32(IH_RB_CNTL, tmp);
6414 	}
6415 	return (wptr & rdev->ih.ptr_mask);
6416 }
6417 
6418 /*        SI IV Ring
6419  * Each IV ring entry is 128 bits:
6420  * [7:0]    - interrupt source id
6421  * [31:8]   - reserved
6422  * [59:32]  - interrupt source data
6423  * [63:60]  - reserved
6424  * [71:64]  - RINGID
6425  * [79:72]  - VMID
6426  * [127:80] - reserved
6427  */
6428 int si_irq_process(struct radeon_device *rdev)
6429 {
6430 	u32 wptr;
6431 	u32 rptr;
6432 	u32 src_id, src_data, ring_id;
6433 	u32 ring_index;
6434 	bool queue_hotplug = false;
6435 	bool queue_dp = false;
6436 	bool queue_thermal = false;
6437 	u32 status, addr;
6438 
6439 	if (!rdev->ih.enabled || rdev->shutdown)
6440 		return IRQ_NONE;
6441 
6442 	wptr = si_get_ih_wptr(rdev);
6443 
6444 restart_ih:
6445 	/* is somebody else already processing irqs? */
6446 	if (atomic_xchg(&rdev->ih.lock, 1))
6447 		return IRQ_NONE;
6448 
6449 	rptr = rdev->ih.rptr;
6450 	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6451 
6452 	/* Order reading of wptr vs. reading of IH ring data */
6453 	rmb();
6454 
6455 	/* display interrupts */
6456 	si_irq_ack(rdev);
6457 
6458 	while (rptr != wptr) {
6459 		/* wptr/rptr are in bytes! */
6460 		ring_index = rptr / 4;
6461 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6462 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6463 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6464 
6465 		switch (src_id) {
6466 		case 1: /* D1 vblank/vline */
6467 			switch (src_data) {
6468 			case 0: /* D1 vblank */
6469 				if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT))
6470 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6471 
6472 				if (rdev->irq.crtc_vblank_int[0]) {
6473 					drm_handle_vblank(rdev->ddev, 0);
6474 					rdev->pm.vblank_sync = true;
6475 					wake_up(&rdev->irq.vblank_queue);
6476 				}
6477 				if (atomic_read(&rdev->irq.pflip[0]))
6478 					radeon_crtc_handle_vblank(rdev, 0);
6479 				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6480 				DRM_DEBUG("IH: D1 vblank\n");
6481 
6482 				break;
6483 			case 1: /* D1 vline */
6484 				if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT))
6485 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6486 
6487 				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6488 				DRM_DEBUG("IH: D1 vline\n");
6489 
6490 				break;
6491 			default:
6492 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6493 				break;
6494 			}
6495 			break;
6496 		case 2: /* D2 vblank/vline */
6497 			switch (src_data) {
6498 			case 0: /* D2 vblank */
6499 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
6500 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6501 
6502 				if (rdev->irq.crtc_vblank_int[1]) {
6503 					drm_handle_vblank(rdev->ddev, 1);
6504 					rdev->pm.vblank_sync = true;
6505 					wake_up(&rdev->irq.vblank_queue);
6506 				}
6507 				if (atomic_read(&rdev->irq.pflip[1]))
6508 					radeon_crtc_handle_vblank(rdev, 1);
6509 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6510 				DRM_DEBUG("IH: D2 vblank\n");
6511 
6512 				break;
6513 			case 1: /* D2 vline */
6514 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT))
6515 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6516 
6517 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6518 				DRM_DEBUG("IH: D2 vline\n");
6519 
6520 				break;
6521 			default:
6522 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6523 				break;
6524 			}
6525 			break;
6526 		case 3: /* D3 vblank/vline */
6527 			switch (src_data) {
6528 			case 0: /* D3 vblank */
6529 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
6530 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6531 
6532 				if (rdev->irq.crtc_vblank_int[2]) {
6533 					drm_handle_vblank(rdev->ddev, 2);
6534 					rdev->pm.vblank_sync = true;
6535 					wake_up(&rdev->irq.vblank_queue);
6536 				}
6537 				if (atomic_read(&rdev->irq.pflip[2]))
6538 					radeon_crtc_handle_vblank(rdev, 2);
6539 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6540 				DRM_DEBUG("IH: D3 vblank\n");
6541 
6542 				break;
6543 			case 1: /* D3 vline */
6544 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
6545 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6546 
6547 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6548 				DRM_DEBUG("IH: D3 vline\n");
6549 
6550 				break;
6551 			default:
6552 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6553 				break;
6554 			}
6555 			break;
6556 		case 4: /* D4 vblank/vline */
6557 			switch (src_data) {
6558 			case 0: /* D4 vblank */
6559 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
6560 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6561 
6562 				if (rdev->irq.crtc_vblank_int[3]) {
6563 					drm_handle_vblank(rdev->ddev, 3);
6564 					rdev->pm.vblank_sync = true;
6565 					wake_up(&rdev->irq.vblank_queue);
6566 				}
6567 				if (atomic_read(&rdev->irq.pflip[3]))
6568 					radeon_crtc_handle_vblank(rdev, 3);
6569 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6570 				DRM_DEBUG("IH: D4 vblank\n");
6571 
6572 				break;
6573 			case 1: /* D4 vline */
6574 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
6575 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6576 
6577 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6578 				DRM_DEBUG("IH: D4 vline\n");
6579 
6580 				break;
6581 			default:
6582 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6583 				break;
6584 			}
6585 			break;
6586 		case 5: /* D5 vblank/vline */
6587 			switch (src_data) {
6588 			case 0: /* D5 vblank */
6589 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
6590 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6591 
6592 				if (rdev->irq.crtc_vblank_int[4]) {
6593 					drm_handle_vblank(rdev->ddev, 4);
6594 					rdev->pm.vblank_sync = true;
6595 					wake_up(&rdev->irq.vblank_queue);
6596 				}
6597 				if (atomic_read(&rdev->irq.pflip[4]))
6598 					radeon_crtc_handle_vblank(rdev, 4);
6599 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6600 				DRM_DEBUG("IH: D5 vblank\n");
6601 
6602 				break;
6603 			case 1: /* D5 vline */
6604 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
6605 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6606 
6607 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6608 				DRM_DEBUG("IH: D5 vline\n");
6609 
6610 				break;
6611 			default:
6612 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6613 				break;
6614 			}
6615 			break;
6616 		case 6: /* D6 vblank/vline */
6617 			switch (src_data) {
6618 			case 0: /* D6 vblank */
6619 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
6620 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6621 
6622 				if (rdev->irq.crtc_vblank_int[5]) {
6623 					drm_handle_vblank(rdev->ddev, 5);
6624 					rdev->pm.vblank_sync = true;
6625 					wake_up(&rdev->irq.vblank_queue);
6626 				}
6627 				if (atomic_read(&rdev->irq.pflip[5]))
6628 					radeon_crtc_handle_vblank(rdev, 5);
6629 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6630 				DRM_DEBUG("IH: D6 vblank\n");
6631 
6632 				break;
6633 			case 1: /* D6 vline */
6634 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
6635 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6636 
6637 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6638 				DRM_DEBUG("IH: D6 vline\n");
6639 
6640 				break;
6641 			default:
6642 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6643 				break;
6644 			}
6645 			break;
6646 		case 8: /* D1 page flip */
6647 		case 10: /* D2 page flip */
6648 		case 12: /* D3 page flip */
6649 		case 14: /* D4 page flip */
6650 		case 16: /* D5 page flip */
6651 		case 18: /* D6 page flip */
6652 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6653 			if (radeon_use_pflipirq > 0)
6654 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6655 			break;
6656 		case 42: /* HPD hotplug */
6657 			switch (src_data) {
6658 			case 0:
6659 				if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT))
6660 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6661 
6662 				rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6663 				queue_hotplug = true;
6664 				DRM_DEBUG("IH: HPD1\n");
6665 
6666 				break;
6667 			case 1:
6668 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT))
6669 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6670 
6671 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6672 				queue_hotplug = true;
6673 				DRM_DEBUG("IH: HPD2\n");
6674 
6675 				break;
6676 			case 2:
6677 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT))
6678 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6679 
6680 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6681 				queue_hotplug = true;
6682 				DRM_DEBUG("IH: HPD3\n");
6683 
6684 				break;
6685 			case 3:
6686 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT))
6687 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6688 
6689 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6690 				queue_hotplug = true;
6691 				DRM_DEBUG("IH: HPD4\n");
6692 
6693 				break;
6694 			case 4:
6695 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT))
6696 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6697 
6698 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6699 				queue_hotplug = true;
6700 				DRM_DEBUG("IH: HPD5\n");
6701 
6702 				break;
6703 			case 5:
6704 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT))
6705 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6706 
6707 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6708 				queue_hotplug = true;
6709 				DRM_DEBUG("IH: HPD6\n");
6710 
6711 				break;
6712 			case 6:
6713 				if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT))
6714 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6715 
6716 				rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT;
6717 				queue_dp = true;
6718 				DRM_DEBUG("IH: HPD_RX 1\n");
6719 
6720 				break;
6721 			case 7:
6722 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT))
6723 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6724 
6725 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
6726 				queue_dp = true;
6727 				DRM_DEBUG("IH: HPD_RX 2\n");
6728 
6729 				break;
6730 			case 8:
6731 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
6732 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6733 
6734 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
6735 				queue_dp = true;
6736 				DRM_DEBUG("IH: HPD_RX 3\n");
6737 
6738 				break;
6739 			case 9:
6740 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
6741 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6742 
6743 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
6744 				queue_dp = true;
6745 				DRM_DEBUG("IH: HPD_RX 4\n");
6746 
6747 				break;
6748 			case 10:
6749 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
6750 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6751 
6752 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
6753 				queue_dp = true;
6754 				DRM_DEBUG("IH: HPD_RX 5\n");
6755 
6756 				break;
6757 			case 11:
6758 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
6759 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6760 
6761 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
6762 				queue_dp = true;
6763 				DRM_DEBUG("IH: HPD_RX 6\n");
6764 
6765 				break;
6766 			default:
6767 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6768 				break;
6769 			}
6770 			break;
6771 		case 96:
6772 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
6773 			WREG32(SRBM_INT_ACK, 0x1);
6774 			break;
6775 		case 124: /* UVD */
6776 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6777 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6778 			break;
6779 		case 146:
6780 		case 147:
6781 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6782 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6783 			/* reset addr and status */
6784 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6785 			if (addr == 0x0 && status == 0x0)
6786 				break;
6787 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6788 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6789 				addr);
6790 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6791 				status);
6792 			si_vm_decode_fault(rdev, status, addr);
6793 			break;
6794 		case 176: /* RINGID0 CP_INT */
6795 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6796 			break;
6797 		case 177: /* RINGID1 CP_INT */
6798 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6799 			break;
6800 		case 178: /* RINGID2 CP_INT */
6801 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6802 			break;
6803 		case 181: /* CP EOP event */
6804 			DRM_DEBUG("IH: CP EOP\n");
6805 			switch (ring_id) {
6806 			case 0:
6807 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6808 				break;
6809 			case 1:
6810 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6811 				break;
6812 			case 2:
6813 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6814 				break;
6815 			}
6816 			break;
6817 		case 224: /* DMA trap event */
6818 			DRM_DEBUG("IH: DMA trap\n");
6819 			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6820 			break;
6821 		case 230: /* thermal low to high */
6822 			DRM_DEBUG("IH: thermal low to high\n");
6823 			rdev->pm.dpm.thermal.high_to_low = false;
6824 			queue_thermal = true;
6825 			break;
6826 		case 231: /* thermal high to low */
6827 			DRM_DEBUG("IH: thermal high to low\n");
6828 			rdev->pm.dpm.thermal.high_to_low = true;
6829 			queue_thermal = true;
6830 			break;
6831 		case 233: /* GUI IDLE */
6832 			DRM_DEBUG("IH: GUI idle\n");
6833 			break;
6834 		case 244: /* DMA trap event */
6835 			DRM_DEBUG("IH: DMA1 trap\n");
6836 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6837 			break;
6838 		default:
6839 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6840 			break;
6841 		}
6842 
6843 		/* wptr/rptr are in bytes! */
6844 		rptr += 16;
6845 		rptr &= rdev->ih.ptr_mask;
6846 		WREG32(IH_RB_RPTR, rptr);
6847 	}
6848 	if (queue_dp)
6849 		schedule_work(&rdev->dp_work);
6850 	if (queue_hotplug)
6851 		schedule_delayed_work(&rdev->hotplug_work, 0);
6852 	if (queue_thermal && rdev->pm.dpm_enabled)
6853 		schedule_work(&rdev->pm.dpm.thermal.work);
6854 	rdev->ih.rptr = rptr;
6855 	atomic_set(&rdev->ih.lock, 0);
6856 
6857 	/* make sure wptr hasn't changed while processing */
6858 	wptr = si_get_ih_wptr(rdev);
6859 	if (wptr != rptr)
6860 		goto restart_ih;
6861 
6862 	return IRQ_HANDLED;
6863 }
6864 
6865 /*
6866  * startup/shutdown callbacks
6867  */
6868 static void si_uvd_init(struct radeon_device *rdev)
6869 {
6870 	int r;
6871 
6872 	if (!rdev->has_uvd)
6873 		return;
6874 
6875 	r = radeon_uvd_init(rdev);
6876 	if (r) {
6877 		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
6878 		/*
6879 		 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
6880 		 * to early fails uvd_v2_2_resume() and thus nothing happens
6881 		 * there. So it is pointless to try to go through that code
6882 		 * hence why we disable uvd here.
6883 		 */
6884 		rdev->has_uvd = 0;
6885 		return;
6886 	}
6887 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
6888 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
6889 }
6890 
6891 static void si_uvd_start(struct radeon_device *rdev)
6892 {
6893 	int r;
6894 
6895 	if (!rdev->has_uvd)
6896 		return;
6897 
6898 	r = uvd_v2_2_resume(rdev);
6899 	if (r) {
6900 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
6901 		goto error;
6902 	}
6903 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
6904 	if (r) {
6905 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
6906 		goto error;
6907 	}
6908 	return;
6909 
6910 error:
6911 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6912 }
6913 
6914 static void si_uvd_resume(struct radeon_device *rdev)
6915 {
6916 	struct radeon_ring *ring;
6917 	int r;
6918 
6919 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
6920 		return;
6921 
6922 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6923 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
6924 	if (r) {
6925 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
6926 		return;
6927 	}
6928 	r = uvd_v1_0_init(rdev);
6929 	if (r) {
6930 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
6931 		return;
6932 	}
6933 }
6934 
6935 static void si_vce_init(struct radeon_device *rdev)
6936 {
6937 	int r;
6938 
6939 	if (!rdev->has_vce)
6940 		return;
6941 
6942 	r = radeon_vce_init(rdev);
6943 	if (r) {
6944 		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
6945 		/*
6946 		 * At this point rdev->vce.vcpu_bo is NULL which trickles down
6947 		 * to early fails si_vce_start() and thus nothing happens
6948 		 * there. So it is pointless to try to go through that code
6949 		 * hence why we disable vce here.
6950 		 */
6951 		rdev->has_vce = 0;
6952 		return;
6953 	}
6954 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
6955 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
6956 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
6957 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
6958 }
6959 
6960 static void si_vce_start(struct radeon_device *rdev)
6961 {
6962 	int r;
6963 
6964 	if (!rdev->has_vce)
6965 		return;
6966 
6967 	r = radeon_vce_resume(rdev);
6968 	if (r) {
6969 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6970 		goto error;
6971 	}
6972 	r = vce_v1_0_resume(rdev);
6973 	if (r) {
6974 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6975 		goto error;
6976 	}
6977 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
6978 	if (r) {
6979 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
6980 		goto error;
6981 	}
6982 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
6983 	if (r) {
6984 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
6985 		goto error;
6986 	}
6987 	return;
6988 
6989 error:
6990 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
6991 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
6992 }
6993 
6994 static void si_vce_resume(struct radeon_device *rdev)
6995 {
6996 	struct radeon_ring *ring;
6997 	int r;
6998 
6999 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
7000 		return;
7001 
7002 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
7003 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
7004 	if (r) {
7005 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
7006 		return;
7007 	}
7008 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
7009 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
7010 	if (r) {
7011 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
7012 		return;
7013 	}
7014 	r = vce_v1_0_init(rdev);
7015 	if (r) {
7016 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
7017 		return;
7018 	}
7019 }
7020 
7021 static int si_startup(struct radeon_device *rdev)
7022 {
7023 	struct radeon_ring *ring;
7024 	int r;
7025 
7026 	/* enable pcie gen2/3 link */
7027 	si_pcie_gen3_enable(rdev);
7028 	/* enable aspm */
7029 	si_program_aspm(rdev);
7030 
7031 	/* scratch needs to be initialized before MC */
7032 	r = r600_vram_scratch_init(rdev);
7033 	if (r)
7034 		return r;
7035 
7036 	si_mc_program(rdev);
7037 
7038 	if (!rdev->pm.dpm_enabled) {
7039 		r = si_mc_load_microcode(rdev);
7040 		if (r) {
7041 			DRM_ERROR("Failed to load MC firmware!\n");
7042 			return r;
7043 		}
7044 	}
7045 
7046 	r = si_pcie_gart_enable(rdev);
7047 	if (r)
7048 		return r;
7049 	si_gpu_init(rdev);
7050 
7051 	/* allocate rlc buffers */
7052 	if (rdev->family == CHIP_VERDE) {
7053 		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
7054 		rdev->rlc.reg_list_size =
7055 			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
7056 	}
7057 	rdev->rlc.cs_data = si_cs_data;
7058 	r = sumo_rlc_init(rdev);
7059 	if (r) {
7060 		DRM_ERROR("Failed to init rlc BOs!\n");
7061 		return r;
7062 	}
7063 
7064 	/* allocate wb buffer */
7065 	r = radeon_wb_init(rdev);
7066 	if (r)
7067 		return r;
7068 
7069 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7070 	if (r) {
7071 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7072 		return r;
7073 	}
7074 
7075 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7076 	if (r) {
7077 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7078 		return r;
7079 	}
7080 
7081 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7082 	if (r) {
7083 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7084 		return r;
7085 	}
7086 
7087 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7088 	if (r) {
7089 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7090 		return r;
7091 	}
7092 
7093 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7094 	if (r) {
7095 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7096 		return r;
7097 	}
7098 
7099 	si_uvd_start(rdev);
7100 	si_vce_start(rdev);
7101 
7102 	/* Enable IRQ */
7103 	if (!rdev->irq.installed) {
7104 		r = radeon_irq_kms_init(rdev);
7105 		if (r)
7106 			return r;
7107 	}
7108 
7109 	r = si_irq_init(rdev);
7110 	if (r) {
7111 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
7112 		radeon_irq_kms_fini(rdev);
7113 		return r;
7114 	}
7115 	si_irq_set(rdev);
7116 
7117 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7118 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7119 			     RADEON_CP_PACKET2);
7120 	if (r)
7121 		return r;
7122 
7123 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7124 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7125 			     RADEON_CP_PACKET2);
7126 	if (r)
7127 		return r;
7128 
7129 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7130 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7131 			     RADEON_CP_PACKET2);
7132 	if (r)
7133 		return r;
7134 
7135 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7136 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7137 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
7138 	if (r)
7139 		return r;
7140 
7141 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7142 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7143 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
7144 	if (r)
7145 		return r;
7146 
7147 	r = si_cp_load_microcode(rdev);
7148 	if (r)
7149 		return r;
7150 	r = si_cp_resume(rdev);
7151 	if (r)
7152 		return r;
7153 
7154 	r = cayman_dma_resume(rdev);
7155 	if (r)
7156 		return r;
7157 
7158 	si_uvd_resume(rdev);
7159 	si_vce_resume(rdev);
7160 
7161 	r = radeon_ib_pool_init(rdev);
7162 	if (r) {
7163 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7164 		return r;
7165 	}
7166 
7167 	r = radeon_vm_manager_init(rdev);
7168 	if (r) {
7169 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7170 		return r;
7171 	}
7172 
7173 	r = radeon_audio_init(rdev);
7174 	if (r)
7175 		return r;
7176 
7177 	return 0;
7178 }
7179 
7180 int si_resume(struct radeon_device *rdev)
7181 {
7182 	int r;
7183 
7184 	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
7185 	 * posting will perform necessary task to bring back GPU into good
7186 	 * shape.
7187 	 */
7188 	/* post card */
7189 	atom_asic_init(rdev->mode_info.atom_context);
7190 
7191 	/* init golden registers */
7192 	si_init_golden_registers(rdev);
7193 
7194 	if (rdev->pm.pm_method == PM_METHOD_DPM)
7195 		radeon_pm_resume(rdev);
7196 
7197 	rdev->accel_working = true;
7198 	r = si_startup(rdev);
7199 	if (r) {
7200 		DRM_ERROR("si startup failed on resume\n");
7201 		rdev->accel_working = false;
7202 		return r;
7203 	}
7204 
7205 	return r;
7206 
7207 }
7208 
7209 int si_suspend(struct radeon_device *rdev)
7210 {
7211 	radeon_pm_suspend(rdev);
7212 	radeon_audio_fini(rdev);
7213 	radeon_vm_manager_fini(rdev);
7214 	si_cp_enable(rdev, false);
7215 	cayman_dma_stop(rdev);
7216 	if (rdev->has_uvd) {
7217 		uvd_v1_0_fini(rdev);
7218 		radeon_uvd_suspend(rdev);
7219 	}
7220 	if (rdev->has_vce)
7221 		radeon_vce_suspend(rdev);
7222 	si_fini_pg(rdev);
7223 	si_fini_cg(rdev);
7224 	si_irq_suspend(rdev);
7225 	radeon_wb_disable(rdev);
7226 	si_pcie_gart_disable(rdev);
7227 	return 0;
7228 }
7229 
7230 /* Plan is to move initialization in that function and use
7231  * helper function so that radeon_device_init pretty much
7232  * do nothing more than calling asic specific function. This
7233  * should also allow to remove a bunch of callback function
7234  * like vram_info.
7235  */
7236 int si_init(struct radeon_device *rdev)
7237 {
7238 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7239 	int r;
7240 
7241 	/* Read BIOS */
7242 	if (!radeon_get_bios(rdev)) {
7243 		if (ASIC_IS_AVIVO(rdev))
7244 			return -EINVAL;
7245 	}
7246 	/* Must be an ATOMBIOS */
7247 	if (!rdev->is_atom_bios) {
7248 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
7249 		return -EINVAL;
7250 	}
7251 	r = radeon_atombios_init(rdev);
7252 	if (r)
7253 		return r;
7254 
7255 	/* Post card if necessary */
7256 	if (!radeon_card_posted(rdev)) {
7257 		if (!rdev->bios) {
7258 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7259 			return -EINVAL;
7260 		}
7261 		DRM_INFO("GPU not posted. posting now...\n");
7262 		atom_asic_init(rdev->mode_info.atom_context);
7263 	}
7264 	/* init golden registers */
7265 	si_init_golden_registers(rdev);
7266 	/* Initialize scratch registers */
7267 	si_scratch_init(rdev);
7268 	/* Initialize surface registers */
7269 	radeon_surface_init(rdev);
7270 	/* Initialize clocks */
7271 	radeon_get_clock_info(rdev->ddev);
7272 
7273 	/* Fence driver */
7274 	r = radeon_fence_driver_init(rdev);
7275 	if (r)
7276 		return r;
7277 
7278 	/* initialize memory controller */
7279 	r = si_mc_init(rdev);
7280 	if (r)
7281 		return r;
7282 	/* Memory manager */
7283 	r = radeon_bo_init(rdev);
7284 	if (r)
7285 		return r;
7286 
7287 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7288 	    !rdev->rlc_fw || !rdev->mc_fw) {
7289 		r = si_init_microcode(rdev);
7290 		if (r) {
7291 			DRM_ERROR("Failed to load firmware!\n");
7292 			return r;
7293 		}
7294 	}
7295 
7296 	/* Initialize power management */
7297 	radeon_pm_init(rdev);
7298 
7299 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7300 	ring->ring_obj = NULL;
7301 	r600_ring_init(rdev, ring, 1024 * 1024);
7302 
7303 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7304 	ring->ring_obj = NULL;
7305 	r600_ring_init(rdev, ring, 1024 * 1024);
7306 
7307 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7308 	ring->ring_obj = NULL;
7309 	r600_ring_init(rdev, ring, 1024 * 1024);
7310 
7311 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7312 	ring->ring_obj = NULL;
7313 	r600_ring_init(rdev, ring, 64 * 1024);
7314 
7315 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7316 	ring->ring_obj = NULL;
7317 	r600_ring_init(rdev, ring, 64 * 1024);
7318 
7319 	si_uvd_init(rdev);
7320 	si_vce_init(rdev);
7321 
7322 	rdev->ih.ring_obj = NULL;
7323 	r600_ih_ring_init(rdev, 64 * 1024);
7324 
7325 	r = r600_pcie_gart_init(rdev);
7326 	if (r)
7327 		return r;
7328 
7329 	rdev->accel_working = true;
7330 	r = si_startup(rdev);
7331 	if (r) {
7332 		dev_err(rdev->dev, "disabling GPU acceleration\n");
7333 		si_cp_fini(rdev);
7334 		cayman_dma_fini(rdev);
7335 		si_irq_fini(rdev);
7336 		sumo_rlc_fini(rdev);
7337 		radeon_wb_fini(rdev);
7338 		radeon_ib_pool_fini(rdev);
7339 		radeon_vm_manager_fini(rdev);
7340 		radeon_irq_kms_fini(rdev);
7341 		si_pcie_gart_fini(rdev);
7342 		rdev->accel_working = false;
7343 	}
7344 
7345 	/* Don't start up if the MC ucode is missing.
7346 	 * The default clocks and voltages before the MC ucode
7347 	 * is loaded are not suffient for advanced operations.
7348 	 */
7349 	if (!rdev->mc_fw) {
7350 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
7351 		return -EINVAL;
7352 	}
7353 
7354 	return 0;
7355 }
7356 
7357 void si_fini(struct radeon_device *rdev)
7358 {
7359 	radeon_pm_fini(rdev);
7360 	si_cp_fini(rdev);
7361 	cayman_dma_fini(rdev);
7362 	si_fini_pg(rdev);
7363 	si_fini_cg(rdev);
7364 	si_irq_fini(rdev);
7365 	sumo_rlc_fini(rdev);
7366 	radeon_wb_fini(rdev);
7367 	radeon_vm_manager_fini(rdev);
7368 	radeon_ib_pool_fini(rdev);
7369 	radeon_irq_kms_fini(rdev);
7370 	if (rdev->has_uvd) {
7371 		uvd_v1_0_fini(rdev);
7372 		radeon_uvd_fini(rdev);
7373 	}
7374 	if (rdev->has_vce)
7375 		radeon_vce_fini(rdev);
7376 	si_pcie_gart_fini(rdev);
7377 	r600_vram_scratch_fini(rdev);
7378 	radeon_gem_fini(rdev);
7379 	radeon_fence_driver_fini(rdev);
7380 	radeon_bo_fini(rdev);
7381 	radeon_atombios_fini(rdev);
7382 	kfree(rdev->bios);
7383 	rdev->bios = NULL;
7384 }
7385 
7386 /**
7387  * si_get_gpu_clock_counter - return GPU clock counter snapshot
7388  *
7389  * @rdev: radeon_device pointer
7390  *
7391  * Fetches a GPU clock counter snapshot (SI).
7392  * Returns the 64 bit clock counter snapshot.
7393  */
7394 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
7395 {
7396 	uint64_t clock;
7397 
7398 	mutex_lock(&rdev->gpu_clock_mutex);
7399 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
7400 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7401 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
7402 	mutex_unlock(&rdev->gpu_clock_mutex);
7403 	return clock;
7404 }
7405 
7406 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
7407 {
7408 	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
7409 	int r;
7410 
7411 	/* bypass vclk and dclk with bclk */
7412 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7413 		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
7414 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7415 
7416 	/* put PLL in bypass mode */
7417 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
7418 
7419 	if (!vclk || !dclk) {
7420 		/* keep the Bypass mode */
7421 		return 0;
7422 	}
7423 
7424 	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
7425 					  16384, 0x03FFFFFF, 0, 128, 5,
7426 					  &fb_div, &vclk_div, &dclk_div);
7427 	if (r)
7428 		return r;
7429 
7430 	/* set RESET_ANTI_MUX to 0 */
7431 	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7432 
7433 	/* set VCO_MODE to 1 */
7434 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
7435 
7436 	/* disable sleep mode */
7437 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
7438 
7439 	/* deassert UPLL_RESET */
7440 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7441 
7442 	mdelay(1);
7443 
7444 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7445 	if (r)
7446 		return r;
7447 
7448 	/* assert UPLL_RESET again */
7449 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
7450 
7451 	/* disable spread spectrum. */
7452 	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7453 
7454 	/* set feedback divider */
7455 	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
7456 
7457 	/* set ref divider to 0 */
7458 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
7459 
7460 	if (fb_div < 307200)
7461 		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
7462 	else
7463 		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
7464 
7465 	/* set PDIV_A and PDIV_B */
7466 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7467 		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
7468 		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
7469 
7470 	/* give the PLL some time to settle */
7471 	mdelay(15);
7472 
7473 	/* deassert PLL_RESET */
7474 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7475 
7476 	mdelay(15);
7477 
7478 	/* switch from bypass mode to normal mode */
7479 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
7480 
7481 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7482 	if (r)
7483 		return r;
7484 
7485 	/* switch VCLK and DCLK selection */
7486 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7487 		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
7488 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7489 
7490 	mdelay(100);
7491 
7492 	return 0;
7493 }
7494 
7495 static void si_pcie_gen3_enable(struct radeon_device *rdev)
7496 {
7497 	struct pci_dev *root = rdev->pdev->bus->self;
7498 	int bridge_pos, gpu_pos;
7499 	u32 speed_cntl, mask, current_data_rate;
7500 	int ret, i;
7501 	u16 tmp16;
7502 
7503 	if (pci_is_root_bus(rdev->pdev->bus))
7504 		return;
7505 
7506 	if (radeon_pcie_gen2 == 0)
7507 		return;
7508 
7509 	if (rdev->flags & RADEON_IS_IGP)
7510 		return;
7511 
7512 	if (!(rdev->flags & RADEON_IS_PCIE))
7513 		return;
7514 
7515 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
7516 	if (ret != 0)
7517 		return;
7518 
7519 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
7520 		return;
7521 
7522 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7523 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7524 		LC_CURRENT_DATA_RATE_SHIFT;
7525 	if (mask & DRM_PCIE_SPEED_80) {
7526 		if (current_data_rate == 2) {
7527 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7528 			return;
7529 		}
7530 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7531 	} else if (mask & DRM_PCIE_SPEED_50) {
7532 		if (current_data_rate == 1) {
7533 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7534 			return;
7535 		}
7536 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7537 	}
7538 
7539 	bridge_pos = pci_pcie_cap(root);
7540 	if (!bridge_pos)
7541 		return;
7542 
7543 	gpu_pos = pci_pcie_cap(rdev->pdev);
7544 	if (!gpu_pos)
7545 		return;
7546 
7547 	if (mask & DRM_PCIE_SPEED_80) {
7548 		/* re-try equalization if gen3 is not already enabled */
7549 		if (current_data_rate != 2) {
7550 			u16 bridge_cfg, gpu_cfg;
7551 			u16 bridge_cfg2, gpu_cfg2;
7552 			u32 max_lw, current_lw, tmp;
7553 
7554 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7555 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7556 
7557 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7558 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7559 
7560 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7561 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7562 
7563 			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7564 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7565 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7566 
7567 			if (current_lw < max_lw) {
7568 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7569 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
7570 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7571 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7572 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7573 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7574 				}
7575 			}
7576 
7577 			for (i = 0; i < 10; i++) {
7578 				/* check status */
7579 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
7580 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7581 					break;
7582 
7583 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7584 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7585 
7586 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
7587 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
7588 
7589 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7590 				tmp |= LC_SET_QUIESCE;
7591 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7592 
7593 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7594 				tmp |= LC_REDO_EQ;
7595 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7596 
7597 				mdelay(100);
7598 
7599 				/* linkctl */
7600 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
7601 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7602 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7603 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7604 
7605 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
7606 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7607 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7608 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7609 
7610 				/* linkctl2 */
7611 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
7612 				tmp16 &= ~((1 << 4) | (7 << 9));
7613 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
7614 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
7615 
7616 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7617 				tmp16 &= ~((1 << 4) | (7 << 9));
7618 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
7619 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7620 
7621 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7622 				tmp &= ~LC_SET_QUIESCE;
7623 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7624 			}
7625 		}
7626 	}
7627 
7628 	/* set the link speed */
7629 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7630 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7631 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7632 
7633 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7634 	tmp16 &= ~0xf;
7635 	if (mask & DRM_PCIE_SPEED_80)
7636 		tmp16 |= 3; /* gen3 */
7637 	else if (mask & DRM_PCIE_SPEED_50)
7638 		tmp16 |= 2; /* gen2 */
7639 	else
7640 		tmp16 |= 1; /* gen1 */
7641 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7642 
7643 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7644 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7645 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7646 
7647 	for (i = 0; i < rdev->usec_timeout; i++) {
7648 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7649 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7650 			break;
7651 		udelay(1);
7652 	}
7653 }
7654 
7655 static void si_program_aspm(struct radeon_device *rdev)
7656 {
7657 	u32 data, orig;
7658 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7659 	bool disable_clkreq = false;
7660 
7661 	if (radeon_aspm == 0)
7662 		return;
7663 
7664 	if (!(rdev->flags & RADEON_IS_PCIE))
7665 		return;
7666 
7667 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7668 	data &= ~LC_XMIT_N_FTS_MASK;
7669 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7670 	if (orig != data)
7671 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7672 
7673 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7674 	data |= LC_GO_TO_RECOVERY;
7675 	if (orig != data)
7676 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7677 
7678 	orig = data = RREG32_PCIE(PCIE_P_CNTL);
7679 	data |= P_IGNORE_EDB_ERR;
7680 	if (orig != data)
7681 		WREG32_PCIE(PCIE_P_CNTL, data);
7682 
7683 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7684 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7685 	data |= LC_PMI_TO_L1_DIS;
7686 	if (!disable_l0s)
7687 		data |= LC_L0S_INACTIVITY(7);
7688 
7689 	if (!disable_l1) {
7690 		data |= LC_L1_INACTIVITY(7);
7691 		data &= ~LC_PMI_TO_L1_DIS;
7692 		if (orig != data)
7693 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7694 
7695 		if (!disable_plloff_in_l1) {
7696 			bool clk_req_support;
7697 
7698 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7699 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7700 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7701 			if (orig != data)
7702 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7703 
7704 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7705 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7706 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7707 			if (orig != data)
7708 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7709 
7710 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7711 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7712 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7713 			if (orig != data)
7714 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7715 
7716 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7717 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7718 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7719 			if (orig != data)
7720 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7721 
7722 			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7723 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7724 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7725 				if (orig != data)
7726 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7727 
7728 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7729 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7730 				if (orig != data)
7731 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7732 
7733 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7734 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7735 				if (orig != data)
7736 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7737 
7738 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7739 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7740 				if (orig != data)
7741 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7742 
7743 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7744 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7745 				if (orig != data)
7746 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7747 
7748 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7749 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7750 				if (orig != data)
7751 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7752 
7753 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7754 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7755 				if (orig != data)
7756 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7757 
7758 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7759 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7760 				if (orig != data)
7761 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7762 			}
7763 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7764 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7765 			data |= LC_DYN_LANES_PWR_STATE(3);
7766 			if (orig != data)
7767 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7768 
7769 			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7770 			data &= ~LS2_EXIT_TIME_MASK;
7771 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7772 				data |= LS2_EXIT_TIME(5);
7773 			if (orig != data)
7774 				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7775 
7776 			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7777 			data &= ~LS2_EXIT_TIME_MASK;
7778 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7779 				data |= LS2_EXIT_TIME(5);
7780 			if (orig != data)
7781 				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7782 
7783 			if (!disable_clkreq &&
7784 			    !pci_is_root_bus(rdev->pdev->bus)) {
7785 				struct pci_dev *root = rdev->pdev->bus->self;
7786 				u32 lnkcap;
7787 
7788 				clk_req_support = false;
7789 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7790 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7791 					clk_req_support = true;
7792 			} else {
7793 				clk_req_support = false;
7794 			}
7795 
7796 			if (clk_req_support) {
7797 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7798 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7799 				if (orig != data)
7800 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7801 
7802 				orig = data = RREG32(THM_CLK_CNTL);
7803 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7804 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7805 				if (orig != data)
7806 					WREG32(THM_CLK_CNTL, data);
7807 
7808 				orig = data = RREG32(MISC_CLK_CNTL);
7809 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7810 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7811 				if (orig != data)
7812 					WREG32(MISC_CLK_CNTL, data);
7813 
7814 				orig = data = RREG32(CG_CLKPIN_CNTL);
7815 				data &= ~BCLK_AS_XCLK;
7816 				if (orig != data)
7817 					WREG32(CG_CLKPIN_CNTL, data);
7818 
7819 				orig = data = RREG32(CG_CLKPIN_CNTL_2);
7820 				data &= ~FORCE_BIF_REFCLK_EN;
7821 				if (orig != data)
7822 					WREG32(CG_CLKPIN_CNTL_2, data);
7823 
7824 				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7825 				data &= ~MPLL_CLKOUT_SEL_MASK;
7826 				data |= MPLL_CLKOUT_SEL(4);
7827 				if (orig != data)
7828 					WREG32(MPLL_BYPASSCLK_SEL, data);
7829 
7830 				orig = data = RREG32(SPLL_CNTL_MODE);
7831 				data &= ~SPLL_REFCLK_SEL_MASK;
7832 				if (orig != data)
7833 					WREG32(SPLL_CNTL_MODE, data);
7834 			}
7835 		}
7836 	} else {
7837 		if (orig != data)
7838 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7839 	}
7840 
7841 	orig = data = RREG32_PCIE(PCIE_CNTL2);
7842 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7843 	if (orig != data)
7844 		WREG32_PCIE(PCIE_CNTL2, data);
7845 
7846 	if (!disable_l0s) {
7847 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7848 		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7849 			data = RREG32_PCIE(PCIE_LC_STATUS1);
7850 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7851 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7852 				data &= ~LC_L0S_INACTIVITY_MASK;
7853 				if (orig != data)
7854 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7855 			}
7856 		}
7857 	}
7858 }
7859 
7860 int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
7861 {
7862 	unsigned i;
7863 
7864 	/* make sure VCEPLL_CTLREQ is deasserted */
7865 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7866 
7867 	mdelay(10);
7868 
7869 	/* assert UPLL_CTLREQ */
7870 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
7871 
7872 	/* wait for CTLACK and CTLACK2 to get asserted */
7873 	for (i = 0; i < 100; ++i) {
7874 		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
7875 		if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
7876 			break;
7877 		mdelay(10);
7878 	}
7879 
7880 	/* deassert UPLL_CTLREQ */
7881 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7882 
7883 	if (i == 100) {
7884 		DRM_ERROR("Timeout setting UVD clocks!\n");
7885 		return -ETIMEDOUT;
7886 	}
7887 
7888 	return 0;
7889 }
7890 
7891 int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
7892 {
7893 	unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
7894 	int r;
7895 
7896 	/* bypass evclk and ecclk with bclk */
7897 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7898 		     EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
7899 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7900 
7901 	/* put PLL in bypass mode */
7902 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
7903 		     ~VCEPLL_BYPASS_EN_MASK);
7904 
7905 	if (!evclk || !ecclk) {
7906 		/* keep the Bypass mode, put PLL to sleep */
7907 		WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7908 			     ~VCEPLL_SLEEP_MASK);
7909 		return 0;
7910 	}
7911 
7912 	r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
7913 					  16384, 0x03FFFFFF, 0, 128, 5,
7914 					  &fb_div, &evclk_div, &ecclk_div);
7915 	if (r)
7916 		return r;
7917 
7918 	/* set RESET_ANTI_MUX to 0 */
7919 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7920 
7921 	/* set VCO_MODE to 1 */
7922 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
7923 		     ~VCEPLL_VCO_MODE_MASK);
7924 
7925 	/* toggle VCEPLL_SLEEP to 1 then back to 0 */
7926 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7927 		     ~VCEPLL_SLEEP_MASK);
7928 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);
7929 
7930 	/* deassert VCEPLL_RESET */
7931 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7932 
7933 	mdelay(1);
7934 
7935 	r = si_vce_send_vcepll_ctlreq(rdev);
7936 	if (r)
7937 		return r;
7938 
7939 	/* assert VCEPLL_RESET again */
7940 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);
7941 
7942 	/* disable spread spectrum. */
7943 	WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7944 
7945 	/* set feedback divider */
7946 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);
7947 
7948 	/* set ref divider to 0 */
7949 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);
7950 
7951 	/* set PDIV_A and PDIV_B */
7952 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7953 		     VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
7954 		     ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));
7955 
7956 	/* give the PLL some time to settle */
7957 	mdelay(15);
7958 
7959 	/* deassert PLL_RESET */
7960 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7961 
7962 	mdelay(15);
7963 
7964 	/* switch from bypass mode to normal mode */
7965 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);
7966 
7967 	r = si_vce_send_vcepll_ctlreq(rdev);
7968 	if (r)
7969 		return r;
7970 
7971 	/* switch VCLK and DCLK selection */
7972 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7973 		     EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
7974 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7975 
7976 	mdelay(100);
7977 
7978 	return 0;
7979 }
7980