/* xref: /openbmc/linux/drivers/gpu/drm/radeon/si.c (revision e4781421e883340b796da5a724bda7226817990b) */
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include <drm/radeon_drm.h>
32 #include "sid.h"
33 #include "atom.h"
34 #include "si_blit_shaders.h"
35 #include "clearstate_si.h"
36 #include "radeon_ucode.h"
37 
38 
/*
 * Firmware images required by the Southern Islands (SI) family.
 * Upper-case file names are the original/legacy firmware images;
 * lower-case names are the newer firmware packaging.  The *_k_smc
 * variants presumably serve specific board/part revisions — the
 * selection logic lives elsewhere in this file (si_init_microcode);
 * confirm there.
 */

/* Tahiti */
MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/TAHITI_smc.bin");

MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
MODULE_FIRMWARE("radeon/tahiti_me.bin");
MODULE_FIRMWARE("radeon/tahiti_ce.bin");
MODULE_FIRMWARE("radeon/tahiti_mc.bin");
MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
MODULE_FIRMWARE("radeon/tahiti_smc.bin");
MODULE_FIRMWARE("radeon/tahiti_k_smc.bin");

/* Pitcairn */
MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");

MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
MODULE_FIRMWARE("radeon/pitcairn_me.bin");
MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");

/* Verde */
MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_smc.bin");

MODULE_FIRMWARE("radeon/verde_pfp.bin");
MODULE_FIRMWARE("radeon/verde_me.bin");
MODULE_FIRMWARE("radeon/verde_ce.bin");
MODULE_FIRMWARE("radeon/verde_mc.bin");
MODULE_FIRMWARE("radeon/verde_rlc.bin");
MODULE_FIRMWARE("radeon/verde_smc.bin");
MODULE_FIRMWARE("radeon/verde_k_smc.bin");

/* Oland */
MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_smc.bin");

MODULE_FIRMWARE("radeon/oland_pfp.bin");
MODULE_FIRMWARE("radeon/oland_me.bin");
MODULE_FIRMWARE("radeon/oland_ce.bin");
MODULE_FIRMWARE("radeon/oland_mc.bin");
MODULE_FIRMWARE("radeon/oland_rlc.bin");
MODULE_FIRMWARE("radeon/oland_smc.bin");
MODULE_FIRMWARE("radeon/oland_k_smc.bin");

/* Hainan */
MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
MODULE_FIRMWARE("radeon/HAINAN_me.bin");
MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
MODULE_FIRMWARE("radeon/HAINAN_smc.bin");

MODULE_FIRMWARE("radeon/hainan_pfp.bin");
MODULE_FIRMWARE("radeon/hainan_me.bin");
MODULE_FIRMWARE("radeon/hainan_ce.bin");
MODULE_FIRMWARE("radeon/hainan_mc.bin");
MODULE_FIRMWARE("radeon/hainan_rlc.bin");
MODULE_FIRMWARE("radeon/hainan_smc.bin");
MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
118 
/*
 * Forward declarations: static helpers defined later in this file, and
 * externs implemented in other radeon files (r600.c, evergreen.c, sumo
 * RLC code) that si.c reuses.
 */
static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
static void si_pcie_gen3_enable(struct radeon_device *rdev);
static void si_program_aspm(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable);
static void si_init_pg(struct radeon_device *rdev);
static void si_init_cg(struct radeon_device *rdev);
static void si_fini_pg(struct radeon_device *rdev);
static void si_fini_cg(struct radeon_device *rdev);
static void si_rlc_stop(struct radeon_device *rdev);
139 
140 static const u32 verde_rlc_save_restore_register_list[] =
141 {
142 	(0x8000 << 16) | (0x98f4 >> 2),
143 	0x00000000,
144 	(0x8040 << 16) | (0x98f4 >> 2),
145 	0x00000000,
146 	(0x8000 << 16) | (0xe80 >> 2),
147 	0x00000000,
148 	(0x8040 << 16) | (0xe80 >> 2),
149 	0x00000000,
150 	(0x8000 << 16) | (0x89bc >> 2),
151 	0x00000000,
152 	(0x8040 << 16) | (0x89bc >> 2),
153 	0x00000000,
154 	(0x8000 << 16) | (0x8c1c >> 2),
155 	0x00000000,
156 	(0x8040 << 16) | (0x8c1c >> 2),
157 	0x00000000,
158 	(0x9c00 << 16) | (0x98f0 >> 2),
159 	0x00000000,
160 	(0x9c00 << 16) | (0xe7c >> 2),
161 	0x00000000,
162 	(0x8000 << 16) | (0x9148 >> 2),
163 	0x00000000,
164 	(0x8040 << 16) | (0x9148 >> 2),
165 	0x00000000,
166 	(0x9c00 << 16) | (0x9150 >> 2),
167 	0x00000000,
168 	(0x9c00 << 16) | (0x897c >> 2),
169 	0x00000000,
170 	(0x9c00 << 16) | (0x8d8c >> 2),
171 	0x00000000,
172 	(0x9c00 << 16) | (0xac54 >> 2),
173 	0X00000000,
174 	0x3,
175 	(0x9c00 << 16) | (0x98f8 >> 2),
176 	0x00000000,
177 	(0x9c00 << 16) | (0x9910 >> 2),
178 	0x00000000,
179 	(0x9c00 << 16) | (0x9914 >> 2),
180 	0x00000000,
181 	(0x9c00 << 16) | (0x9918 >> 2),
182 	0x00000000,
183 	(0x9c00 << 16) | (0x991c >> 2),
184 	0x00000000,
185 	(0x9c00 << 16) | (0x9920 >> 2),
186 	0x00000000,
187 	(0x9c00 << 16) | (0x9924 >> 2),
188 	0x00000000,
189 	(0x9c00 << 16) | (0x9928 >> 2),
190 	0x00000000,
191 	(0x9c00 << 16) | (0x992c >> 2),
192 	0x00000000,
193 	(0x9c00 << 16) | (0x9930 >> 2),
194 	0x00000000,
195 	(0x9c00 << 16) | (0x9934 >> 2),
196 	0x00000000,
197 	(0x9c00 << 16) | (0x9938 >> 2),
198 	0x00000000,
199 	(0x9c00 << 16) | (0x993c >> 2),
200 	0x00000000,
201 	(0x9c00 << 16) | (0x9940 >> 2),
202 	0x00000000,
203 	(0x9c00 << 16) | (0x9944 >> 2),
204 	0x00000000,
205 	(0x9c00 << 16) | (0x9948 >> 2),
206 	0x00000000,
207 	(0x9c00 << 16) | (0x994c >> 2),
208 	0x00000000,
209 	(0x9c00 << 16) | (0x9950 >> 2),
210 	0x00000000,
211 	(0x9c00 << 16) | (0x9954 >> 2),
212 	0x00000000,
213 	(0x9c00 << 16) | (0x9958 >> 2),
214 	0x00000000,
215 	(0x9c00 << 16) | (0x995c >> 2),
216 	0x00000000,
217 	(0x9c00 << 16) | (0x9960 >> 2),
218 	0x00000000,
219 	(0x9c00 << 16) | (0x9964 >> 2),
220 	0x00000000,
221 	(0x9c00 << 16) | (0x9968 >> 2),
222 	0x00000000,
223 	(0x9c00 << 16) | (0x996c >> 2),
224 	0x00000000,
225 	(0x9c00 << 16) | (0x9970 >> 2),
226 	0x00000000,
227 	(0x9c00 << 16) | (0x9974 >> 2),
228 	0x00000000,
229 	(0x9c00 << 16) | (0x9978 >> 2),
230 	0x00000000,
231 	(0x9c00 << 16) | (0x997c >> 2),
232 	0x00000000,
233 	(0x9c00 << 16) | (0x9980 >> 2),
234 	0x00000000,
235 	(0x9c00 << 16) | (0x9984 >> 2),
236 	0x00000000,
237 	(0x9c00 << 16) | (0x9988 >> 2),
238 	0x00000000,
239 	(0x9c00 << 16) | (0x998c >> 2),
240 	0x00000000,
241 	(0x9c00 << 16) | (0x8c00 >> 2),
242 	0x00000000,
243 	(0x9c00 << 16) | (0x8c14 >> 2),
244 	0x00000000,
245 	(0x9c00 << 16) | (0x8c04 >> 2),
246 	0x00000000,
247 	(0x9c00 << 16) | (0x8c08 >> 2),
248 	0x00000000,
249 	(0x8000 << 16) | (0x9b7c >> 2),
250 	0x00000000,
251 	(0x8040 << 16) | (0x9b7c >> 2),
252 	0x00000000,
253 	(0x8000 << 16) | (0xe84 >> 2),
254 	0x00000000,
255 	(0x8040 << 16) | (0xe84 >> 2),
256 	0x00000000,
257 	(0x8000 << 16) | (0x89c0 >> 2),
258 	0x00000000,
259 	(0x8040 << 16) | (0x89c0 >> 2),
260 	0x00000000,
261 	(0x8000 << 16) | (0x914c >> 2),
262 	0x00000000,
263 	(0x8040 << 16) | (0x914c >> 2),
264 	0x00000000,
265 	(0x8000 << 16) | (0x8c20 >> 2),
266 	0x00000000,
267 	(0x8040 << 16) | (0x8c20 >> 2),
268 	0x00000000,
269 	(0x8000 << 16) | (0x9354 >> 2),
270 	0x00000000,
271 	(0x8040 << 16) | (0x9354 >> 2),
272 	0x00000000,
273 	(0x9c00 << 16) | (0x9060 >> 2),
274 	0x00000000,
275 	(0x9c00 << 16) | (0x9364 >> 2),
276 	0x00000000,
277 	(0x9c00 << 16) | (0x9100 >> 2),
278 	0x00000000,
279 	(0x9c00 << 16) | (0x913c >> 2),
280 	0x00000000,
281 	(0x8000 << 16) | (0x90e0 >> 2),
282 	0x00000000,
283 	(0x8000 << 16) | (0x90e4 >> 2),
284 	0x00000000,
285 	(0x8000 << 16) | (0x90e8 >> 2),
286 	0x00000000,
287 	(0x8040 << 16) | (0x90e0 >> 2),
288 	0x00000000,
289 	(0x8040 << 16) | (0x90e4 >> 2),
290 	0x00000000,
291 	(0x8040 << 16) | (0x90e8 >> 2),
292 	0x00000000,
293 	(0x9c00 << 16) | (0x8bcc >> 2),
294 	0x00000000,
295 	(0x9c00 << 16) | (0x8b24 >> 2),
296 	0x00000000,
297 	(0x9c00 << 16) | (0x88c4 >> 2),
298 	0x00000000,
299 	(0x9c00 << 16) | (0x8e50 >> 2),
300 	0x00000000,
301 	(0x9c00 << 16) | (0x8c0c >> 2),
302 	0x00000000,
303 	(0x9c00 << 16) | (0x8e58 >> 2),
304 	0x00000000,
305 	(0x9c00 << 16) | (0x8e5c >> 2),
306 	0x00000000,
307 	(0x9c00 << 16) | (0x9508 >> 2),
308 	0x00000000,
309 	(0x9c00 << 16) | (0x950c >> 2),
310 	0x00000000,
311 	(0x9c00 << 16) | (0x9494 >> 2),
312 	0x00000000,
313 	(0x9c00 << 16) | (0xac0c >> 2),
314 	0x00000000,
315 	(0x9c00 << 16) | (0xac10 >> 2),
316 	0x00000000,
317 	(0x9c00 << 16) | (0xac14 >> 2),
318 	0x00000000,
319 	(0x9c00 << 16) | (0xae00 >> 2),
320 	0x00000000,
321 	(0x9c00 << 16) | (0xac08 >> 2),
322 	0x00000000,
323 	(0x9c00 << 16) | (0x88d4 >> 2),
324 	0x00000000,
325 	(0x9c00 << 16) | (0x88c8 >> 2),
326 	0x00000000,
327 	(0x9c00 << 16) | (0x88cc >> 2),
328 	0x00000000,
329 	(0x9c00 << 16) | (0x89b0 >> 2),
330 	0x00000000,
331 	(0x9c00 << 16) | (0x8b10 >> 2),
332 	0x00000000,
333 	(0x9c00 << 16) | (0x8a14 >> 2),
334 	0x00000000,
335 	(0x9c00 << 16) | (0x9830 >> 2),
336 	0x00000000,
337 	(0x9c00 << 16) | (0x9834 >> 2),
338 	0x00000000,
339 	(0x9c00 << 16) | (0x9838 >> 2),
340 	0x00000000,
341 	(0x9c00 << 16) | (0x9a10 >> 2),
342 	0x00000000,
343 	(0x8000 << 16) | (0x9870 >> 2),
344 	0x00000000,
345 	(0x8000 << 16) | (0x9874 >> 2),
346 	0x00000000,
347 	(0x8001 << 16) | (0x9870 >> 2),
348 	0x00000000,
349 	(0x8001 << 16) | (0x9874 >> 2),
350 	0x00000000,
351 	(0x8040 << 16) | (0x9870 >> 2),
352 	0x00000000,
353 	(0x8040 << 16) | (0x9874 >> 2),
354 	0x00000000,
355 	(0x8041 << 16) | (0x9870 >> 2),
356 	0x00000000,
357 	(0x8041 << 16) | (0x9874 >> 2),
358 	0x00000000,
359 	0x00000000
360 };
361 
/*
 * Golden RLC register settings for Tahiti.
 * Triples of { mmio offset, and-mask, or-value }; presumably applied
 * read-modify-write by radeon_program_register_sequence() — the caller
 * is outside this chunk, confirm in si_init_golden_registers().
 */
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};
371 
/*
 * Golden register settings for Tahiti.
 * Triples of { mmio offset, and-mask, or-value }; presumably applied
 * read-modify-write by radeon_program_register_sequence() — confirm
 * against the caller (outside this chunk).
 */
static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};
406 
/* Additional Tahiti golden register triple, same { offset, mask, value }
 * format as tahiti_golden_registers above. */
static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};
411 
/* Golden RLC register triples { offset, and-mask, or-value } for Pitcairn. */
static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};
420 
/* Golden register triples { offset, and-mask, or-value } for Pitcairn. */
static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
451 
/* Golden RLC register triples { offset, and-mask, or-value } for Verde. */
static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};
460 
461 static const u32 verde_golden_registers[] =
462 {
463 	0x9a10, 0x00010000, 0x00018208,
464 	0x9830, 0xffffffff, 0x00000000,
465 	0x9834, 0xf00fffff, 0x00000400,
466 	0x9838, 0x0002021c, 0x00020200,
467 	0xc78, 0x00000080, 0x00000000,
468 	0xd030, 0x000300c0, 0x00800040,
469 	0xd030, 0x000300c0, 0x00800040,
470 	0xd830, 0x000300c0, 0x00800040,
471 	0xd830, 0x000300c0, 0x00800040,
472 	0x5bb0, 0x000000f0, 0x00000070,
473 	0x5bc0, 0x00200000, 0x50100000,
474 	0x7030, 0x31000311, 0x00000011,
475 	0x2ae4, 0x00073ffe, 0x000022a2,
476 	0x2ae4, 0x00073ffe, 0x000022a2,
477 	0x2ae4, 0x00073ffe, 0x000022a2,
478 	0x240c, 0x000007ff, 0x00000000,
479 	0x240c, 0x000007ff, 0x00000000,
480 	0x240c, 0x000007ff, 0x00000000,
481 	0x8a14, 0xf000001f, 0x00000007,
482 	0x8a14, 0xf000001f, 0x00000007,
483 	0x8a14, 0xf000001f, 0x00000007,
484 	0x8b24, 0xffffffff, 0x00ffffff,
485 	0x8b10, 0x0000ff0f, 0x00000000,
486 	0x28a4c, 0x07ffffff, 0x4e000000,
487 	0x28350, 0x3f3f3fff, 0x0000124a,
488 	0x28350, 0x3f3f3fff, 0x0000124a,
489 	0x28350, 0x3f3f3fff, 0x0000124a,
490 	0x30, 0x000000ff, 0x0040,
491 	0x34, 0x00000040, 0x00004040,
492 	0x9100, 0x07ffffff, 0x03000000,
493 	0x9100, 0x07ffffff, 0x03000000,
494 	0x8e88, 0x01ff1f3f, 0x00000000,
495 	0x8e88, 0x01ff1f3f, 0x00000000,
496 	0x8e88, 0x01ff1f3f, 0x00000000,
497 	0x8e84, 0x01ff1f3f, 0x00000000,
498 	0x8e84, 0x01ff1f3f, 0x00000000,
499 	0x8e84, 0x01ff1f3f, 0x00000000,
500 	0x9060, 0x0000007f, 0x00000020,
501 	0x9508, 0x00010000, 0x00010000,
502 	0xac14, 0x000003ff, 0x00000003,
503 	0xac14, 0x000003ff, 0x00000003,
504 	0xac14, 0x000003ff, 0x00000003,
505 	0xac10, 0xffffffff, 0x00000000,
506 	0xac10, 0xffffffff, 0x00000000,
507 	0xac10, 0xffffffff, 0x00000000,
508 	0xac0c, 0xffffffff, 0x00001032,
509 	0xac0c, 0xffffffff, 0x00001032,
510 	0xac0c, 0xffffffff, 0x00001032,
511 	0x88d4, 0x0000001f, 0x00000010,
512 	0x88d4, 0x0000001f, 0x00000010,
513 	0x88d4, 0x0000001f, 0x00000010,
514 	0x15c0, 0x000c0fc0, 0x000c0400
515 };
516 
/* Golden RLC register triples { offset, and-mask, or-value } for Oland. */
static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};
525 
/* Golden register triples { offset, and-mask, or-value } for Oland. */
static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
556 
/* Golden register triples { offset, and-mask, or-value } for Hainan. */
static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
585 
/* Additional Hainan golden register triple, same { offset, mask, value }
 * format as hainan_golden_registers above. */
static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};
590 
/*
 * Clock-gating init sequence for Tahiti, same { offset, and-mask,
 * or-value } triple format as the golden register tables above.
 * NOTE(review): "mgcg_cgcg" presumably means medium-grain / coarse-grain
 * clock gating — name-based inference; confirm against si_init_cg().
 */
static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
720 
/*
 * Clock-gating init sequence for Pitcairn, same { offset, and-mask,
 * or-value } triple format as the golden register tables above.
 * Differs from the Tahiti table only in the 0x91xx/0x92xx rows present
 * and the missing 0x264c/0x2648 entries.
 */
static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
818 
/*
 * Clock-gating init sequence for Verde, same { offset, and-mask,
 * or-value } triple format as the golden register tables above.
 * Identical to the Pitcairn table except for the extra 0x264c/0x2648
 * entries.
 */
static const u32 verde_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
918 
/* Oland MGCG/CGCG (medium-grain / coarse-grain clock gating) init sequence.
 * Flat array of {register offset, AND mask, OR value} triplets consumed by
 * radeon_program_register_sequence() from si_init_golden_registers().
 */
static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
998 
/* Hainan MGCG/CGCG (clock gating) init sequence: {reg, mask, value} triplets
 * programmed by si_init_golden_registers().  Same layout as the Oland table
 * but with a few entries absent (Hainan has no display block).
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1075 
1076 static u32 verde_pg_init[] =
1077 {
1078 	0x353c, 0xffffffff, 0x40000,
1079 	0x3538, 0xffffffff, 0x200010ff,
1080 	0x353c, 0xffffffff, 0x0,
1081 	0x353c, 0xffffffff, 0x0,
1082 	0x353c, 0xffffffff, 0x0,
1083 	0x353c, 0xffffffff, 0x0,
1084 	0x353c, 0xffffffff, 0x0,
1085 	0x353c, 0xffffffff, 0x7007,
1086 	0x3538, 0xffffffff, 0x300010ff,
1087 	0x353c, 0xffffffff, 0x0,
1088 	0x353c, 0xffffffff, 0x0,
1089 	0x353c, 0xffffffff, 0x0,
1090 	0x353c, 0xffffffff, 0x0,
1091 	0x353c, 0xffffffff, 0x0,
1092 	0x353c, 0xffffffff, 0x400000,
1093 	0x3538, 0xffffffff, 0x100010ff,
1094 	0x353c, 0xffffffff, 0x0,
1095 	0x353c, 0xffffffff, 0x0,
1096 	0x353c, 0xffffffff, 0x0,
1097 	0x353c, 0xffffffff, 0x0,
1098 	0x353c, 0xffffffff, 0x0,
1099 	0x353c, 0xffffffff, 0x120200,
1100 	0x3538, 0xffffffff, 0x500010ff,
1101 	0x353c, 0xffffffff, 0x0,
1102 	0x353c, 0xffffffff, 0x0,
1103 	0x353c, 0xffffffff, 0x0,
1104 	0x353c, 0xffffffff, 0x0,
1105 	0x353c, 0xffffffff, 0x0,
1106 	0x353c, 0xffffffff, 0x1e1e16,
1107 	0x3538, 0xffffffff, 0x600010ff,
1108 	0x353c, 0xffffffff, 0x0,
1109 	0x353c, 0xffffffff, 0x0,
1110 	0x353c, 0xffffffff, 0x0,
1111 	0x353c, 0xffffffff, 0x0,
1112 	0x353c, 0xffffffff, 0x0,
1113 	0x353c, 0xffffffff, 0x171f1e,
1114 	0x3538, 0xffffffff, 0x700010ff,
1115 	0x353c, 0xffffffff, 0x0,
1116 	0x353c, 0xffffffff, 0x0,
1117 	0x353c, 0xffffffff, 0x0,
1118 	0x353c, 0xffffffff, 0x0,
1119 	0x353c, 0xffffffff, 0x0,
1120 	0x353c, 0xffffffff, 0x0,
1121 	0x3538, 0xffffffff, 0x9ff,
1122 	0x3500, 0xffffffff, 0x0,
1123 	0x3504, 0xffffffff, 0x10000800,
1124 	0x3504, 0xffffffff, 0xf,
1125 	0x3504, 0xffffffff, 0xf,
1126 	0x3500, 0xffffffff, 0x4,
1127 	0x3504, 0xffffffff, 0x1000051e,
1128 	0x3504, 0xffffffff, 0xffff,
1129 	0x3504, 0xffffffff, 0xffff,
1130 	0x3500, 0xffffffff, 0x8,
1131 	0x3504, 0xffffffff, 0x80500,
1132 	0x3500, 0xffffffff, 0x12,
1133 	0x3504, 0xffffffff, 0x9050c,
1134 	0x3500, 0xffffffff, 0x1d,
1135 	0x3504, 0xffffffff, 0xb052c,
1136 	0x3500, 0xffffffff, 0x2a,
1137 	0x3504, 0xffffffff, 0x1053e,
1138 	0x3500, 0xffffffff, 0x2d,
1139 	0x3504, 0xffffffff, 0x10546,
1140 	0x3500, 0xffffffff, 0x30,
1141 	0x3504, 0xffffffff, 0xa054e,
1142 	0x3500, 0xffffffff, 0x3c,
1143 	0x3504, 0xffffffff, 0x1055f,
1144 	0x3500, 0xffffffff, 0x3f,
1145 	0x3504, 0xffffffff, 0x10567,
1146 	0x3500, 0xffffffff, 0x42,
1147 	0x3504, 0xffffffff, 0x1056f,
1148 	0x3500, 0xffffffff, 0x45,
1149 	0x3504, 0xffffffff, 0x10572,
1150 	0x3500, 0xffffffff, 0x48,
1151 	0x3504, 0xffffffff, 0x20575,
1152 	0x3500, 0xffffffff, 0x4c,
1153 	0x3504, 0xffffffff, 0x190801,
1154 	0x3500, 0xffffffff, 0x67,
1155 	0x3504, 0xffffffff, 0x1082a,
1156 	0x3500, 0xffffffff, 0x6a,
1157 	0x3504, 0xffffffff, 0x1b082d,
1158 	0x3500, 0xffffffff, 0x87,
1159 	0x3504, 0xffffffff, 0x310851,
1160 	0x3500, 0xffffffff, 0xba,
1161 	0x3504, 0xffffffff, 0x891,
1162 	0x3500, 0xffffffff, 0xbc,
1163 	0x3504, 0xffffffff, 0x893,
1164 	0x3500, 0xffffffff, 0xbe,
1165 	0x3504, 0xffffffff, 0x20895,
1166 	0x3500, 0xffffffff, 0xc2,
1167 	0x3504, 0xffffffff, 0x20899,
1168 	0x3500, 0xffffffff, 0xc6,
1169 	0x3504, 0xffffffff, 0x2089d,
1170 	0x3500, 0xffffffff, 0xca,
1171 	0x3504, 0xffffffff, 0x8a1,
1172 	0x3500, 0xffffffff, 0xcc,
1173 	0x3504, 0xffffffff, 0x8a3,
1174 	0x3500, 0xffffffff, 0xce,
1175 	0x3504, 0xffffffff, 0x308a5,
1176 	0x3500, 0xffffffff, 0xd3,
1177 	0x3504, 0xffffffff, 0x6d08cd,
1178 	0x3500, 0xffffffff, 0x142,
1179 	0x3504, 0xffffffff, 0x2000095a,
1180 	0x3504, 0xffffffff, 0x1,
1181 	0x3500, 0xffffffff, 0x144,
1182 	0x3504, 0xffffffff, 0x301f095b,
1183 	0x3500, 0xffffffff, 0x165,
1184 	0x3504, 0xffffffff, 0xc094d,
1185 	0x3500, 0xffffffff, 0x173,
1186 	0x3504, 0xffffffff, 0xf096d,
1187 	0x3500, 0xffffffff, 0x184,
1188 	0x3504, 0xffffffff, 0x15097f,
1189 	0x3500, 0xffffffff, 0x19b,
1190 	0x3504, 0xffffffff, 0xc0998,
1191 	0x3500, 0xffffffff, 0x1a9,
1192 	0x3504, 0xffffffff, 0x409a7,
1193 	0x3500, 0xffffffff, 0x1af,
1194 	0x3504, 0xffffffff, 0xcdc,
1195 	0x3500, 0xffffffff, 0x1b1,
1196 	0x3504, 0xffffffff, 0x800,
1197 	0x3508, 0xffffffff, 0x6c9b2000,
1198 	0x3510, 0xfc00, 0x2000,
1199 	0x3544, 0xffffffff, 0xfc0,
1200 	0x28d4, 0x00000100, 0x100
1201 };
1202 
1203 static void si_init_golden_registers(struct radeon_device *rdev)
1204 {
1205 	switch (rdev->family) {
1206 	case CHIP_TAHITI:
1207 		radeon_program_register_sequence(rdev,
1208 						 tahiti_golden_registers,
1209 						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
1210 		radeon_program_register_sequence(rdev,
1211 						 tahiti_golden_rlc_registers,
1212 						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1213 		radeon_program_register_sequence(rdev,
1214 						 tahiti_mgcg_cgcg_init,
1215 						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1216 		radeon_program_register_sequence(rdev,
1217 						 tahiti_golden_registers2,
1218 						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1219 		break;
1220 	case CHIP_PITCAIRN:
1221 		radeon_program_register_sequence(rdev,
1222 						 pitcairn_golden_registers,
1223 						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1224 		radeon_program_register_sequence(rdev,
1225 						 pitcairn_golden_rlc_registers,
1226 						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1227 		radeon_program_register_sequence(rdev,
1228 						 pitcairn_mgcg_cgcg_init,
1229 						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1230 		break;
1231 	case CHIP_VERDE:
1232 		radeon_program_register_sequence(rdev,
1233 						 verde_golden_registers,
1234 						 (const u32)ARRAY_SIZE(verde_golden_registers));
1235 		radeon_program_register_sequence(rdev,
1236 						 verde_golden_rlc_registers,
1237 						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1238 		radeon_program_register_sequence(rdev,
1239 						 verde_mgcg_cgcg_init,
1240 						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1241 		radeon_program_register_sequence(rdev,
1242 						 verde_pg_init,
1243 						 (const u32)ARRAY_SIZE(verde_pg_init));
1244 		break;
1245 	case CHIP_OLAND:
1246 		radeon_program_register_sequence(rdev,
1247 						 oland_golden_registers,
1248 						 (const u32)ARRAY_SIZE(oland_golden_registers));
1249 		radeon_program_register_sequence(rdev,
1250 						 oland_golden_rlc_registers,
1251 						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1252 		radeon_program_register_sequence(rdev,
1253 						 oland_mgcg_cgcg_init,
1254 						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1255 		break;
1256 	case CHIP_HAINAN:
1257 		radeon_program_register_sequence(rdev,
1258 						 hainan_golden_registers,
1259 						 (const u32)ARRAY_SIZE(hainan_golden_registers));
1260 		radeon_program_register_sequence(rdev,
1261 						 hainan_golden_registers2,
1262 						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
1263 		radeon_program_register_sequence(rdev,
1264 						 hainan_mgcg_cgcg_init,
1265 						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1266 		break;
1267 	default:
1268 		break;
1269 	}
1270 }
1271 
1272 /**
1273  * si_get_allowed_info_register - fetch the register for the info ioctl
1274  *
1275  * @rdev: radeon_device pointer
1276  * @reg: register offset in bytes
1277  * @val: register value
1278  *
1279  * Returns 0 for success or -EINVAL for an invalid register
1280  *
1281  */
1282 int si_get_allowed_info_register(struct radeon_device *rdev,
1283 				 u32 reg, u32 *val)
1284 {
1285 	switch (reg) {
1286 	case GRBM_STATUS:
1287 	case GRBM_STATUS2:
1288 	case GRBM_STATUS_SE0:
1289 	case GRBM_STATUS_SE1:
1290 	case SRBM_STATUS:
1291 	case SRBM_STATUS2:
1292 	case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
1293 	case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
1294 	case UVD_STATUS:
1295 		*val = RREG32(reg);
1296 		return 0;
1297 	default:
1298 		return -EINVAL;
1299 	}
1300 }
1301 
1302 #define PCIE_BUS_CLK                10000
1303 #define TCLK                        (PCIE_BUS_CLK / 10)
1304 
1305 /**
1306  * si_get_xclk - get the xclk
1307  *
1308  * @rdev: radeon_device pointer
1309  *
1310  * Returns the reference clock used by the gfx engine
1311  * (SI).
1312  */
1313 u32 si_get_xclk(struct radeon_device *rdev)
1314 {
1315 	u32 reference_clock = rdev->clock.spll.reference_freq;
1316 	u32 tmp;
1317 
1318 	tmp = RREG32(CG_CLKPIN_CNTL_2);
1319 	if (tmp & MUX_TCLK_TO_XCLK)
1320 		return TCLK;
1321 
1322 	tmp = RREG32(CG_CLKPIN_CNTL);
1323 	if (tmp & XTALIN_DIVIDE)
1324 		return reference_clock / 4;
1325 
1326 	return reference_clock;
1327 }
1328 
1329 /* get temperature in millidegrees */
1330 int si_get_temp(struct radeon_device *rdev)
1331 {
1332 	u32 temp;
1333 	int actual_temp = 0;
1334 
1335 	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1336 		CTF_TEMP_SHIFT;
1337 
1338 	if (temp & 0x200)
1339 		actual_temp = 255;
1340 	else
1341 		actual_temp = temp & 0x1ff;
1342 
1343 	actual_temp = (actual_temp * 1000);
1344 
1345 	return actual_temp;
1346 }
1347 
1348 #define TAHITI_IO_MC_REGS_SIZE 36
1349 
/* Tahiti MC sequencer IO debug writes ({index, data} pairs), programmed via
 * MC_SEQ_IO_DEBUG_INDEX/DATA before loading the legacy (non new_fw) MC ucode.
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1388 
/* Pitcairn MC sequencer IO debug writes ({index, data} pairs); identical to
 * the Tahiti table except for the final 0x9f entry.
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1427 
/* Verde MC sequencer IO debug writes ({index, data} pairs); identical to
 * the Tahiti table except for the final 0x9f entry.
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1466 
/* Oland MC sequencer IO debug writes ({index, data} pairs); identical to
 * the Tahiti table except for the final 0x9f entry.
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1505 
/* Hainan MC sequencer IO debug writes ({index, data} pairs); identical to
 * the Tahiti table except for the final 0x9f entry.
 */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1544 
1545 /* ucode loading */
1546 int si_mc_load_microcode(struct radeon_device *rdev)
1547 {
1548 	const __be32 *fw_data = NULL;
1549 	const __le32 *new_fw_data = NULL;
1550 	u32 running;
1551 	u32 *io_mc_regs = NULL;
1552 	const __le32 *new_io_mc_regs = NULL;
1553 	int i, regs_size, ucode_size;
1554 
1555 	if (!rdev->mc_fw)
1556 		return -EINVAL;
1557 
1558 	if (rdev->new_fw) {
1559 		const struct mc_firmware_header_v1_0 *hdr =
1560 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1561 
1562 		radeon_ucode_print_mc_hdr(&hdr->header);
1563 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1564 		new_io_mc_regs = (const __le32 *)
1565 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1566 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1567 		new_fw_data = (const __le32 *)
1568 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1569 	} else {
1570 		ucode_size = rdev->mc_fw->size / 4;
1571 
1572 		switch (rdev->family) {
1573 		case CHIP_TAHITI:
1574 			io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1575 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1576 			break;
1577 		case CHIP_PITCAIRN:
1578 			io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1579 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1580 			break;
1581 		case CHIP_VERDE:
1582 		default:
1583 			io_mc_regs = (u32 *)&verde_io_mc_regs;
1584 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1585 			break;
1586 		case CHIP_OLAND:
1587 			io_mc_regs = (u32 *)&oland_io_mc_regs;
1588 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1589 			break;
1590 		case CHIP_HAINAN:
1591 			io_mc_regs = (u32 *)&hainan_io_mc_regs;
1592 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1593 			break;
1594 		}
1595 		fw_data = (const __be32 *)rdev->mc_fw->data;
1596 	}
1597 
1598 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1599 
1600 	if (running == 0) {
1601 		/* reset the engine and set to writable */
1602 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1603 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1604 
1605 		/* load mc io regs */
1606 		for (i = 0; i < regs_size; i++) {
1607 			if (rdev->new_fw) {
1608 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1609 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1610 			} else {
1611 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1612 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1613 			}
1614 		}
1615 		/* load the MC ucode */
1616 		for (i = 0; i < ucode_size; i++) {
1617 			if (rdev->new_fw)
1618 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1619 			else
1620 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1621 		}
1622 
1623 		/* put the engine back into the active state */
1624 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1625 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1626 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1627 
1628 		/* wait for training to complete */
1629 		for (i = 0; i < rdev->usec_timeout; i++) {
1630 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1631 				break;
1632 			udelay(1);
1633 		}
1634 		for (i = 0; i < rdev->usec_timeout; i++) {
1635 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1636 				break;
1637 			udelay(1);
1638 		}
1639 	}
1640 
1641 	return 0;
1642 }
1643 
1644 static int si_init_microcode(struct radeon_device *rdev)
1645 {
1646 	const char *chip_name;
1647 	const char *new_chip_name;
1648 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1649 	size_t smc_req_size, mc2_req_size;
1650 	char fw_name[30];
1651 	int err;
1652 	int new_fw = 0;
1653 	bool new_smc = false;
1654 
1655 	DRM_DEBUG("\n");
1656 
1657 	switch (rdev->family) {
1658 	case CHIP_TAHITI:
1659 		chip_name = "TAHITI";
1660 		/* XXX: figure out which Tahitis need the new ucode */
1661 		if (0)
1662 			new_smc = true;
1663 		new_chip_name = "tahiti";
1664 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1665 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1666 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1667 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1668 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1669 		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1670 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1671 		break;
1672 	case CHIP_PITCAIRN:
1673 		chip_name = "PITCAIRN";
1674 		if ((rdev->pdev->revision == 0x81) ||
1675 		    (rdev->pdev->device == 0x6810) ||
1676 		    (rdev->pdev->device == 0x6811) ||
1677 		    (rdev->pdev->device == 0x6816) ||
1678 		    (rdev->pdev->device == 0x6817) ||
1679 		    (rdev->pdev->device == 0x6806))
1680 			new_smc = true;
1681 		new_chip_name = "pitcairn";
1682 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1683 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1684 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1685 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1686 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1687 		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1688 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1689 		break;
1690 	case CHIP_VERDE:
1691 		chip_name = "VERDE";
1692 		if ((rdev->pdev->revision == 0x81) ||
1693 		    (rdev->pdev->revision == 0x83) ||
1694 		    (rdev->pdev->revision == 0x87) ||
1695 		    (rdev->pdev->device == 0x6820) ||
1696 		    (rdev->pdev->device == 0x6821) ||
1697 		    (rdev->pdev->device == 0x6822) ||
1698 		    (rdev->pdev->device == 0x6823) ||
1699 		    (rdev->pdev->device == 0x682A) ||
1700 		    (rdev->pdev->device == 0x682B))
1701 			new_smc = true;
1702 		new_chip_name = "verde";
1703 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1704 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1705 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1706 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1707 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1708 		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1709 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1710 		break;
1711 	case CHIP_OLAND:
1712 		chip_name = "OLAND";
1713 		if ((rdev->pdev->revision == 0xC7) ||
1714 		    (rdev->pdev->revision == 0x80) ||
1715 		    (rdev->pdev->revision == 0x81) ||
1716 		    (rdev->pdev->revision == 0x83) ||
1717 		    (rdev->pdev->revision == 0x87) ||
1718 		    (rdev->pdev->device == 0x6604) ||
1719 		    (rdev->pdev->device == 0x6605))
1720 			new_smc = true;
1721 		new_chip_name = "oland";
1722 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1723 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1724 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1725 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1726 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1727 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1728 		break;
1729 	case CHIP_HAINAN:
1730 		chip_name = "HAINAN";
1731 		if ((rdev->pdev->revision == 0x81) ||
1732 		    (rdev->pdev->revision == 0x83) ||
1733 		    (rdev->pdev->revision == 0xC3) ||
1734 		    (rdev->pdev->device == 0x6664) ||
1735 		    (rdev->pdev->device == 0x6665) ||
1736 		    (rdev->pdev->device == 0x6667))
1737 			new_smc = true;
1738 		new_chip_name = "hainan";
1739 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1740 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1741 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1742 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1743 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1744 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1745 		break;
1746 	default: BUG();
1747 	}
1748 
1749 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
1750 
1751 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1752 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1753 	if (err) {
1754 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1755 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1756 		if (err)
1757 			goto out;
1758 		if (rdev->pfp_fw->size != pfp_req_size) {
1759 			printk(KERN_ERR
1760 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1761 			       rdev->pfp_fw->size, fw_name);
1762 			err = -EINVAL;
1763 			goto out;
1764 		}
1765 	} else {
1766 		err = radeon_ucode_validate(rdev->pfp_fw);
1767 		if (err) {
1768 			printk(KERN_ERR
1769 			       "si_cp: validation failed for firmware \"%s\"\n",
1770 			       fw_name);
1771 			goto out;
1772 		} else {
1773 			new_fw++;
1774 		}
1775 	}
1776 
1777 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
1778 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1779 	if (err) {
1780 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1781 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1782 		if (err)
1783 			goto out;
1784 		if (rdev->me_fw->size != me_req_size) {
1785 			printk(KERN_ERR
1786 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1787 			       rdev->me_fw->size, fw_name);
1788 			err = -EINVAL;
1789 		}
1790 	} else {
1791 		err = radeon_ucode_validate(rdev->me_fw);
1792 		if (err) {
1793 			printk(KERN_ERR
1794 			       "si_cp: validation failed for firmware \"%s\"\n",
1795 			       fw_name);
1796 			goto out;
1797 		} else {
1798 			new_fw++;
1799 		}
1800 	}
1801 
1802 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
1803 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1804 	if (err) {
1805 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1806 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1807 		if (err)
1808 			goto out;
1809 		if (rdev->ce_fw->size != ce_req_size) {
1810 			printk(KERN_ERR
1811 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1812 			       rdev->ce_fw->size, fw_name);
1813 			err = -EINVAL;
1814 		}
1815 	} else {
1816 		err = radeon_ucode_validate(rdev->ce_fw);
1817 		if (err) {
1818 			printk(KERN_ERR
1819 			       "si_cp: validation failed for firmware \"%s\"\n",
1820 			       fw_name);
1821 			goto out;
1822 		} else {
1823 			new_fw++;
1824 		}
1825 	}
1826 
1827 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
1828 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1829 	if (err) {
1830 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1831 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1832 		if (err)
1833 			goto out;
1834 		if (rdev->rlc_fw->size != rlc_req_size) {
1835 			printk(KERN_ERR
1836 			       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1837 			       rdev->rlc_fw->size, fw_name);
1838 			err = -EINVAL;
1839 		}
1840 	} else {
1841 		err = radeon_ucode_validate(rdev->rlc_fw);
1842 		if (err) {
1843 			printk(KERN_ERR
1844 			       "si_cp: validation failed for firmware \"%s\"\n",
1845 			       fw_name);
1846 			goto out;
1847 		} else {
1848 			new_fw++;
1849 		}
1850 	}
1851 
1852 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
1853 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1854 	if (err) {
1855 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1856 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1857 		if (err) {
1858 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1859 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1860 			if (err)
1861 				goto out;
1862 		}
1863 		if ((rdev->mc_fw->size != mc_req_size) &&
1864 		    (rdev->mc_fw->size != mc2_req_size)) {
1865 			printk(KERN_ERR
1866 			       "si_mc: Bogus length %zu in firmware \"%s\"\n",
1867 			       rdev->mc_fw->size, fw_name);
1868 			err = -EINVAL;
1869 		}
1870 		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1871 	} else {
1872 		err = radeon_ucode_validate(rdev->mc_fw);
1873 		if (err) {
1874 			printk(KERN_ERR
1875 			       "si_cp: validation failed for firmware \"%s\"\n",
1876 			       fw_name);
1877 			goto out;
1878 		} else {
1879 			new_fw++;
1880 		}
1881 	}
1882 
1883 	if (new_smc)
1884 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
1885 	else
1886 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
1887 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1888 	if (err) {
1889 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1890 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1891 		if (err) {
1892 			printk(KERN_ERR
1893 			       "smc: error loading firmware \"%s\"\n",
1894 			       fw_name);
1895 			release_firmware(rdev->smc_fw);
1896 			rdev->smc_fw = NULL;
1897 			err = 0;
1898 		} else if (rdev->smc_fw->size != smc_req_size) {
1899 			printk(KERN_ERR
1900 			       "si_smc: Bogus length %zu in firmware \"%s\"\n",
1901 			       rdev->smc_fw->size, fw_name);
1902 			err = -EINVAL;
1903 		}
1904 	} else {
1905 		err = radeon_ucode_validate(rdev->smc_fw);
1906 		if (err) {
1907 			printk(KERN_ERR
1908 			       "si_cp: validation failed for firmware \"%s\"\n",
1909 			       fw_name);
1910 			goto out;
1911 		} else {
1912 			new_fw++;
1913 		}
1914 	}
1915 
1916 	if (new_fw == 0) {
1917 		rdev->new_fw = false;
1918 	} else if (new_fw < 6) {
1919 		printk(KERN_ERR "si_fw: mixing new and old firmware!\n");
1920 		err = -EINVAL;
1921 	} else {
1922 		rdev->new_fw = true;
1923 	}
1924 out:
1925 	if (err) {
1926 		if (err != -EINVAL)
1927 			printk(KERN_ERR
1928 			       "si_cp: Failed to load firmware \"%s\"\n",
1929 			       fw_name);
1930 		release_firmware(rdev->pfp_fw);
1931 		rdev->pfp_fw = NULL;
1932 		release_firmware(rdev->me_fw);
1933 		rdev->me_fw = NULL;
1934 		release_firmware(rdev->ce_fw);
1935 		rdev->ce_fw = NULL;
1936 		release_firmware(rdev->rlc_fw);
1937 		rdev->rlc_fw = NULL;
1938 		release_firmware(rdev->mc_fw);
1939 		rdev->mc_fw = NULL;
1940 		release_firmware(rdev->smc_fw);
1941 		rdev->smc_fw = NULL;
1942 	}
1943 	return err;
1944 }
1945 
1946 /* watermark setup */
/**
 * dce6_line_buffer_adjust - program the line buffer split for one crtc
 * @rdev: radeon device
 * @radeon_crtc: crtc whose line buffer allocation is being programmed
 * @mode: mode on this crtc (NULL if none)
 * @other_mode: mode on the paired crtc sharing the same line buffer
 *
 * Programs DC_LB_MEMORY_SPLIT and the DMIF buffer allocation for this
 * crtc, then returns the line buffer allocation granted to it (0 when
 * the crtc is disabled).
 */
static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode,
				   struct drm_display_mode *other_mode)
{
	u32 tmp, buffer_alloc, i;
	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
	/*
	 * Line Buffer Setup
	 * There are 3 line buffers, each one shared by 2 display controllers.
	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
	 * the display controllers.  The partitioning is done via one of four
	 * preset allocations specified in bits 21:20:
	 *  0 - half lb
	 *  2 - whole lb, other crtc must be disabled
	 */
	/* this can get tricky if we have two large displays on a paired group
	 * of crtcs.  Ideally for multiple large displays we'd assign them to
	 * non-linked crtcs for maximum line buffer allocation.
	 */
	if (radeon_crtc->base.enabled && mode) {
		if (other_mode) {
			tmp = 0; /* 1/2 */
			buffer_alloc = 1;
		} else {
			tmp = 2; /* whole */
			buffer_alloc = 2;
		}
	} else {
		/* crtc disabled: give it no line buffer */
		tmp = 0;
		buffer_alloc = 0;
	}

	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
	       DC_LB_MEMORY_CONFIG(tmp));

	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
	/* bounded poll until the hardware acks the DMIF buffer allocation */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
			break;
		udelay(1);
	}

	if (radeon_crtc->base.enabled && mode) {
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 2:
			return 8192 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}
2005 
2006 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
2007 {
2008 	u32 tmp = RREG32(MC_SHARED_CHMAP);
2009 
2010 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2011 	case 0:
2012 	default:
2013 		return 1;
2014 	case 1:
2015 		return 2;
2016 	case 2:
2017 		return 4;
2018 	case 3:
2019 		return 8;
2020 	case 4:
2021 		return 3;
2022 	case 5:
2023 		return 6;
2024 	case 6:
2025 		return 10;
2026 	case 7:
2027 		return 12;
2028 	case 8:
2029 		return 16;
2030 	}
2031 }
2032 
/* Per-head input parameters for the DCE6 display watermark calculations. */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
2048 
2049 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
2050 {
2051 	/* Calculate raw DRAM Bandwidth */
2052 	fixed20_12 dram_efficiency; /* 0.7 */
2053 	fixed20_12 yclk, dram_channels, bandwidth;
2054 	fixed20_12 a;
2055 
2056 	a.full = dfixed_const(1000);
2057 	yclk.full = dfixed_const(wm->yclk);
2058 	yclk.full = dfixed_div(yclk, a);
2059 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2060 	a.full = dfixed_const(10);
2061 	dram_efficiency.full = dfixed_const(7);
2062 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
2063 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2064 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2065 
2066 	return dfixed_trunc(bandwidth);
2067 }
2068 
2069 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2070 {
2071 	/* Calculate DRAM Bandwidth and the part allocated to display. */
2072 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2073 	fixed20_12 yclk, dram_channels, bandwidth;
2074 	fixed20_12 a;
2075 
2076 	a.full = dfixed_const(1000);
2077 	yclk.full = dfixed_const(wm->yclk);
2078 	yclk.full = dfixed_div(yclk, a);
2079 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2080 	a.full = dfixed_const(10);
2081 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
2082 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2083 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2084 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2085 
2086 	return dfixed_trunc(bandwidth);
2087 }
2088 
2089 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2090 {
2091 	/* Calculate the display Data return Bandwidth */
2092 	fixed20_12 return_efficiency; /* 0.8 */
2093 	fixed20_12 sclk, bandwidth;
2094 	fixed20_12 a;
2095 
2096 	a.full = dfixed_const(1000);
2097 	sclk.full = dfixed_const(wm->sclk);
2098 	sclk.full = dfixed_div(sclk, a);
2099 	a.full = dfixed_const(10);
2100 	return_efficiency.full = dfixed_const(8);
2101 	return_efficiency.full = dfixed_div(return_efficiency, a);
2102 	a.full = dfixed_const(32);
2103 	bandwidth.full = dfixed_mul(a, sclk);
2104 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2105 
2106 	return dfixed_trunc(bandwidth);
2107 }
2108 
/* DMIF requests are a fixed 32 bytes on DCE6. */
static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
{
	return 32;
}
2113 
/**
 * dce6_dmif_request_bandwidth - bandwidth of the DMIF request path
 * @wm: watermark parameters for the head being programmed
 *
 * The request path is limited by either the display clock (half a
 * request per clock) or the engine clock (one request per clock);
 * the smaller of the two is derated by the 0.8 efficiency factor.
 */
static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
{
	/* Calculate the DMIF Request Bandwidth */
	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
	fixed20_12 disp_clk, sclk, bandwidth;
	fixed20_12 a, b1, b2;
	u32 min_bandwidth;

	/* display-clock-limited rate: (bytes per request / 2) * disp_clk in MHz */
	a.full = dfixed_const(1000);
	disp_clk.full = dfixed_const(wm->disp_clk);
	disp_clk.full = dfixed_div(disp_clk, a);
	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
	b1.full = dfixed_mul(a, disp_clk);

	/* engine-clock-limited rate: bytes per request * sclk in MHz */
	a.full = dfixed_const(1000);
	sclk.full = dfixed_const(wm->sclk);
	sclk.full = dfixed_div(sclk, a);
	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
	b2.full = dfixed_mul(a, sclk);

	a.full = dfixed_const(10);
	disp_clk_request_efficiency.full = dfixed_const(8);
	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);

	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));

	a.full = dfixed_const(min_bandwidth);
	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);

	return dfixed_trunc(bandwidth);
}
2145 
2146 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2147 {
2148 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
2149 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2150 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2151 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2152 
2153 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2154 }
2155 
2156 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2157 {
2158 	/* Calculate the display mode Average Bandwidth
2159 	 * DisplayMode should contain the source and destination dimensions,
2160 	 * timing, etc.
2161 	 */
2162 	fixed20_12 bpp;
2163 	fixed20_12 line_time;
2164 	fixed20_12 src_width;
2165 	fixed20_12 bandwidth;
2166 	fixed20_12 a;
2167 
2168 	a.full = dfixed_const(1000);
2169 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2170 	line_time.full = dfixed_div(line_time, a);
2171 	bpp.full = dfixed_const(wm->bytes_per_pixel);
2172 	src_width.full = dfixed_const(wm->src_width);
2173 	bandwidth.full = dfixed_mul(src_width, bpp);
2174 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2175 	bandwidth.full = dfixed_div(bandwidth, line_time);
2176 
2177 	return dfixed_trunc(bandwidth);
2178 }
2179 
/**
 * dce6_latency_watermark - worst-case data return latency in ns
 * @wm: watermark parameters for the head being programmed
 *
 * Computes the latency (ns) the line buffer must hide: MC latency plus
 * the time other active heads may occupy the return path, plus the dc
 * pipe latency; extended when the line buffer cannot be refilled within
 * one active line.
 */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	/* ns to return one 512-byte * 8 chunk at the available bandwidth */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* heavy downscaling/interlacing needs up to 4 source lines per
	 * destination line, otherwise 2
	 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* rate at which the line buffer can actually be refilled */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fetch one destination line's worth of source data */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
2242 
2243 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2244 {
2245 	if (dce6_average_bandwidth(wm) <=
2246 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2247 		return true;
2248 	else
2249 		return false;
2250 };
2251 
2252 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2253 {
2254 	if (dce6_average_bandwidth(wm) <=
2255 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2256 		return true;
2257 	else
2258 		return false;
2259 };
2260 
2261 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2262 {
2263 	u32 lb_partitions = wm->lb_size / wm->src_width;
2264 	u32 line_time = wm->active_time + wm->blank_time;
2265 	u32 latency_tolerant_lines;
2266 	u32 latency_hiding;
2267 	fixed20_12 a;
2268 
2269 	a.full = dfixed_const(1);
2270 	if (wm->vsc.full > a.full)
2271 		latency_tolerant_lines = 1;
2272 	else {
2273 		if (lb_partitions <= (wm->vtaps + 1))
2274 			latency_tolerant_lines = 1;
2275 		else
2276 			latency_tolerant_lines = 2;
2277 	}
2278 
2279 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2280 
2281 	if (dce6_latency_watermark(wm) <= latency_hiding)
2282 		return true;
2283 	else
2284 		return false;
2285 }
2286 
/**
 * dce6_program_watermarks - program display watermarks for one crtc
 * @rdev: radeon device
 * @radeon_crtc: crtc to program
 * @lb_size: line buffer allocation for this crtc
 * @num_heads: number of active crtcs
 *
 * Builds watermark parameters for high and low clocks, programs the
 * latency watermarks A/B and the priority marks for this crtc, and
 * saves the results for DPM use.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm_low, wm_high;
	u32 dram_channels;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	/* NOTE(review): mode points at an embedded member and is never NULL;
	 * the enabled/num_heads checks are what actually gate this path
	 */
	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		/* line time in ns, clamped to the 16-bit register field */
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		if (rdev->family == CHIP_ARUBA)
			dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			dram_channels = si_get_number_of_dram_channels(rdev);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = dram_channels;
		wm_high.num_heads = num_heads;

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = dram_channels;
		wm_low.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
		/* set for low clocks */
		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce6_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce6_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A: watermark a (ns) scaled by pixel clock,
		 * hsc and the 16-pixel granularity of the mark field
		 */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same computation from watermark b */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;

		/* Save number of lines the linebuffer leads before the scanout */
		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
2449 
2450 void dce6_bandwidth_update(struct radeon_device *rdev)
2451 {
2452 	struct drm_display_mode *mode0 = NULL;
2453 	struct drm_display_mode *mode1 = NULL;
2454 	u32 num_heads = 0, lb_size;
2455 	int i;
2456 
2457 	if (!rdev->mode_info.mode_config_initialized)
2458 		return;
2459 
2460 	radeon_update_display_priority(rdev);
2461 
2462 	for (i = 0; i < rdev->num_crtc; i++) {
2463 		if (rdev->mode_info.crtcs[i]->base.enabled)
2464 			num_heads++;
2465 	}
2466 	for (i = 0; i < rdev->num_crtc; i += 2) {
2467 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2468 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2469 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2470 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2471 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2472 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2473 	}
2474 }
2475 
2476 /*
2477  * Core functions
2478  */
2479 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2480 {
2481 	u32 *tile = rdev->config.si.tile_mode_array;
2482 	const u32 num_tile_mode_states =
2483 			ARRAY_SIZE(rdev->config.si.tile_mode_array);
2484 	u32 reg_offset, split_equal_to_row_size;
2485 
2486 	switch (rdev->config.si.mem_row_size_in_kb) {
2487 	case 1:
2488 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2489 		break;
2490 	case 2:
2491 	default:
2492 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2493 		break;
2494 	case 4:
2495 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2496 		break;
2497 	}
2498 
2499 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2500 		tile[reg_offset] = 0;
2501 
2502 	switch(rdev->family) {
2503 	case CHIP_TAHITI:
2504 	case CHIP_PITCAIRN:
2505 		/* non-AA compressed depth or any compressed stencil */
2506 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2507 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2508 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2509 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2510 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2511 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2512 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2513 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2514 		/* 2xAA/4xAA compressed depth only */
2515 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2516 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2517 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2518 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2519 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2520 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2522 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2523 		/* 8xAA compressed depth only */
2524 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2526 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2527 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2528 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2529 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2530 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2531 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2532 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2533 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2534 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2535 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2536 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2537 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2538 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2540 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2541 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2542 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2543 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2544 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2545 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2546 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2547 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2548 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2549 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2550 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2551 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2552 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2553 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2554 			   TILE_SPLIT(split_equal_to_row_size) |
2555 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2556 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2558 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2559 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2560 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2561 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2562 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2563 			   TILE_SPLIT(split_equal_to_row_size) |
2564 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2565 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2566 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2567 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2568 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2569 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2570 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2571 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2572 			   TILE_SPLIT(split_equal_to_row_size) |
2573 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2574 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2575 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2576 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2577 		/* 1D and 1D Array Surfaces */
2578 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2579 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2580 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2581 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2582 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2583 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2584 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2585 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2586 		/* Displayable maps. */
2587 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2588 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2589 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2590 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2591 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2592 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2593 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2594 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2595 		/* Display 8bpp. */
2596 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2597 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2598 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2599 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2600 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2601 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2602 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2603 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2604 		/* Display 16bpp. */
2605 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2607 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2608 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2609 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2610 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2611 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2612 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2613 		/* Display 32bpp. */
2614 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2615 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2616 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2617 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2618 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2619 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2621 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2622 		/* Thin. */
2623 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2624 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2625 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2626 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2627 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2628 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2630 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2631 		/* Thin 8 bpp. */
2632 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2633 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2634 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2635 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2636 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2637 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2638 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2639 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2640 		/* Thin 16 bpp. */
2641 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2642 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2643 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2644 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2645 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2646 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2648 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2649 		/* Thin 32 bpp. */
2650 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2651 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2652 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2653 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2654 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2655 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2657 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2658 		/* Thin 64 bpp. */
2659 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2660 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2661 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2662 			   TILE_SPLIT(split_equal_to_row_size) |
2663 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2664 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2665 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2666 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2667 		/* 8 bpp PRT. */
2668 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2669 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2670 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2671 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2672 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2673 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2674 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2675 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2676 		/* 16 bpp PRT */
2677 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2678 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2679 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2680 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2681 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2682 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2683 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2684 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2685 		/* 32 bpp PRT */
2686 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2687 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2688 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2689 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2690 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2691 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2692 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2693 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2694 		/* 64 bpp PRT */
2695 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2696 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2697 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2698 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2699 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2700 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2701 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2702 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2703 		/* 128 bpp PRT */
2704 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2705 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2706 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2707 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2708 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2709 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2710 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2711 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2712 
2713 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2714 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2715 		break;
2716 
2717 	case CHIP_VERDE:
2718 	case CHIP_OLAND:
2719 	case CHIP_HAINAN:
2720 		/* non-AA compressed depth or any compressed stencil */
2721 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2722 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2723 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2724 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2725 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2726 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2728 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2729 		/* 2xAA/4xAA compressed depth only */
2730 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2731 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2732 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2733 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2734 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2735 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2736 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2737 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2738 		/* 8xAA compressed depth only */
2739 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2740 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2741 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2742 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2743 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2744 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2746 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2747 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2748 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2749 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2750 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2751 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2752 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2753 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2754 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2755 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2756 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2757 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2758 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2759 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2760 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2761 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2762 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2764 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2765 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2766 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2767 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2768 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2769 			   TILE_SPLIT(split_equal_to_row_size) |
2770 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2771 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2772 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2773 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2774 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2775 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2776 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2777 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778 			   TILE_SPLIT(split_equal_to_row_size) |
2779 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2780 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2782 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2783 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2784 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2786 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787 			   TILE_SPLIT(split_equal_to_row_size) |
2788 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2789 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2790 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2791 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2792 		/* 1D and 1D Array Surfaces */
2793 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2794 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2795 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2796 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2797 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2798 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2799 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2800 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2801 		/* Displayable maps. */
2802 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2803 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2804 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2805 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2806 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2807 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2808 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2809 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2810 		/* Display 8bpp. */
2811 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2812 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2813 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2815 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2816 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2817 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2818 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2819 		/* Display 16bpp. */
2820 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2821 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2822 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2823 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2824 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2825 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2826 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2827 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2828 		/* Display 32bpp. */
2829 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2830 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2831 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2832 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2833 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2834 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2835 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2836 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2837 		/* Thin. */
2838 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2839 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2840 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2841 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2842 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2843 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2844 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2845 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2846 		/* Thin 8 bpp. */
2847 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2848 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2849 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2850 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2851 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2852 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2854 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2855 		/* Thin 16 bpp. */
2856 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2857 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2858 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2859 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2860 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2861 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2863 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2864 		/* Thin 32 bpp. */
2865 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2866 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2867 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2868 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2869 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2870 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2871 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2872 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2873 		/* Thin 64 bpp. */
2874 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2875 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2876 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2877 			   TILE_SPLIT(split_equal_to_row_size) |
2878 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2879 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2880 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2881 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2882 		/* 8 bpp PRT. */
2883 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2884 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2885 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2886 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2887 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2888 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2889 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2890 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2891 		/* 16 bpp PRT */
2892 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2893 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2894 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2895 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2896 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2897 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2898 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2899 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2900 		/* 32 bpp PRT */
2901 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2902 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2903 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2904 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2905 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2906 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2907 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2908 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2909 		/* 64 bpp PRT */
2910 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2911 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2912 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2913 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2914 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2915 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2916 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2917 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2918 		/* 128 bpp PRT */
2919 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2920 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2921 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2922 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2923 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2924 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2925 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2926 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2927 
2928 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2929 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2930 		break;
2931 
2932 	default:
2933 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2934 	}
2935 }
2936 
2937 static void si_select_se_sh(struct radeon_device *rdev,
2938 			    u32 se_num, u32 sh_num)
2939 {
2940 	u32 data = INSTANCE_BROADCAST_WRITES;
2941 
2942 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2943 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2944 	else if (se_num == 0xffffffff)
2945 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2946 	else if (sh_num == 0xffffffff)
2947 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2948 	else
2949 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2950 	WREG32(GRBM_GFX_INDEX, data);
2951 }
2952 
2953 static u32 si_create_bitmask(u32 bit_width)
2954 {
2955 	u32 i, mask = 0;
2956 
2957 	for (i = 0; i < bit_width; i++) {
2958 		mask <<= 1;
2959 		mask |= 1;
2960 	}
2961 	return mask;
2962 }
2963 
2964 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2965 {
2966 	u32 data, mask;
2967 
2968 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2969 	if (data & 1)
2970 		data &= INACTIVE_CUS_MASK;
2971 	else
2972 		data = 0;
2973 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2974 
2975 	data >>= INACTIVE_CUS_SHIFT;
2976 
2977 	mask = si_create_bitmask(cu_per_sh);
2978 
2979 	return ~data & mask;
2980 }
2981 
2982 static void si_setup_spi(struct radeon_device *rdev,
2983 			 u32 se_num, u32 sh_per_se,
2984 			 u32 cu_per_sh)
2985 {
2986 	int i, j, k;
2987 	u32 data, mask, active_cu;
2988 
2989 	for (i = 0; i < se_num; i++) {
2990 		for (j = 0; j < sh_per_se; j++) {
2991 			si_select_se_sh(rdev, i, j);
2992 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2993 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2994 
2995 			mask = 1;
2996 			for (k = 0; k < 16; k++) {
2997 				mask <<= k;
2998 				if (active_cu & mask) {
2999 					data &= ~mask;
3000 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
3001 					break;
3002 				}
3003 			}
3004 		}
3005 	}
3006 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3007 }
3008 
3009 static u32 si_get_rb_disabled(struct radeon_device *rdev,
3010 			      u32 max_rb_num_per_se,
3011 			      u32 sh_per_se)
3012 {
3013 	u32 data, mask;
3014 
3015 	data = RREG32(CC_RB_BACKEND_DISABLE);
3016 	if (data & 1)
3017 		data &= BACKEND_DISABLE_MASK;
3018 	else
3019 		data = 0;
3020 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3021 
3022 	data >>= BACKEND_DISABLE_SHIFT;
3023 
3024 	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3025 
3026 	return data & mask;
3027 }
3028 
/*
 * si_setup_rb - determine enabled render backends and program raster config
 *
 * Builds a chip-wide bitmap of disabled RBs by querying each SE/SH pair,
 * derives the enabled-RB mask (stored in rdev->config.si.backend_enable_mask
 * for userspace queries), and programs PA_SC_RASTER_CONFIG per shader
 * engine according to which RBs in each pair are usable.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Gather the per-SH disabled-RB fields into one global bitmap,
	 * TAHITI_RB_BITMAP_WIDTH_PER_SH bits per shader array. */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* Invert: every RB not flagged disabled is enabled. */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	rdev->config.si.backend_enable_mask = enabled_rbs;

	/* Program raster config per SE.  enabled_rbs is consumed two
	 * bits (one RB pair) at a time across the whole loop, so each
	 * SE picks up where the previous one left off. */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			/* Map which RB of the pair is alive to a raster
			 * config field value.
			 * NOTE(review): the field shift uses the global SH
			 * index (i * sh_per_se + j) even though data is
			 * rebuilt per SE — confirm this matches the
			 * PA_SC_RASTER_CONFIG layout. */
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
3078 
3079 static void si_gpu_init(struct radeon_device *rdev)
3080 {
3081 	u32 gb_addr_config = 0;
3082 	u32 mc_shared_chmap, mc_arb_ramcfg;
3083 	u32 sx_debug_1;
3084 	u32 hdp_host_path_cntl;
3085 	u32 tmp;
3086 	int i, j;
3087 
3088 	switch (rdev->family) {
3089 	case CHIP_TAHITI:
3090 		rdev->config.si.max_shader_engines = 2;
3091 		rdev->config.si.max_tile_pipes = 12;
3092 		rdev->config.si.max_cu_per_sh = 8;
3093 		rdev->config.si.max_sh_per_se = 2;
3094 		rdev->config.si.max_backends_per_se = 4;
3095 		rdev->config.si.max_texture_channel_caches = 12;
3096 		rdev->config.si.max_gprs = 256;
3097 		rdev->config.si.max_gs_threads = 32;
3098 		rdev->config.si.max_hw_contexts = 8;
3099 
3100 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3101 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3102 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3103 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3104 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3105 		break;
3106 	case CHIP_PITCAIRN:
3107 		rdev->config.si.max_shader_engines = 2;
3108 		rdev->config.si.max_tile_pipes = 8;
3109 		rdev->config.si.max_cu_per_sh = 5;
3110 		rdev->config.si.max_sh_per_se = 2;
3111 		rdev->config.si.max_backends_per_se = 4;
3112 		rdev->config.si.max_texture_channel_caches = 8;
3113 		rdev->config.si.max_gprs = 256;
3114 		rdev->config.si.max_gs_threads = 32;
3115 		rdev->config.si.max_hw_contexts = 8;
3116 
3117 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3118 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3119 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3120 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3121 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3122 		break;
3123 	case CHIP_VERDE:
3124 	default:
3125 		rdev->config.si.max_shader_engines = 1;
3126 		rdev->config.si.max_tile_pipes = 4;
3127 		rdev->config.si.max_cu_per_sh = 5;
3128 		rdev->config.si.max_sh_per_se = 2;
3129 		rdev->config.si.max_backends_per_se = 4;
3130 		rdev->config.si.max_texture_channel_caches = 4;
3131 		rdev->config.si.max_gprs = 256;
3132 		rdev->config.si.max_gs_threads = 32;
3133 		rdev->config.si.max_hw_contexts = 8;
3134 
3135 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3136 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3137 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3138 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3139 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3140 		break;
3141 	case CHIP_OLAND:
3142 		rdev->config.si.max_shader_engines = 1;
3143 		rdev->config.si.max_tile_pipes = 4;
3144 		rdev->config.si.max_cu_per_sh = 6;
3145 		rdev->config.si.max_sh_per_se = 1;
3146 		rdev->config.si.max_backends_per_se = 2;
3147 		rdev->config.si.max_texture_channel_caches = 4;
3148 		rdev->config.si.max_gprs = 256;
3149 		rdev->config.si.max_gs_threads = 16;
3150 		rdev->config.si.max_hw_contexts = 8;
3151 
3152 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3153 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3154 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3155 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3156 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3157 		break;
3158 	case CHIP_HAINAN:
3159 		rdev->config.si.max_shader_engines = 1;
3160 		rdev->config.si.max_tile_pipes = 4;
3161 		rdev->config.si.max_cu_per_sh = 5;
3162 		rdev->config.si.max_sh_per_se = 1;
3163 		rdev->config.si.max_backends_per_se = 1;
3164 		rdev->config.si.max_texture_channel_caches = 2;
3165 		rdev->config.si.max_gprs = 256;
3166 		rdev->config.si.max_gs_threads = 16;
3167 		rdev->config.si.max_hw_contexts = 8;
3168 
3169 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3170 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3171 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3172 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3173 		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
3174 		break;
3175 	}
3176 
3177 	/* Initialize HDP */
3178 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3179 		WREG32((0x2c14 + j), 0x00000000);
3180 		WREG32((0x2c18 + j), 0x00000000);
3181 		WREG32((0x2c1c + j), 0x00000000);
3182 		WREG32((0x2c20 + j), 0x00000000);
3183 		WREG32((0x2c24 + j), 0x00000000);
3184 	}
3185 
3186 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3187 	WREG32(SRBM_INT_CNTL, 1);
3188 	WREG32(SRBM_INT_ACK, 1);
3189 
3190 	evergreen_fix_pci_max_read_req_size(rdev);
3191 
3192 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3193 
3194 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3195 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3196 
3197 	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3198 	rdev->config.si.mem_max_burst_length_bytes = 256;
3199 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3200 	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3201 	if (rdev->config.si.mem_row_size_in_kb > 4)
3202 		rdev->config.si.mem_row_size_in_kb = 4;
3203 	/* XXX use MC settings? */
3204 	rdev->config.si.shader_engine_tile_size = 32;
3205 	rdev->config.si.num_gpus = 1;
3206 	rdev->config.si.multi_gpu_tile_size = 64;
3207 
3208 	/* fix up row size */
3209 	gb_addr_config &= ~ROW_SIZE_MASK;
3210 	switch (rdev->config.si.mem_row_size_in_kb) {
3211 	case 1:
3212 	default:
3213 		gb_addr_config |= ROW_SIZE(0);
3214 		break;
3215 	case 2:
3216 		gb_addr_config |= ROW_SIZE(1);
3217 		break;
3218 	case 4:
3219 		gb_addr_config |= ROW_SIZE(2);
3220 		break;
3221 	}
3222 
3223 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3224 	 * not have bank info, so create a custom tiling dword.
3225 	 * bits 3:0   num_pipes
3226 	 * bits 7:4   num_banks
3227 	 * bits 11:8  group_size
3228 	 * bits 15:12 row_size
3229 	 */
3230 	rdev->config.si.tile_config = 0;
3231 	switch (rdev->config.si.num_tile_pipes) {
3232 	case 1:
3233 		rdev->config.si.tile_config |= (0 << 0);
3234 		break;
3235 	case 2:
3236 		rdev->config.si.tile_config |= (1 << 0);
3237 		break;
3238 	case 4:
3239 		rdev->config.si.tile_config |= (2 << 0);
3240 		break;
3241 	case 8:
3242 	default:
3243 		/* XXX what about 12? */
3244 		rdev->config.si.tile_config |= (3 << 0);
3245 		break;
3246 	}
3247 	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3248 	case 0: /* four banks */
3249 		rdev->config.si.tile_config |= 0 << 4;
3250 		break;
3251 	case 1: /* eight banks */
3252 		rdev->config.si.tile_config |= 1 << 4;
3253 		break;
3254 	case 2: /* sixteen banks */
3255 	default:
3256 		rdev->config.si.tile_config |= 2 << 4;
3257 		break;
3258 	}
3259 	rdev->config.si.tile_config |=
3260 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3261 	rdev->config.si.tile_config |=
3262 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3263 
3264 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3265 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3266 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3267 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3268 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3269 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3270 	if (rdev->has_uvd) {
3271 		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3272 		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3273 		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3274 	}
3275 
3276 	si_tiling_mode_table_init(rdev);
3277 
3278 	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3279 		    rdev->config.si.max_sh_per_se,
3280 		    rdev->config.si.max_backends_per_se);
3281 
3282 	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3283 		     rdev->config.si.max_sh_per_se,
3284 		     rdev->config.si.max_cu_per_sh);
3285 
3286 	rdev->config.si.active_cus = 0;
3287 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
3288 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
3289 			rdev->config.si.active_cus +=
3290 				hweight32(si_get_cu_active_bitmap(rdev, i, j));
3291 		}
3292 	}
3293 
3294 	/* set HW defaults for 3D engine */
3295 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3296 				     ROQ_IB2_START(0x2b)));
3297 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3298 
3299 	sx_debug_1 = RREG32(SX_DEBUG_1);
3300 	WREG32(SX_DEBUG_1, sx_debug_1);
3301 
3302 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3303 
3304 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3305 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3306 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3307 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3308 
3309 	WREG32(VGT_NUM_INSTANCES, 1);
3310 
3311 	WREG32(CP_PERFMON_CNTL, 0);
3312 
3313 	WREG32(SQ_CONFIG, 0);
3314 
3315 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3316 					  FORCE_EOV_MAX_REZ_CNT(255)));
3317 
3318 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3319 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3320 
3321 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3322 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3323 
3324 	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3325 	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3326 	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3327 	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3328 	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3329 	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3330 	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3331 	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3332 
3333 	tmp = RREG32(HDP_MISC_CNTL);
3334 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3335 	WREG32(HDP_MISC_CNTL, tmp);
3336 
3337 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3338 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3339 
3340 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3341 
3342 	udelay(50);
3343 }
3344 
3345 /*
3346  * GPU scratch registers helpers function.
3347  */
3348 static void si_scratch_init(struct radeon_device *rdev)
3349 {
3350 	int i;
3351 
3352 	rdev->scratch.num_reg = 7;
3353 	rdev->scratch.reg_base = SCRATCH_REG0;
3354 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3355 		rdev->scratch.free[i] = true;
3356 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3357 	}
3358 }
3359 
/*
 * si_fence_ring_emit - emit a fence on the gfx ring
 *
 * Flushes the read caches over the GART, then emits an
 * EVENT_WRITE_EOP packet that writes fence->seq to the fence
 * driver's GPU address and raises an interrupt.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF); /* CP_COHER_SIZE: full range */
	radeon_ring_write(ring, 0);          /* CP_COHER_BASE */
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, lower_32_bits(addr));
	/* DATA_SEL(1): write 32-bit seq; INT_SEL(2): interrupt after write */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3386 
3387 /*
3388  * IB stuff
3389  */
/*
 * si_ring_ib_execute - emit an indirect buffer on a gfx/compute ring
 *
 * Optionally records the ring's next read pointer (via the rptr save
 * register or the writeback buffer), emits the INDIRECT_BUFFER packet
 * for @ib, and for non-const IBs flushes the read caches for the IB's
 * VM id afterwards.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this write + 4 for the IB packet
			 * + 8 for the cache-flush tail below */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for this WRITE_DATA + 4 for the IB packet
			 * + 8 for the cache-flush tail below */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8)); /* write confirm */
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, vm_id);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF); /* CP_COHER_SIZE: full range */
		radeon_ring_write(ring, 0);          /* CP_COHER_BASE */
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3446 
3447 /*
3448  * CP.
3449  */
3450 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3451 {
3452 	if (enable)
3453 		WREG32(CP_ME_CNTL, 0);
3454 	else {
3455 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3456 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3457 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3458 		WREG32(SCRATCH_UMSK, 0);
3459 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3460 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3461 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3462 	}
3463 	udelay(50);
3464 }
3465 
3466 static int si_cp_load_microcode(struct radeon_device *rdev)
3467 {
3468 	int i;
3469 
3470 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3471 		return -EINVAL;
3472 
3473 	si_cp_enable(rdev, false);
3474 
3475 	if (rdev->new_fw) {
3476 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3477 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3478 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3479 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3480 		const struct gfx_firmware_header_v1_0 *me_hdr =
3481 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3482 		const __le32 *fw_data;
3483 		u32 fw_size;
3484 
3485 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3486 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3487 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3488 
3489 		/* PFP */
3490 		fw_data = (const __le32 *)
3491 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3492 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3493 		WREG32(CP_PFP_UCODE_ADDR, 0);
3494 		for (i = 0; i < fw_size; i++)
3495 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3496 		WREG32(CP_PFP_UCODE_ADDR, 0);
3497 
3498 		/* CE */
3499 		fw_data = (const __le32 *)
3500 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3501 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3502 		WREG32(CP_CE_UCODE_ADDR, 0);
3503 		for (i = 0; i < fw_size; i++)
3504 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3505 		WREG32(CP_CE_UCODE_ADDR, 0);
3506 
3507 		/* ME */
3508 		fw_data = (const __be32 *)
3509 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3510 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3511 		WREG32(CP_ME_RAM_WADDR, 0);
3512 		for (i = 0; i < fw_size; i++)
3513 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3514 		WREG32(CP_ME_RAM_WADDR, 0);
3515 	} else {
3516 		const __be32 *fw_data;
3517 
3518 		/* PFP */
3519 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3520 		WREG32(CP_PFP_UCODE_ADDR, 0);
3521 		for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3522 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3523 		WREG32(CP_PFP_UCODE_ADDR, 0);
3524 
3525 		/* CE */
3526 		fw_data = (const __be32 *)rdev->ce_fw->data;
3527 		WREG32(CP_CE_UCODE_ADDR, 0);
3528 		for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3529 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3530 		WREG32(CP_CE_UCODE_ADDR, 0);
3531 
3532 		/* ME */
3533 		fw_data = (const __be32 *)rdev->me_fw->data;
3534 		WREG32(CP_ME_RAM_WADDR, 0);
3535 		for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3536 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3537 		WREG32(CP_ME_RAM_WADDR, 0);
3538 	}
3539 
3540 	WREG32(CP_PFP_UCODE_ADDR, 0);
3541 	WREG32(CP_CE_UCODE_ADDR, 0);
3542 	WREG32(CP_ME_RAM_WADDR, 0);
3543 	WREG32(CP_ME_RAM_RADDR, 0);
3544 	return 0;
3545 }
3546 
3547 static int si_cp_start(struct radeon_device *rdev)
3548 {
3549 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3550 	int r, i;
3551 
3552 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3553 	if (r) {
3554 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3555 		return r;
3556 	}
3557 	/* init the CP */
3558 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3559 	radeon_ring_write(ring, 0x1);
3560 	radeon_ring_write(ring, 0x0);
3561 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3562 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3563 	radeon_ring_write(ring, 0);
3564 	radeon_ring_write(ring, 0);
3565 
3566 	/* init the CE partitions */
3567 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3568 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3569 	radeon_ring_write(ring, 0xc000);
3570 	radeon_ring_write(ring, 0xe000);
3571 	radeon_ring_unlock_commit(rdev, ring, false);
3572 
3573 	si_cp_enable(rdev, true);
3574 
3575 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3576 	if (r) {
3577 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3578 		return r;
3579 	}
3580 
3581 	/* setup clear context state */
3582 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3583 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3584 
3585 	for (i = 0; i < si_default_size; i++)
3586 		radeon_ring_write(ring, si_default_state[i]);
3587 
3588 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3589 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3590 
3591 	/* set clear context state */
3592 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3593 	radeon_ring_write(ring, 0);
3594 
3595 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3596 	radeon_ring_write(ring, 0x00000316);
3597 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3598 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3599 
3600 	radeon_ring_unlock_commit(rdev, ring, false);
3601 
3602 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3603 		ring = &rdev->ring[i];
3604 		r = radeon_ring_lock(rdev, ring, 2);
3605 
3606 		/* clear the compute context state */
3607 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3608 		radeon_ring_write(ring, 0);
3609 
3610 		radeon_ring_unlock_commit(rdev, ring, false);
3611 	}
3612 
3613 	return 0;
3614 }
3615 
3616 static void si_cp_fini(struct radeon_device *rdev)
3617 {
3618 	struct radeon_ring *ring;
3619 	si_cp_enable(rdev, false);
3620 
3621 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3622 	radeon_ring_fini(rdev, ring);
3623 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3624 
3625 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3626 	radeon_ring_fini(rdev, ring);
3627 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3628 
3629 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3630 	radeon_ring_fini(rdev, ring);
3631 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3632 }
3633 
/*
 * si_cp_resume - program and start the three CP ring buffers
 *
 * Programs the ring size, read/write pointers, writeback (rptr) address
 * and ring base for the gfx ring (RB0) and the two compute rings
 * (RB1/RB2), starts the CP via si_cp_start() and ring-tests each ring.
 *
 * Returns 0 on success, or the gfx ring test error; a failing test on a
 * compute ring only marks that ring not ready.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	si_enable_gui_idle_interrupt(rdev, false);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	/* log2 of the ring size in units of 8 bytes */
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		/* no writeback: stop the CP updating rptr in memory */
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		/* gfx ring failure is fatal: mark everything not ready */
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	si_enable_gui_idle_interrupt(rdev, true);

	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
3759 
/*
 * si_gpu_check_soft_reset - determine which GPU blocks appear hung
 *
 * Reads the GRBM/SRBM/DMA/VM status registers and builds a mask of
 * RADEON_RESET_* flags for every engine still reporting busy or with a
 * pending request.  An MC busy indication is deliberately dropped from
 * the mask before returning (the MC is most likely just busy, not hung).
 *
 * Returns the reset mask; 0 means everything looks idle.
 */
u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3840 
/*
 * si_gpu_soft_reset - soft-reset the engines flagged in @reset_mask
 *
 * Dumps status and the VM fault registers, disables PG/CG, stops the
 * RLC, halts the CP and (when flagged) both DMA ring buffers, stops the
 * MC, then pulses the per-engine bits in GRBM_SOFT_RESET and
 * SRBM_SOFT_RESET derived from @reset_mask before resuming the MC.
 * The RREG32 after each soft-reset WREG32 reads the register back so
 * the write has taken effect before the following delay.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable PG/CG */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* stop the rlc */
	si_rlc_stop(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM soft-reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* assert, hold 50us, then deassert the GRBM resets */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* same pulse sequence for the SRBM resets */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3972 
/*
 * si_set_clk_bypass_mode - force sclk/mclk onto their bypass sources
 *
 * Enables SPLL bypass, requests a clock control change and polls (up to
 * rdev->usec_timeout microseconds) for SPLL_CHG_STATUS, then clears the
 * request and takes MCLK off the MPLL.  Called on the PCI config reset
 * path before the SPLL is powered down.
 */
static void si_set_clk_bypass_mode(struct radeon_device *rdev)
{
	u32 tmp, i;

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_BYPASS_EN;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
	tmp |= SPLL_CTLREQ_CHG;
	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);

	/* wait for the change request to be acknowledged */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
			break;
		udelay(1);
	}

	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);

	tmp = RREG32(MPLL_CNTL_MODE);
	tmp &= ~MPLL_MCLK_SEL;
	WREG32(MPLL_CNTL_MODE, tmp);
}
3999 
/*
 * si_spll_powerdown - put the SPLL into reset and sleep
 *
 * Takes software control of the SPLL, asserts its reset and sleep bits,
 * then hands control back to the hardware.  Used on the PCI config
 * reset path after the clocks have been switched to bypass.
 */
static void si_spll_powerdown(struct radeon_device *rdev)
{
	u32 tmp;

	tmp = RREG32(SPLL_CNTL_MODE);
	tmp |= SPLL_SW_DIR_CONTROL;
	WREG32(SPLL_CNTL_MODE, tmp);

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_RESET;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_SLEEP;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	tmp = RREG32(SPLL_CNTL_MODE);
	tmp &= ~SPLL_SW_DIR_CONTROL;
	WREG32(SPLL_CNTL_MODE, tmp);
}
4020 
/*
 * si_gpu_pci_config_reset - full GPU reset via PCI config space
 *
 * Heavier-weight fallback to the soft reset: halts the CP, both DMA
 * ring buffers and the RLC, stops memory access, forces the clocks to
 * bypass, powers down the SPLL, disables bus mastering and then
 * triggers a PCI config reset.  Polls (up to rdev->usec_timeout) for
 * CONFIG_MEMSIZE to become readable again as the sign the ASIC is back.
 */
static void si_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
	/* dma0 */
	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	/* dma1 */
	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	si_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* set mclk/sclk to bypass */
	si_set_clk_bypass_mode(rdev);
	/* powerdown spll */
	si_spll_powerdown(rdev);
	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);
	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}
}
4072 
/**
 * si_asic_reset - attempt to reset the GPU
 *
 * @rdev: radeon_device pointer
 * @hard: skip the soft reset and go straight to a PCI config reset
 *
 * Tries a soft reset of every block flagged by si_gpu_check_soft_reset();
 * if something is still hung afterwards and the radeon_hard_reset module
 * option is set, falls back to a PCI config reset.  The BIOS scratch
 * "engine hung" flag is raised while a reset is pending and cleared once
 * the GPU reports idle.  Always returns 0.
 */
int si_asic_reset(struct radeon_device *rdev, bool hard)
{
	u32 reset_mask;

	if (hard) {
		si_gpu_pci_config_reset(rdev);
		return 0;
	}

	reset_mask = si_gpu_check_soft_reset(rdev);

	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	/* try soft reset */
	si_gpu_soft_reset(rdev, reset_mask);

	reset_mask = si_gpu_check_soft_reset(rdev);

	/* try pci config reset */
	if (reset_mask && radeon_hard_reset)
		si_gpu_pci_config_reset(rdev);

	reset_mask = si_gpu_check_soft_reset(rdev);

	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
4103 
4104 /**
4105  * si_gfx_is_lockup - Check if the GFX engine is locked up
4106  *
4107  * @rdev: radeon_device pointer
4108  * @ring: radeon_ring structure holding ring information
4109  *
4110  * Check if the GFX engine is locked up.
4111  * Returns true if the engine appears to be locked up, false if not.
4112  */
4113 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4114 {
4115 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
4116 
4117 	if (!(reset_mask & (RADEON_RESET_GFX |
4118 			    RADEON_RESET_COMPUTE |
4119 			    RADEON_RESET_CP))) {
4120 		radeon_ring_lockup_update(rdev, ring);
4121 		return false;
4122 	}
4123 	return radeon_ring_test_lockup(rdev, ring);
4124 }
4125 
4126 /* MC */
/*
 * si_mc_program - program the memory controller apertures
 *
 * Zeroes the HDP tiling surface registers, stops MC clients, programs
 * the system/FB aperture registers from rdev->mc, parks the AGP
 * aperture registers and resumes the MC.  VGA access through the HDP
 * is locked out and the VGA renderer disabled so it cannot scribble
 * over VRAM objects.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: top 16 bits = end, bottom 16 bits = start (in 16MB units) */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
4177 
4178 void si_vram_gtt_location(struct radeon_device *rdev,
4179 			  struct radeon_mc *mc)
4180 {
4181 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
4182 		/* leave room for at least 1024M GTT */
4183 		dev_warn(rdev->dev, "limiting VRAM\n");
4184 		mc->real_vram_size = 0xFFC0000000ULL;
4185 		mc->mc_vram_size = 0xFFC0000000ULL;
4186 	}
4187 	radeon_vram_location(rdev, &rdev->mc, 0);
4188 	rdev->mc.gtt_base_align = 0;
4189 	radeon_gtt_location(rdev, mc);
4190 }
4191 
4192 static int si_mc_init(struct radeon_device *rdev)
4193 {
4194 	u32 tmp;
4195 	int chansize, numchan;
4196 
4197 	/* Get VRAM informations */
4198 	rdev->mc.vram_is_ddr = true;
4199 	tmp = RREG32(MC_ARB_RAMCFG);
4200 	if (tmp & CHANSIZE_OVERRIDE) {
4201 		chansize = 16;
4202 	} else if (tmp & CHANSIZE_MASK) {
4203 		chansize = 64;
4204 	} else {
4205 		chansize = 32;
4206 	}
4207 	tmp = RREG32(MC_SHARED_CHMAP);
4208 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4209 	case 0:
4210 	default:
4211 		numchan = 1;
4212 		break;
4213 	case 1:
4214 		numchan = 2;
4215 		break;
4216 	case 2:
4217 		numchan = 4;
4218 		break;
4219 	case 3:
4220 		numchan = 8;
4221 		break;
4222 	case 4:
4223 		numchan = 3;
4224 		break;
4225 	case 5:
4226 		numchan = 6;
4227 		break;
4228 	case 6:
4229 		numchan = 10;
4230 		break;
4231 	case 7:
4232 		numchan = 12;
4233 		break;
4234 	case 8:
4235 		numchan = 16;
4236 		break;
4237 	}
4238 	rdev->mc.vram_width = numchan * chansize;
4239 	/* Could aper size report 0 ? */
4240 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4241 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4242 	/* size in MB on si */
4243 	tmp = RREG32(CONFIG_MEMSIZE);
4244 	/* some boards may have garbage in the upper 16 bits */
4245 	if (tmp & 0xffff0000) {
4246 		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4247 		if (tmp & 0xffff)
4248 			tmp &= 0xffff;
4249 	}
4250 	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4251 	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4252 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4253 	si_vram_gtt_location(rdev, &rdev->mc);
4254 	radeon_update_bandwidth_info(rdev);
4255 
4256 	return 0;
4257 }
4258 
4259 /*
4260  * GART
4261  */
/*
 * si_pcie_gart_tlb_flush - flush the HDP cache and the VM TLBs
 *
 * Flushes the HDP read cache and requests an invalidate of all 16 VM
 * contexts so page-table updates become visible.
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
4270 
/*
 * si_pcie_gart_enable - set up GART address translation
 *
 * Pins the GART table in VRAM, programs the L1 TLB and VM L2 cache,
 * points VM context 0 at the GART table covering the GTT aperture,
 * restores the saved page-directory addresses for contexts 1-15 and
 * enables full fault reporting on them, then flushes the TLBs.
 *
 * Returns 0 on success or a negative error code.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faults in context 0 resolve to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
4358 
/*
 * si_pcie_gart_disable - tear down GART address translation
 *
 * Saves the context 1-15 page-directory base addresses so a later
 * si_pcie_gart_enable() can restore them, disables all VM contexts,
 * puts the L1 TLB and L2 cache into a pass-through configuration and
 * unpins the GART table.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
4388 
/*
 * si_pcie_gart_fini - final GART teardown
 *
 * Disables translation, frees the GART table BO and releases the
 * common GART bookkeeping.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4395 
4396 /* vm parser */
/*
 * si_vm_reg_valid - check whether a VM (userspace) IB may touch a register
 *
 * Context registers (>= 0x28000) and shader registers (0xB000-0xBFFF)
 * are always allowed; any other register must be on the explicit
 * allow-list of config registers below.  Returns true if access is
 * permitted, false (with an error logged) otherwise.
 */
static bool si_vm_reg_valid(u32 reg)
{
	/* context regs are fine */
	if (reg >= 0x28000)
		return true;

	/* shader regs are also fine */
	if (reg >= 0xB000 && reg < 0xC000)
		return true;

	/* check config regs */
	switch (reg) {
	case GRBM_GFX_INDEX:
	case CP_STRMOUT_CNTL:
	case VGT_VTX_VECT_EJECT_REG:
	case VGT_CACHE_INVALIDATION:
	case VGT_ESGS_RING_SIZE:
	case VGT_GSVS_RING_SIZE:
	case VGT_GS_VERTEX_REUSE:
	case VGT_PRIMITIVE_TYPE:
	case VGT_INDEX_TYPE:
	case VGT_NUM_INDICES:
	case VGT_NUM_INSTANCES:
	case VGT_TF_RING_SIZE:
	case VGT_HS_OFFCHIP_PARAM:
	case VGT_TF_MEMORY_BASE:
	case PA_CL_ENHANCE:
	case PA_SU_LINE_STIPPLE_VALUE:
	case PA_SC_LINE_STIPPLE_STATE:
	case PA_SC_ENHANCE:
	case SQC_CACHES:
	case SPI_STATIC_THREAD_MGMT_1:
	case SPI_STATIC_THREAD_MGMT_2:
	case SPI_STATIC_THREAD_MGMT_3:
	case SPI_PS_MAX_WAVE_ID:
	case SPI_CONFIG_CNTL:
	case SPI_CONFIG_CNTL_1:
	case TA_CNTL_AUX:
	case TA_CS_BC_BASE_ADDR:
		return true;
	default:
		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
		return false;
	}
}
4442 
/*
 * si_vm_packet3_ce_check - validate a PACKET3 destined for the CE
 *
 * Only the listed constant-engine opcodes are permitted in a VM
 * (userspace) IB; anything else is rejected with -EINVAL.
 */
static int si_vm_packet3_ce_check(struct radeon_device *rdev,
				  u32 *ib, struct radeon_cs_packet *pkt)
{
	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_SET_CE_DE_COUNTERS:
	case PACKET3_LOAD_CONST_RAM:
	case PACKET3_WRITE_CONST_RAM:
	case PACKET3_WRITE_CONST_RAM_OFFSET:
	case PACKET3_DUMP_CONST_RAM:
	case PACKET3_INCREMENT_CE_COUNTER:
	case PACKET3_WAIT_ON_DE_COUNTER:
	case PACKET3_CE_WRITE:
		break;
	default:
		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4464 
4465 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4466 {
4467 	u32 start_reg, reg, i;
4468 	u32 command = ib[idx + 4];
4469 	u32 info = ib[idx + 1];
4470 	u32 idx_value = ib[idx];
4471 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4472 		/* src address space is register */
4473 		if (((info & 0x60000000) >> 29) == 0) {
4474 			start_reg = idx_value << 2;
4475 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4476 				reg = start_reg;
4477 				if (!si_vm_reg_valid(reg)) {
4478 					DRM_ERROR("CP DMA Bad SRC register\n");
4479 					return -EINVAL;
4480 				}
4481 			} else {
4482 				for (i = 0; i < (command & 0x1fffff); i++) {
4483 					reg = start_reg + (4 * i);
4484 					if (!si_vm_reg_valid(reg)) {
4485 						DRM_ERROR("CP DMA Bad SRC register\n");
4486 						return -EINVAL;
4487 					}
4488 				}
4489 			}
4490 		}
4491 	}
4492 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4493 		/* dst address space is register */
4494 		if (((info & 0x00300000) >> 20) == 0) {
4495 			start_reg = ib[idx + 2];
4496 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4497 				reg = start_reg;
4498 				if (!si_vm_reg_valid(reg)) {
4499 					DRM_ERROR("CP DMA Bad DST register\n");
4500 					return -EINVAL;
4501 				}
4502 			} else {
4503 				for (i = 0; i < (command & 0x1fffff); i++) {
4504 					reg = start_reg + (4 * i);
4505 				if (!si_vm_reg_valid(reg)) {
4506 						DRM_ERROR("CP DMA Bad DST register\n");
4507 						return -EINVAL;
4508 					}
4509 				}
4510 			}
4511 		}
4512 	}
4513 	return 0;
4514 }
4515 
/**
 * si_vm_packet3_gfx_check - validate a type-3 packet for the GFX ring
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer dwords
 * @pkt: packet to check (pkt->idx points at the packet header in @ib)
 *
 * Whitelist check for PM4 packets submitted by userspace via VM IBs.
 * Packets that cannot touch registers pass unchanged; packets that can
 * write registers have every destination register validated with
 * si_vm_reg_valid().
 *
 * Returns 0 if the packet is acceptable, -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;	/* first dword after the packet header */
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;

	switch (pkt->opcode) {
	/* these packets have no register side effects and are always allowed */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dst-sel field == 0 appears to mean "register destination"
		 * — TODO confirm against the PM4 packet spec */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		if ((idx_value & 0xf00) == 0) {
			/* destination is a register range */
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* one-register-write mode: all data dwords go
				 * to the same register */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* pkt->count - 2 data dwords follow dst addr */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		if (idx_value & 0x100) {
			/* write-to-register mode */
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		if (idx_value & 0x2) {
			/* destination is a register */
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* header dword holds the offset into config-reg space */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		/* CP DMA can read/write registers; checked separately */
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4633 
/**
 * si_vm_packet3_compute_check - validate a type-3 packet for a compute ring
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer dwords
 * @pkt: packet to check (pkt->idx points at the packet header in @ib)
 *
 * Same idea as si_vm_packet3_gfx_check() but with the smaller whitelist
 * of packets legal on the compute (CP1/CP2) rings: no draw/index packets.
 *
 * Returns 0 if the packet is acceptable, -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;	/* first dword after the packet header */
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	/* these packets have no register side effects and are always allowed */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dst-sel field == 0: destination is a register */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		if ((idx_value & 0xf00) == 0) {
			/* destination is a register range */
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* one-register-write mode */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		if (idx_value & 0x100) {
			/* write-to-register mode */
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		if (idx_value & 0x2) {
			/* destination is a register */
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		/* CP DMA can read/write registers; checked separately */
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4721 
/**
 * si_ib_parse - validate an indirect buffer before VM submission
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to validate
 *
 * Walks the IB packet by packet and checks each type-3 packet against
 * the whitelist for the ring it is destined for (CE, GFX, or compute).
 * Type-0 (direct register write) packets are rejected outright.  On any
 * failure the whole IB is dumped with a marker at the offending dword.
 *
 * Returns 0 on success, -EINVAL if a packet is rejected.
 */
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	int ret = 0;
	u32 idx = 0, i;
	struct radeon_cs_packet pkt;

	do {
		pkt.idx = idx;
		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
		pkt.one_reg_wr = 0;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			/* direct register writes from userspace IBs are
			 * never allowed; idx is not advanced here, but the
			 * error breaks out of the loop below */
			dev_err(rdev->dev, "Packet0 not allowed!\n");
			ret = -EINVAL;
			break;
		case RADEON_PACKET_TYPE2:
			/* type-2 is a one-dword filler/NOP */
			idx += 1;
			break;
		case RADEON_PACKET_TYPE3:
			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
			if (ib->is_const_ib)
				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
			else {
				switch (ib->ring) {
				case RADEON_RING_TYPE_GFX_INDEX:
					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
					break;
				case CAYMAN_RING_TYPE_CP1_INDEX:
				case CAYMAN_RING_TYPE_CP2_INDEX:
					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
					break;
				default:
					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
					ret = -EINVAL;
					break;
				}
			}
			/* header + count+1 payload dwords */
			idx += pkt.count + 2;
			break;
		default:
			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
			ret = -EINVAL;
			break;
		}
		if (ret) {
			/* dump the whole IB with the failing dword flagged */
			for (i = 0; i < ib->length_dw; i++) {
				if (i == idx)
					printk("\t0x%08x <---\n", ib->ptr[i]);
				else
					printk("\t0x%08x\n", ib->ptr[i]);
			}
			break;
		}
	} while (idx < ib->length_dw);

	return ret;
}
4780 
4781 /*
4782  * vm
4783  */
/**
 * si_vm_init - initialize the VM manager bookkeeping (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Only sets software-side parameters; no hardware is touched here.
 *
 * Returns 0 (cannot fail).
 */
int si_vm_init(struct radeon_device *rdev)
{
	/* number of VMs: SI exposes 16 hardware VM contexts */
	rdev->vm_manager.nvm = 16;
	/* base offset of vram pages */
	rdev->vm_manager.vram_base_offset = 0;

	return 0;
}
4793 
/**
 * si_vm_fini - VM manager teardown hook (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Nothing to release: si_vm_init() only set bookkeeping fields.
 */
void si_vm_fini(struct radeon_device *rdev)
{
}
4797 
4798 /**
4799  * si_vm_decode_fault - print human readable fault info
4800  *
4801  * @rdev: radeon_device pointer
4802  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4803  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4804  *
4805  * Print human readable fault information (SI).
4806  */
4807 static void si_vm_decode_fault(struct radeon_device *rdev,
4808 			       u32 status, u32 addr)
4809 {
4810 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4811 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4812 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4813 	char *block;
4814 
4815 	if (rdev->family == CHIP_TAHITI) {
4816 		switch (mc_id) {
4817 		case 160:
4818 		case 144:
4819 		case 96:
4820 		case 80:
4821 		case 224:
4822 		case 208:
4823 		case 32:
4824 		case 16:
4825 			block = "CB";
4826 			break;
4827 		case 161:
4828 		case 145:
4829 		case 97:
4830 		case 81:
4831 		case 225:
4832 		case 209:
4833 		case 33:
4834 		case 17:
4835 			block = "CB_FMASK";
4836 			break;
4837 		case 162:
4838 		case 146:
4839 		case 98:
4840 		case 82:
4841 		case 226:
4842 		case 210:
4843 		case 34:
4844 		case 18:
4845 			block = "CB_CMASK";
4846 			break;
4847 		case 163:
4848 		case 147:
4849 		case 99:
4850 		case 83:
4851 		case 227:
4852 		case 211:
4853 		case 35:
4854 		case 19:
4855 			block = "CB_IMMED";
4856 			break;
4857 		case 164:
4858 		case 148:
4859 		case 100:
4860 		case 84:
4861 		case 228:
4862 		case 212:
4863 		case 36:
4864 		case 20:
4865 			block = "DB";
4866 			break;
4867 		case 165:
4868 		case 149:
4869 		case 101:
4870 		case 85:
4871 		case 229:
4872 		case 213:
4873 		case 37:
4874 		case 21:
4875 			block = "DB_HTILE";
4876 			break;
4877 		case 167:
4878 		case 151:
4879 		case 103:
4880 		case 87:
4881 		case 231:
4882 		case 215:
4883 		case 39:
4884 		case 23:
4885 			block = "DB_STEN";
4886 			break;
4887 		case 72:
4888 		case 68:
4889 		case 64:
4890 		case 8:
4891 		case 4:
4892 		case 0:
4893 		case 136:
4894 		case 132:
4895 		case 128:
4896 		case 200:
4897 		case 196:
4898 		case 192:
4899 			block = "TC";
4900 			break;
4901 		case 112:
4902 		case 48:
4903 			block = "CP";
4904 			break;
4905 		case 49:
4906 		case 177:
4907 		case 50:
4908 		case 178:
4909 			block = "SH";
4910 			break;
4911 		case 53:
4912 		case 190:
4913 			block = "VGT";
4914 			break;
4915 		case 117:
4916 			block = "IH";
4917 			break;
4918 		case 51:
4919 		case 115:
4920 			block = "RLC";
4921 			break;
4922 		case 119:
4923 		case 183:
4924 			block = "DMA0";
4925 			break;
4926 		case 61:
4927 			block = "DMA1";
4928 			break;
4929 		case 248:
4930 		case 120:
4931 			block = "HDP";
4932 			break;
4933 		default:
4934 			block = "unknown";
4935 			break;
4936 		}
4937 	} else {
4938 		switch (mc_id) {
4939 		case 32:
4940 		case 16:
4941 		case 96:
4942 		case 80:
4943 		case 160:
4944 		case 144:
4945 		case 224:
4946 		case 208:
4947 			block = "CB";
4948 			break;
4949 		case 33:
4950 		case 17:
4951 		case 97:
4952 		case 81:
4953 		case 161:
4954 		case 145:
4955 		case 225:
4956 		case 209:
4957 			block = "CB_FMASK";
4958 			break;
4959 		case 34:
4960 		case 18:
4961 		case 98:
4962 		case 82:
4963 		case 162:
4964 		case 146:
4965 		case 226:
4966 		case 210:
4967 			block = "CB_CMASK";
4968 			break;
4969 		case 35:
4970 		case 19:
4971 		case 99:
4972 		case 83:
4973 		case 163:
4974 		case 147:
4975 		case 227:
4976 		case 211:
4977 			block = "CB_IMMED";
4978 			break;
4979 		case 36:
4980 		case 20:
4981 		case 100:
4982 		case 84:
4983 		case 164:
4984 		case 148:
4985 		case 228:
4986 		case 212:
4987 			block = "DB";
4988 			break;
4989 		case 37:
4990 		case 21:
4991 		case 101:
4992 		case 85:
4993 		case 165:
4994 		case 149:
4995 		case 229:
4996 		case 213:
4997 			block = "DB_HTILE";
4998 			break;
4999 		case 39:
5000 		case 23:
5001 		case 103:
5002 		case 87:
5003 		case 167:
5004 		case 151:
5005 		case 231:
5006 		case 215:
5007 			block = "DB_STEN";
5008 			break;
5009 		case 72:
5010 		case 68:
5011 		case 8:
5012 		case 4:
5013 		case 136:
5014 		case 132:
5015 		case 200:
5016 		case 196:
5017 			block = "TC";
5018 			break;
5019 		case 112:
5020 		case 48:
5021 			block = "CP";
5022 			break;
5023 		case 49:
5024 		case 177:
5025 		case 50:
5026 		case 178:
5027 			block = "SH";
5028 			break;
5029 		case 53:
5030 			block = "VGT";
5031 			break;
5032 		case 117:
5033 			block = "IH";
5034 			break;
5035 		case 51:
5036 		case 115:
5037 			block = "RLC";
5038 			break;
5039 		case 119:
5040 		case 183:
5041 			block = "DMA0";
5042 			break;
5043 		case 61:
5044 			block = "DMA1";
5045 			break;
5046 		case 248:
5047 		case 120:
5048 			block = "HDP";
5049 			break;
5050 		default:
5051 			block = "unknown";
5052 			break;
5053 		}
5054 	}
5055 
5056 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5057 	       protections, vmid, addr,
5058 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5059 	       block, mc_id);
5060 }
5061 
/**
 * si_vm_flush - flush the VM TLBs via ring commands (SI)
 *
 * @rdev: radeon_device pointer
 * @ring: ring to emit the flush on
 * @vm_id: VM context id to flush (0-15)
 * @pd_addr: new page directory base address
 *
 * Emits the packet sequence that updates the page table base for
 * @vm_id, flushes the HDP cache, invalidates the TLB for that VM
 * context and waits for the invalidate to complete.  The order of the
 * packets below is significant — do not reorder.
 */
void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		 unsigned vm_id, uint64_t pd_addr)
{
	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));

	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
5110 
5111 /*
5112  *  Power and clock gating
5113  */
5114 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5115 {
5116 	int i;
5117 
5118 	for (i = 0; i < rdev->usec_timeout; i++) {
5119 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5120 			break;
5121 		udelay(1);
5122 	}
5123 
5124 	for (i = 0; i < rdev->usec_timeout; i++) {
5125 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5126 			break;
5127 		udelay(1);
5128 	}
5129 }
5130 
/*
 * Enable/disable the GUI-idle (context busy/empty) interrupt on ring 0.
 * When disabling, also wait for the GFX block to settle into the
 * clocked+powered state before returning.
 */
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable)
{
	u32 tmp = RREG32(CP_INT_CNTL_RING0);
	u32 mask;
	int i;

	if (enable)
		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	else
		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);

	if (!enable) {
		/* read a gfx register */
		/* the value is discarded; presumably a posting read to wake
		 * the GFX block before polling RLC_STAT — TODO confirm */
		tmp = RREG32(DB_DEPTH_INFO);

		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
				break;
			udelay(1);
		}
	}
}
5156 
5157 static void si_set_uvd_dcm(struct radeon_device *rdev,
5158 			   bool sw_mode)
5159 {
5160 	u32 tmp, tmp2;
5161 
5162 	tmp = RREG32(UVD_CGC_CTRL);
5163 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5164 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
5165 
5166 	if (sw_mode) {
5167 		tmp &= ~0x7ffff800;
5168 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5169 	} else {
5170 		tmp |= 0x7ffff800;
5171 		tmp2 = 0;
5172 	}
5173 
5174 	WREG32(UVD_CGC_CTRL, tmp);
5175 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5176 }
5177 
/*
 * Initialize UVD internal clock gating.
 *
 * NOTE(review): hw_mode is hard-coded true, so the else branch is
 * currently dead code; it is presumably kept as a manual debug toggle —
 * confirm before removing.
 */
void si_init_uvd_internal_cg(struct radeon_device *rdev)
{
	bool hw_mode = true;

	if (hw_mode) {
		/* hardware-controlled dynamic clock mode */
		si_set_uvd_dcm(rdev, false);
	} else {
		/* disable dynamic clock mode entirely */
		u32 tmp = RREG32(UVD_CGC_CTRL);
		tmp &= ~DCM;
		WREG32(UVD_CGC_CTRL, tmp);
	}
}
5190 
5191 static u32 si_halt_rlc(struct radeon_device *rdev)
5192 {
5193 	u32 data, orig;
5194 
5195 	orig = data = RREG32(RLC_CNTL);
5196 
5197 	if (data & RLC_ENABLE) {
5198 		data &= ~RLC_ENABLE;
5199 		WREG32(RLC_CNTL, data);
5200 
5201 		si_wait_for_rlc_serdes(rdev);
5202 	}
5203 
5204 	return orig;
5205 }
5206 
5207 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5208 {
5209 	u32 tmp;
5210 
5211 	tmp = RREG32(RLC_CNTL);
5212 	if (tmp != rlc)
5213 		WREG32(RLC_CNTL, rlc);
5214 }
5215 
5216 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5217 {
5218 	u32 data, orig;
5219 
5220 	orig = data = RREG32(DMA_PG);
5221 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5222 		data |= PG_CNTL_ENABLE;
5223 	else
5224 		data &= ~PG_CNTL_ENABLE;
5225 	if (orig != data)
5226 		WREG32(DMA_PG, data);
5227 }
5228 
5229 static void si_init_dma_pg(struct radeon_device *rdev)
5230 {
5231 	u32 tmp;
5232 
5233 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
5234 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5235 
5236 	for (tmp = 0; tmp < 5; tmp++)
5237 		WREG32(DMA_PGFSM_WRITE, 0);
5238 }
5239 
/*
 * Enable/disable GFX coarse-grain power gating.  Enabling programs the
 * RLC power-up/down delays and turns on automatic power gating;
 * disabling turns the automatic path back off.
 */
static void si_enable_gfx_cgpg(struct radeon_device *rdev,
			       bool enable)
{
	u32 tmp;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		/* power-up/down/etc. delay values */
		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
		WREG32(RLC_TTOP_D, tmp);

		tmp = RREG32(RLC_PG_CNTL);
		tmp |= GFX_PG_ENABLE;
		WREG32(RLC_PG_CNTL, tmp);

		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp |= AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);
	} else {
		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp &= ~AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);

		/* value discarded; presumably a dummy read to wake the GFX
		 * block after disabling auto power gating — TODO confirm */
		tmp = RREG32(DB_RENDER_CONTROL);
	}
}
5264 
/*
 * One-time GFX power-gating setup: point the RLC at the save/restore
 * and clear-state buffers and program the auto-PG idle threshold.
 */
static void si_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);

	tmp = RREG32(RLC_PG_CNTL);
	tmp |= GFX_PG_SRC;
	WREG32(RLC_PG_CNTL, tmp);

	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	tmp = RREG32(RLC_AUTO_PG_CTRL);

	/* GRBM register save-group idle threshold */
	tmp &= ~GRBM_REG_SGIT_MASK;
	tmp |= GRBM_REG_SGIT(0x700);
	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
	WREG32(RLC_AUTO_PG_CTRL, tmp);
}
5284 
5285 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5286 {
5287 	u32 mask = 0, tmp, tmp1;
5288 	int i;
5289 
5290 	si_select_se_sh(rdev, se, sh);
5291 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5292 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5293 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5294 
5295 	tmp &= 0xffff0000;
5296 
5297 	tmp |= tmp1;
5298 	tmp >>= 16;
5299 
5300 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5301 		mask <<= 1;
5302 		mask |= 1;
5303 	}
5304 
5305 	return (~tmp) & mask;
5306 }
5307 
5308 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5309 {
5310 	u32 i, j, k, active_cu_number = 0;
5311 	u32 mask, counter, cu_bitmap;
5312 	u32 tmp = 0;
5313 
5314 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5315 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5316 			mask = 1;
5317 			cu_bitmap = 0;
5318 			counter  = 0;
5319 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5320 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5321 					if (counter < 2)
5322 						cu_bitmap |= mask;
5323 					counter++;
5324 				}
5325 				mask <<= 1;
5326 			}
5327 
5328 			active_cu_number += counter;
5329 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5330 		}
5331 	}
5332 
5333 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5334 
5335 	tmp = RREG32(RLC_MAX_PG_CU);
5336 	tmp &= ~MAX_PU_CU_MASK;
5337 	tmp |= MAX_PU_CU(active_cu_number);
5338 	WREG32(RLC_MAX_PG_CU, tmp);
5339 }
5340 
/*
 * Enable/disable coarse-grain clock gating (CGCG) and clock-gating
 * light sleep (CGLS) for the GFX block.  The RLC must be halted while
 * the serdes masks are reprogrammed; the statement order below follows
 * the required hardware sequence — do not reorder.
 */
static void si_enable_cgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		si_enable_gui_idle_interrupt(rdev, true);

		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

		/* halt the RLC while reprogramming the serdes */
		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

		si_wait_for_rlc_serdes(rdev);

		/* restore the saved RLC_CNTL value */
		si_update_rlc(rdev, tmp);

		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

		data |= CGCG_EN | CGLS_EN;
	} else {
		si_enable_gui_idle_interrupt(rdev, false);

		/* values discarded; presumably posting/settling reads before
		 * turning the gating off — TODO confirm */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
5380 
/*
 * Enable/disable medium-grain clock gating (MGCG) for the GFX block,
 * including CP memory light sleep and the CGTS/RLC override masks.
 * The RLC is halted around the serdes writes in both directions.
 */
static void si_enable_mgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		/* fixed CGTS configuration value — meaning of the individual
		 * fields not visible here; from the hw programming docs */
		data = 0x96940200;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
			orig = data = RREG32(CP_MEM_SLP_CNTL);
			data |= CP_MEM_LS_EN;
			if (orig != data)
				WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* clear the low override bits to allow gating */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xffffffc0;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

		si_update_rlc(rdev, tmp);
	} else {
		/* set override bits to block gating */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= LS_OVERRIDE | OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

		si_update_rlc(rdev, tmp);
	}
}
5436 
/*
 * Enable/disable UVD medium-grain clock gating via the UVD context
 * registers, the DCM bit, and the SMC-indirect CGTT local masks.
 */
static void si_enable_uvd_mgcg(struct radeon_device *rdev,
			       bool enable)
{
	u32 orig, data, tmp;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp |= 0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		/* 0 == no CGTT overrides, gating fully enabled */
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
	} else {
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp &= ~0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		/* all-ones == override everything, gating disabled */
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
	}
}
5468 
/* Memory-controller/ATC/VM clock-gating control registers toggled in a
 * batch by si_enable_mc_ls() and si_enable_mc_mgcg(). */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5481 
5482 static void si_enable_mc_ls(struct radeon_device *rdev,
5483 			    bool enable)
5484 {
5485 	int i;
5486 	u32 orig, data;
5487 
5488 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5489 		orig = data = RREG32(mc_cg_registers[i]);
5490 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5491 			data |= MC_LS_ENABLE;
5492 		else
5493 			data &= ~MC_LS_ENABLE;
5494 		if (data != orig)
5495 			WREG32(mc_cg_registers[i], data);
5496 	}
5497 }
5498 
5499 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5500 			       bool enable)
5501 {
5502 	int i;
5503 	u32 orig, data;
5504 
5505 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5506 		orig = data = RREG32(mc_cg_registers[i]);
5507 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5508 			data |= MC_CG_ENABLE;
5509 		else
5510 			data &= ~MC_CG_ENABLE;
5511 		if (data != orig)
5512 			WREG32(mc_cg_registers[i], data);
5513 	}
5514 }
5515 
5516 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5517 			       bool enable)
5518 {
5519 	u32 orig, data, offset;
5520 	int i;
5521 
5522 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5523 		for (i = 0; i < 2; i++) {
5524 			if (i == 0)
5525 				offset = DMA0_REGISTER_OFFSET;
5526 			else
5527 				offset = DMA1_REGISTER_OFFSET;
5528 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5529 			data &= ~MEM_POWER_OVERRIDE;
5530 			if (data != orig)
5531 				WREG32(DMA_POWER_CNTL + offset, data);
5532 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5533 		}
5534 	} else {
5535 		for (i = 0; i < 2; i++) {
5536 			if (i == 0)
5537 				offset = DMA0_REGISTER_OFFSET;
5538 			else
5539 				offset = DMA1_REGISTER_OFFSET;
5540 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5541 			data |= MEM_POWER_OVERRIDE;
5542 			if (data != orig)
5543 				WREG32(DMA_POWER_CNTL + offset, data);
5544 
5545 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5546 			data = 0xff000000;
5547 			if (data != orig)
5548 				WREG32(DMA_CLK_CTRL + offset, data);
5549 		}
5550 	}
5551 }
5552 
5553 static void si_enable_bif_mgls(struct radeon_device *rdev,
5554 			       bool enable)
5555 {
5556 	u32 orig, data;
5557 
5558 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5559 
5560 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5561 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5562 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5563 	else
5564 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5565 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5566 
5567 	if (orig != data)
5568 		WREG32_PCIE(PCIE_CNTL2, data);
5569 }
5570 
5571 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5572 			       bool enable)
5573 {
5574 	u32 orig, data;
5575 
5576 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5577 
5578 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5579 		data &= ~CLOCK_GATING_DIS;
5580 	else
5581 		data |= CLOCK_GATING_DIS;
5582 
5583 	if (orig != data)
5584 		WREG32(HDP_HOST_PATH_CNTL, data);
5585 }
5586 
5587 static void si_enable_hdp_ls(struct radeon_device *rdev,
5588 			     bool enable)
5589 {
5590 	u32 orig, data;
5591 
5592 	orig = data = RREG32(HDP_MEM_POWER_LS);
5593 
5594 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5595 		data |= HDP_LS_ENABLE;
5596 	else
5597 		data &= ~HDP_LS_ENABLE;
5598 
5599 	if (orig != data)
5600 		WREG32(HDP_MEM_POWER_LS, data);
5601 }
5602 
/*
 * Enable/disable clock gating for the blocks selected in @block
 * (RADEON_CG_BLOCK_* mask).  Per-block support flags are checked inside
 * the individual helpers, so callers can pass a broad mask.
 */
static void si_update_cg(struct radeon_device *rdev,
			 u32 block, bool enable)
{
	if (block & RADEON_CG_BLOCK_GFX) {
		si_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			/* MGCG before CGCG when enabling... */
			si_enable_mgcg(rdev, true);
			si_enable_cgcg(rdev, true);
		} else {
			/* ...and the reverse when disabling */
			si_enable_cgcg(rdev, false);
			si_enable_mgcg(rdev, false);
		}
		si_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		si_enable_mc_mgcg(rdev, enable);
		si_enable_mc_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		si_enable_dma_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		si_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd) {
			si_enable_uvd_mgcg(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		si_enable_hdp_mgcg(rdev, enable);
		si_enable_hdp_ls(rdev, enable);
	}
}
5643 
/*
 * Turn on clock gating for all supported blocks at init time.  UVD is
 * handled separately (and last) because it also needs its internal
 * clock-gating setup.
 */
static void si_init_cg(struct radeon_device *rdev)
{
	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
			    RADEON_CG_BLOCK_MC |
			    RADEON_CG_BLOCK_SDMA |
			    RADEON_CG_BLOCK_BIF |
			    RADEON_CG_BLOCK_HDP), true);
	if (rdev->has_uvd) {
		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
		si_init_uvd_internal_cg(rdev);
	}
}
5656 
/*
 * Turn off clock gating on teardown — reverse of si_init_cg():
 * UVD first, then the core blocks.
 */
static void si_fini_cg(struct radeon_device *rdev)
{
	if (rdev->has_uvd) {
		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
	}
	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
			    RADEON_CG_BLOCK_MC |
			    RADEON_CG_BLOCK_SDMA |
			    RADEON_CG_BLOCK_BIF |
			    RADEON_CG_BLOCK_HDP), false);
}
5668 
5669 u32 si_get_csb_size(struct radeon_device *rdev)
5670 {
5671 	u32 count = 0;
5672 	const struct cs_section_def *sect = NULL;
5673 	const struct cs_extent_def *ext = NULL;
5674 
5675 	if (rdev->rlc.cs_data == NULL)
5676 		return 0;
5677 
5678 	/* begin clear state */
5679 	count += 2;
5680 	/* context control state */
5681 	count += 3;
5682 
5683 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5684 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5685 			if (sect->id == SECT_CONTEXT)
5686 				count += 2 + ext->reg_count;
5687 			else
5688 				return 0;
5689 		}
5690 	}
5691 	/* pa_sc_raster_config */
5692 	count += 3;
5693 	/* end clear state */
5694 	count += 2;
5695 	/* clear state */
5696 	count += 2;
5697 
5698 	return count;
5699 }
5700 
/*
 * Fill @buffer with the clear-state command stream: preamble, context
 * control, the SECT_CONTEXT register extents from rlc.cs_data, a
 * per-ASIC PA_SC_RASTER_CONFIG value, and the trailing CLEAR_STATE.
 * @buffer must be at least si_get_csb_size() dwords; all values are
 * stored little-endian.
 */
void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				/* context-reg offsets are relative to 0xa000 */
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* only SECT_CONTEXT sections are supported */
				return;
			}
		}
	}

	/* per-ASIC raster configuration (values from the hw docs) */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
		buffer[count++] = cpu_to_le32(0x2a00126a);
		break;
	case CHIP_VERDE:
		buffer[count++] = cpu_to_le32(0x0000124a);
		break;
	case CHIP_OLAND:
		buffer[count++] = cpu_to_le32(0x00000082);
		break;
	case CHIP_HAINAN:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	/* end clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
5760 
/* Set up powergating.  When any PG feature flag is set, initialize the
 * supported blocks and enable them; otherwise just point the RLC at the
 * save/restore and clear-state buffers (addresses are programmed >> 8).
 */
static void si_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
			si_init_dma_pg(rdev);
		}
		si_init_ao_cu_mask(rdev);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			si_init_gfx_cgpg(rdev);
		} else {
			/* no GFX PG: still program the RLC buffer addresses */
			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
		}
		si_enable_dma_pg(rdev, true);
		si_enable_gfx_cgpg(rdev, true);
	} else {
		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
	}
}
5781 
5782 static void si_fini_pg(struct radeon_device *rdev)
5783 {
5784 	if (rdev->pg_flags) {
5785 		si_enable_dma_pg(rdev, false);
5786 		si_enable_gfx_cgpg(rdev, false);
5787 	}
5788 }
5789 
5790 /*
5791  * RLC
5792  */
/* Pulse the RLC soft reset bit in GRBM_SOFT_RESET, waiting 50us after
 * both the assert and the deassert so the reset can take effect. */
void si_rlc_reset(struct radeon_device *rdev)
{
	u32 tmp = RREG32(GRBM_SOFT_RESET);

	tmp |= SOFT_RESET_RLC;
	WREG32(GRBM_SOFT_RESET, tmp);
	udelay(50);
	tmp &= ~SOFT_RESET_RLC;
	WREG32(GRBM_SOFT_RESET, tmp);
	udelay(50);
}
5804 
/* Halt the RLC: clear RLC_CNTL, mask the GUI idle interrupt, and wait
 * for the RLC serdes to go idle before returning. */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	si_enable_gui_idle_interrupt(rdev, false);

	si_wait_for_rlc_serdes(rdev);
}
5813 
/* Start the RLC and re-enable the GUI idle interrupt; the short delay
 * gives the RLC time to come up before callers touch it. */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	si_enable_gui_idle_interrupt(rdev, true);

	udelay(50);
}
5822 
5823 static bool si_lbpw_supported(struct radeon_device *rdev)
5824 {
5825 	u32 tmp;
5826 
5827 	/* Enable LBPW only for DDR3 */
5828 	tmp = RREG32(MC_SEQ_MISC0);
5829 	if ((tmp & 0xF0000000) == 0xB0000000)
5830 		return true;
5831 	return false;
5832 }
5833 
5834 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5835 {
5836 	u32 tmp;
5837 
5838 	tmp = RREG32(RLC_LB_CNTL);
5839 	if (enable)
5840 		tmp |= LOAD_BALANCE_ENABLE;
5841 	else
5842 		tmp &= ~LOAD_BALANCE_ENABLE;
5843 	WREG32(RLC_LB_CNTL, tmp);
5844 
5845 	if (!enable) {
5846 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5847 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5848 	}
5849 }
5850 
/* Bring up the RLC: stop and reset it, initialize powergating and
 * clockgating, clear the RLC configuration registers, upload the RLC
 * microcode, enable LBPW if the board supports it, and start the RLC.
 * Returns -EINVAL when no RLC firmware has been loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	si_rlc_reset(rdev);

	si_init_pg(rdev);

	si_init_cg(rdev);

	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* new-style firmware: little-endian payload behind a
		 * rlc_firmware_header_v1_0 header */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		const __le32 *fw_data = (const __le32 *)
			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		for (i = 0; i < fw_size; i++) {
			WREG32(RLC_UCODE_ADDR, i);
			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
		}
	} else {
		/* legacy firmware: raw big-endian words, fixed size */
		const __be32 *fw_data =
			(const __be32 *)rdev->rlc_fw->data;
		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
			WREG32(RLC_UCODE_ADDR, i);
			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
		}
	}
	/* reset the ucode write address after the upload */
	WREG32(RLC_UCODE_ADDR, 0);

	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
5905 
/* Globally enable interrupt delivery: turn on the IH controller and
 * the IH ring buffer, and record the enabled state. */
static void si_enable_interrupts(struct radeon_device *rdev)
{
	u32 ih_cntl = RREG32(IH_CNTL);
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);

	ih_cntl |= ENABLE_INTR;
	ih_rb_cntl |= IH_RB_ENABLE;
	WREG32(IH_CNTL, ih_cntl);
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	rdev->ih.enabled = true;
}
5917 
/* Globally disable interrupt delivery: stop the IH ring and controller,
 * reset the ring read/write pointers, and record the disabled state. */
static void si_disable_interrupts(struct radeon_device *rdev)
{
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
	u32 ih_cntl = RREG32(IH_CNTL);

	ih_rb_cntl &= ~IH_RB_ENABLE;
	ih_cntl &= ~ENABLE_INTR;
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	WREG32(IH_CNTL, ih_cntl);
	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);
	rdev->ih.enabled = false;
	rdev->ih.rptr = 0;
}
5933 
/* Mask every interrupt source: CP rings, both DMA engines, GRBM/SRBM,
 * per-CRTC vblank/vline and pageflip interrupts, and (on parts with a
 * display engine) DAC autodetect and HPD.  HPD polarity bits are
 * preserved while the enable bits are cleared. */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* keep only the context busy/empty enables on ring 0 */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	/* clear the trap enable on both DMA engines */
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	WREG32(SRBM_INT_CNTL, 0);
	/* mask vblank/vline interrupts on all present CRTCs */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* mask pageflip interrupts on all present CRTCs */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

		/* clear HPD enables but keep the polarity bits */
		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
5992 
/* One-time interrupt controller bring-up: allocate the IH ring, bring
 * up the RLC (which feeds the IH), program the IH ring registers and
 * writeback address, force all sources masked, then enable the IH.
 * Returns 0 on success or a negative error code (ring allocation or
 * RLC resume failure). */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size register wants log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
6063 
/**
 * si_irq_set - program the current interrupt enable state into hardware
 * @rdev: radeon_device pointer
 *
 * Builds the enable masks from rdev->irq (CP rings, DMA engines,
 * per-CRTC vblank/pflip, HPD, thermal) and writes them to the
 * corresponding registers.  If the IH is disabled, everything is
 * masked instead.  Returns 0 on success, -EINVAL when no interrupt
 * handler is installed.
 */
int si_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;
	u32 thermal_int = 0;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* start from the current register values with the bits we manage
	 * cleared, so unrelated bits are preserved on write-back */
	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);

	if (!ASIC_IS_NODCE(rdev)) {
		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	}

	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	thermal_int = RREG32(CG_THERMAL_INT) &
		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}
	/* vblank: enabled for either a vblank client or a pending pflip */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("si_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("si_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("si_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("si_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("si_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("si_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("si_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("si_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("si_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("si_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("si_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("si_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}

	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	if (rdev->irq.dpm_thermal) {
		DRM_DEBUG("dpm thermal\n");
		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
	}

	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	/* pflip interrupts stay unconditionally enabled on present CRTCs */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DC_HPD1_INT_CONTROL, hpd1);
		WREG32(DC_HPD2_INT_CONTROL, hpd2);
		WREG32(DC_HPD3_INT_CONTROL, hpd3);
		WREG32(DC_HPD4_INT_CONTROL, hpd4);
		WREG32(DC_HPD5_INT_CONTROL, hpd5);
		WREG32(DC_HPD6_INT_CONTROL, hpd6);
	}

	WREG32(CG_THERMAL_INT, thermal_int);

	/* posting read */
	RREG32(SRBM_STATUS);

	return 0;
}
6243 
6244 static inline void si_irq_ack(struct radeon_device *rdev)
6245 {
6246 	u32 tmp;
6247 
6248 	if (ASIC_IS_NODCE(rdev))
6249 		return;
6250 
6251 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6252 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6253 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6254 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6255 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6256 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6257 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
6258 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
6259 	if (rdev->num_crtc >= 4) {
6260 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
6261 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
6262 	}
6263 	if (rdev->num_crtc >= 6) {
6264 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
6265 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
6266 	}
6267 
6268 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
6269 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6270 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
6271 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6272 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
6273 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6274 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
6275 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6276 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6277 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6278 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6279 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6280 
6281 	if (rdev->num_crtc >= 4) {
6282 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
6283 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6284 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
6285 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6286 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6287 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6288 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6289 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6290 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6291 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6292 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6293 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6294 	}
6295 
6296 	if (rdev->num_crtc >= 6) {
6297 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6298 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6299 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6300 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6301 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6302 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6303 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6304 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6305 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6306 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6307 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6308 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6309 	}
6310 
6311 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6312 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6313 		tmp |= DC_HPDx_INT_ACK;
6314 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6315 	}
6316 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6317 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6318 		tmp |= DC_HPDx_INT_ACK;
6319 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6320 	}
6321 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6322 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6323 		tmp |= DC_HPDx_INT_ACK;
6324 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6325 	}
6326 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6327 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6328 		tmp |= DC_HPDx_INT_ACK;
6329 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6330 	}
6331 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6332 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6333 		tmp |= DC_HPDx_INT_ACK;
6334 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6335 	}
6336 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6337 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6338 		tmp |= DC_HPDx_INT_ACK;
6339 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6340 	}
6341 
6342 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT) {
6343 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6344 		tmp |= DC_HPDx_RX_INT_ACK;
6345 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6346 	}
6347 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
6348 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6349 		tmp |= DC_HPDx_RX_INT_ACK;
6350 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6351 	}
6352 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
6353 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6354 		tmp |= DC_HPDx_RX_INT_ACK;
6355 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6356 	}
6357 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
6358 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6359 		tmp |= DC_HPDx_RX_INT_ACK;
6360 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6361 	}
6362 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
6363 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6364 		tmp |= DC_HPDx_RX_INT_ACK;
6365 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6366 	}
6367 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
6368 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6369 		tmp |= DC_HPDx_RX_INT_ACK;
6370 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6371 	}
6372 }
6373 
/* Disable the IH, wait 1ms for in-flight interrupts to land, ack any
 * pending display interrupts, then mask every source. */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
6382 
/* Suspend path: disable interrupts, then halt the RLC. */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
6388 
/* Teardown path: suspend interrupt handling and free the IH ring. */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6394 
/* Fetch the current IH ring write pointer, preferring the writeback
 * copy when enabled.  On ring overflow, resynchronize the read pointer
 * just past the overwritten entries and clear the overflow flag.
 * Returns the write pointer masked to the ring size. */
static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
{
	u32 wptr, tmp;

	if (rdev->wb.enabled)
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
	else
		wptr = RREG32(IH_RB_WPTR);

	if (wptr & RB_OVERFLOW) {
		wptr &= ~RB_OVERFLOW;
		/* When a ring buffer overflow happen start parsing interrupt
		 * from the last not overwritten vector (wptr + 16). Hopefully
		 * this should allow us to catchup.
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
		tmp = RREG32(IH_RB_CNTL);
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
		WREG32(IH_RB_CNTL, tmp);
	}
	return (wptr & rdev->ih.ptr_mask);
}
6419 
6420 /*        SI IV Ring
6421  * Each IV ring entry is 128 bits:
6422  * [7:0]    - interrupt source id
6423  * [31:8]   - reserved
6424  * [59:32]  - interrupt source data
6425  * [63:60]  - reserved
6426  * [71:64]  - RINGID
6427  * [79:72]  - VMID
6428  * [127:80] - reserved
6429  */
6430 int si_irq_process(struct radeon_device *rdev)
6431 {
6432 	u32 wptr;
6433 	u32 rptr;
6434 	u32 src_id, src_data, ring_id;
6435 	u32 ring_index;
6436 	bool queue_hotplug = false;
6437 	bool queue_dp = false;
6438 	bool queue_thermal = false;
6439 	u32 status, addr;
6440 
6441 	if (!rdev->ih.enabled || rdev->shutdown)
6442 		return IRQ_NONE;
6443 
6444 	wptr = si_get_ih_wptr(rdev);
6445 
6446 restart_ih:
6447 	/* is somebody else already processing irqs? */
6448 	if (atomic_xchg(&rdev->ih.lock, 1))
6449 		return IRQ_NONE;
6450 
6451 	rptr = rdev->ih.rptr;
6452 	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6453 
6454 	/* Order reading of wptr vs. reading of IH ring data */
6455 	rmb();
6456 
6457 	/* display interrupts */
6458 	si_irq_ack(rdev);
6459 
6460 	while (rptr != wptr) {
6461 		/* wptr/rptr are in bytes! */
6462 		ring_index = rptr / 4;
6463 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6464 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6465 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6466 
6467 		switch (src_id) {
6468 		case 1: /* D1 vblank/vline */
6469 			switch (src_data) {
6470 			case 0: /* D1 vblank */
6471 				if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT))
6472 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6473 
6474 				if (rdev->irq.crtc_vblank_int[0]) {
6475 					drm_handle_vblank(rdev->ddev, 0);
6476 					rdev->pm.vblank_sync = true;
6477 					wake_up(&rdev->irq.vblank_queue);
6478 				}
6479 				if (atomic_read(&rdev->irq.pflip[0]))
6480 					radeon_crtc_handle_vblank(rdev, 0);
6481 				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6482 				DRM_DEBUG("IH: D1 vblank\n");
6483 
6484 				break;
6485 			case 1: /* D1 vline */
6486 				if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT))
6487 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6488 
6489 				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6490 				DRM_DEBUG("IH: D1 vline\n");
6491 
6492 				break;
6493 			default:
6494 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6495 				break;
6496 			}
6497 			break;
6498 		case 2: /* D2 vblank/vline */
6499 			switch (src_data) {
6500 			case 0: /* D2 vblank */
6501 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
6502 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6503 
6504 				if (rdev->irq.crtc_vblank_int[1]) {
6505 					drm_handle_vblank(rdev->ddev, 1);
6506 					rdev->pm.vblank_sync = true;
6507 					wake_up(&rdev->irq.vblank_queue);
6508 				}
6509 				if (atomic_read(&rdev->irq.pflip[1]))
6510 					radeon_crtc_handle_vblank(rdev, 1);
6511 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6512 				DRM_DEBUG("IH: D2 vblank\n");
6513 
6514 				break;
6515 			case 1: /* D2 vline */
6516 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT))
6517 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6518 
6519 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6520 				DRM_DEBUG("IH: D2 vline\n");
6521 
6522 				break;
6523 			default:
6524 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6525 				break;
6526 			}
6527 			break;
6528 		case 3: /* D3 vblank/vline */
6529 			switch (src_data) {
6530 			case 0: /* D3 vblank */
6531 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
6532 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6533 
6534 				if (rdev->irq.crtc_vblank_int[2]) {
6535 					drm_handle_vblank(rdev->ddev, 2);
6536 					rdev->pm.vblank_sync = true;
6537 					wake_up(&rdev->irq.vblank_queue);
6538 				}
6539 				if (atomic_read(&rdev->irq.pflip[2]))
6540 					radeon_crtc_handle_vblank(rdev, 2);
6541 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6542 				DRM_DEBUG("IH: D3 vblank\n");
6543 
6544 				break;
6545 			case 1: /* D3 vline */
6546 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
6547 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6548 
6549 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6550 				DRM_DEBUG("IH: D3 vline\n");
6551 
6552 				break;
6553 			default:
6554 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6555 				break;
6556 			}
6557 			break;
6558 		case 4: /* D4 vblank/vline */
6559 			switch (src_data) {
6560 			case 0: /* D4 vblank */
6561 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
6562 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6563 
6564 				if (rdev->irq.crtc_vblank_int[3]) {
6565 					drm_handle_vblank(rdev->ddev, 3);
6566 					rdev->pm.vblank_sync = true;
6567 					wake_up(&rdev->irq.vblank_queue);
6568 				}
6569 				if (atomic_read(&rdev->irq.pflip[3]))
6570 					radeon_crtc_handle_vblank(rdev, 3);
6571 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6572 				DRM_DEBUG("IH: D4 vblank\n");
6573 
6574 				break;
6575 			case 1: /* D4 vline */
6576 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
6577 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6578 
6579 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6580 				DRM_DEBUG("IH: D4 vline\n");
6581 
6582 				break;
6583 			default:
6584 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6585 				break;
6586 			}
6587 			break;
6588 		case 5: /* D5 vblank/vline */
6589 			switch (src_data) {
6590 			case 0: /* D5 vblank */
6591 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
6592 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6593 
6594 				if (rdev->irq.crtc_vblank_int[4]) {
6595 					drm_handle_vblank(rdev->ddev, 4);
6596 					rdev->pm.vblank_sync = true;
6597 					wake_up(&rdev->irq.vblank_queue);
6598 				}
6599 				if (atomic_read(&rdev->irq.pflip[4]))
6600 					radeon_crtc_handle_vblank(rdev, 4);
6601 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6602 				DRM_DEBUG("IH: D5 vblank\n");
6603 
6604 				break;
6605 			case 1: /* D5 vline */
6606 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
6607 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6608 
6609 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6610 				DRM_DEBUG("IH: D5 vline\n");
6611 
6612 				break;
6613 			default:
6614 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6615 				break;
6616 			}
6617 			break;
6618 		case 6: /* D6 vblank/vline */
6619 			switch (src_data) {
6620 			case 0: /* D6 vblank */
6621 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
6622 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6623 
6624 				if (rdev->irq.crtc_vblank_int[5]) {
6625 					drm_handle_vblank(rdev->ddev, 5);
6626 					rdev->pm.vblank_sync = true;
6627 					wake_up(&rdev->irq.vblank_queue);
6628 				}
6629 				if (atomic_read(&rdev->irq.pflip[5]))
6630 					radeon_crtc_handle_vblank(rdev, 5);
6631 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6632 				DRM_DEBUG("IH: D6 vblank\n");
6633 
6634 				break;
6635 			case 1: /* D6 vline */
6636 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
6637 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6638 
6639 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6640 				DRM_DEBUG("IH: D6 vline\n");
6641 
6642 				break;
6643 			default:
6644 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6645 				break;
6646 			}
6647 			break;
6648 		case 8: /* D1 page flip */
6649 		case 10: /* D2 page flip */
6650 		case 12: /* D3 page flip */
6651 		case 14: /* D4 page flip */
6652 		case 16: /* D5 page flip */
6653 		case 18: /* D6 page flip */
6654 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6655 			if (radeon_use_pflipirq > 0)
6656 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6657 			break;
6658 		case 42: /* HPD hotplug */
6659 			switch (src_data) {
6660 			case 0:
6661 				if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT))
6662 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6663 
6664 				rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6665 				queue_hotplug = true;
6666 				DRM_DEBUG("IH: HPD1\n");
6667 
6668 				break;
6669 			case 1:
6670 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT))
6671 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6672 
6673 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6674 				queue_hotplug = true;
6675 				DRM_DEBUG("IH: HPD2\n");
6676 
6677 				break;
6678 			case 2:
6679 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT))
6680 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6681 
6682 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6683 				queue_hotplug = true;
6684 				DRM_DEBUG("IH: HPD3\n");
6685 
6686 				break;
6687 			case 3:
6688 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT))
6689 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6690 
6691 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6692 				queue_hotplug = true;
6693 				DRM_DEBUG("IH: HPD4\n");
6694 
6695 				break;
6696 			case 4:
6697 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT))
6698 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6699 
6700 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6701 				queue_hotplug = true;
6702 				DRM_DEBUG("IH: HPD5\n");
6703 
6704 				break;
6705 			case 5:
6706 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT))
6707 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6708 
6709 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6710 				queue_hotplug = true;
6711 				DRM_DEBUG("IH: HPD6\n");
6712 
6713 				break;
6714 			case 6:
6715 				if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT))
6716 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6717 
6718 				rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT;
6719 				queue_dp = true;
6720 				DRM_DEBUG("IH: HPD_RX 1\n");
6721 
6722 				break;
6723 			case 7:
6724 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT))
6725 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6726 
6727 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
6728 				queue_dp = true;
6729 				DRM_DEBUG("IH: HPD_RX 2\n");
6730 
6731 				break;
6732 			case 8:
6733 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
6734 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6735 
6736 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
6737 				queue_dp = true;
6738 				DRM_DEBUG("IH: HPD_RX 3\n");
6739 
6740 				break;
6741 			case 9:
6742 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
6743 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6744 
6745 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
6746 				queue_dp = true;
6747 				DRM_DEBUG("IH: HPD_RX 4\n");
6748 
6749 				break;
6750 			case 10:
6751 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
6752 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6753 
6754 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
6755 				queue_dp = true;
6756 				DRM_DEBUG("IH: HPD_RX 5\n");
6757 
6758 				break;
6759 			case 11:
6760 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
6761 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6762 
6763 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
6764 				queue_dp = true;
6765 				DRM_DEBUG("IH: HPD_RX 6\n");
6766 
6767 				break;
6768 			default:
6769 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6770 				break;
6771 			}
6772 			break;
6773 		case 96:
6774 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
6775 			WREG32(SRBM_INT_ACK, 0x1);
6776 			break;
6777 		case 124: /* UVD */
6778 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6779 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6780 			break;
6781 		case 146:
6782 		case 147:
6783 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6784 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6785 			/* reset addr and status */
6786 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6787 			if (addr == 0x0 && status == 0x0)
6788 				break;
6789 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6790 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6791 				addr);
6792 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6793 				status);
6794 			si_vm_decode_fault(rdev, status, addr);
6795 			break;
6796 		case 176: /* RINGID0 CP_INT */
6797 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6798 			break;
6799 		case 177: /* RINGID1 CP_INT */
6800 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6801 			break;
6802 		case 178: /* RINGID2 CP_INT */
6803 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6804 			break;
6805 		case 181: /* CP EOP event */
6806 			DRM_DEBUG("IH: CP EOP\n");
6807 			switch (ring_id) {
6808 			case 0:
6809 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6810 				break;
6811 			case 1:
6812 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6813 				break;
6814 			case 2:
6815 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6816 				break;
6817 			}
6818 			break;
6819 		case 224: /* DMA trap event */
6820 			DRM_DEBUG("IH: DMA trap\n");
6821 			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6822 			break;
6823 		case 230: /* thermal low to high */
6824 			DRM_DEBUG("IH: thermal low to high\n");
6825 			rdev->pm.dpm.thermal.high_to_low = false;
6826 			queue_thermal = true;
6827 			break;
6828 		case 231: /* thermal high to low */
6829 			DRM_DEBUG("IH: thermal high to low\n");
6830 			rdev->pm.dpm.thermal.high_to_low = true;
6831 			queue_thermal = true;
6832 			break;
6833 		case 233: /* GUI IDLE */
6834 			DRM_DEBUG("IH: GUI idle\n");
6835 			break;
6836 		case 244: /* DMA trap event */
6837 			DRM_DEBUG("IH: DMA1 trap\n");
6838 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6839 			break;
6840 		default:
6841 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6842 			break;
6843 		}
6844 
6845 		/* wptr/rptr are in bytes! */
6846 		rptr += 16;
6847 		rptr &= rdev->ih.ptr_mask;
6848 		WREG32(IH_RB_RPTR, rptr);
6849 	}
6850 	if (queue_dp)
6851 		schedule_work(&rdev->dp_work);
6852 	if (queue_hotplug)
6853 		schedule_delayed_work(&rdev->hotplug_work, 0);
6854 	if (queue_thermal && rdev->pm.dpm_enabled)
6855 		schedule_work(&rdev->pm.dpm.thermal.work);
6856 	rdev->ih.rptr = rptr;
6857 	atomic_set(&rdev->ih.lock, 0);
6858 
6859 	/* make sure wptr hasn't changed while processing */
6860 	wptr = si_get_ih_wptr(rdev);
6861 	if (wptr != rptr)
6862 		goto restart_ih;
6863 
6864 	return IRQ_HANDLED;
6865 }
6866 
6867 /*
6868  * startup/shutdown callbacks
6869  */
6870 static void si_uvd_init(struct radeon_device *rdev)
6871 {
6872 	int r;
6873 
6874 	if (!rdev->has_uvd)
6875 		return;
6876 
6877 	r = radeon_uvd_init(rdev);
6878 	if (r) {
6879 		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
6880 		/*
6881 		 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
6882 		 * to early fails uvd_v2_2_resume() and thus nothing happens
6883 		 * there. So it is pointless to try to go through that code
6884 		 * hence why we disable uvd here.
6885 		 */
6886 		rdev->has_uvd = 0;
6887 		return;
6888 	}
6889 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
6890 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
6891 }
6892 
6893 static void si_uvd_start(struct radeon_device *rdev)
6894 {
6895 	int r;
6896 
6897 	if (!rdev->has_uvd)
6898 		return;
6899 
6900 	r = uvd_v2_2_resume(rdev);
6901 	if (r) {
6902 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
6903 		goto error;
6904 	}
6905 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
6906 	if (r) {
6907 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
6908 		goto error;
6909 	}
6910 	return;
6911 
6912 error:
6913 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6914 }
6915 
6916 static void si_uvd_resume(struct radeon_device *rdev)
6917 {
6918 	struct radeon_ring *ring;
6919 	int r;
6920 
6921 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
6922 		return;
6923 
6924 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6925 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
6926 	if (r) {
6927 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
6928 		return;
6929 	}
6930 	r = uvd_v1_0_init(rdev);
6931 	if (r) {
6932 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
6933 		return;
6934 	}
6935 }
6936 
6937 static void si_vce_init(struct radeon_device *rdev)
6938 {
6939 	int r;
6940 
6941 	if (!rdev->has_vce)
6942 		return;
6943 
6944 	r = radeon_vce_init(rdev);
6945 	if (r) {
6946 		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
6947 		/*
6948 		 * At this point rdev->vce.vcpu_bo is NULL which trickles down
6949 		 * to early fails si_vce_start() and thus nothing happens
6950 		 * there. So it is pointless to try to go through that code
6951 		 * hence why we disable vce here.
6952 		 */
6953 		rdev->has_vce = 0;
6954 		return;
6955 	}
6956 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
6957 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
6958 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
6959 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
6960 }
6961 
6962 static void si_vce_start(struct radeon_device *rdev)
6963 {
6964 	int r;
6965 
6966 	if (!rdev->has_vce)
6967 		return;
6968 
6969 	r = radeon_vce_resume(rdev);
6970 	if (r) {
6971 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6972 		goto error;
6973 	}
6974 	r = vce_v1_0_resume(rdev);
6975 	if (r) {
6976 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6977 		goto error;
6978 	}
6979 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
6980 	if (r) {
6981 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
6982 		goto error;
6983 	}
6984 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
6985 	if (r) {
6986 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
6987 		goto error;
6988 	}
6989 	return;
6990 
6991 error:
6992 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
6993 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
6994 }
6995 
6996 static void si_vce_resume(struct radeon_device *rdev)
6997 {
6998 	struct radeon_ring *ring;
6999 	int r;
7000 
7001 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
7002 		return;
7003 
7004 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
7005 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
7006 	if (r) {
7007 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
7008 		return;
7009 	}
7010 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
7011 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
7012 	if (r) {
7013 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
7014 		return;
7015 	}
7016 	r = vce_v1_0_init(rdev);
7017 	if (r) {
7018 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
7019 		return;
7020 	}
7021 }
7022 
/**
 * si_startup - bring the ASIC to a fully working state
 *
 * @rdev: radeon_device pointer
 *
 * Programs the PCIe link, MC, GART, RLC and write-back buffer, starts
 * the fence drivers and all CP/DMA/UVD/VCE rings, enables interrupts,
 * and initializes the IB pool, VM manager and audio.  Shared by
 * si_init() and si_resume().
 * Returns 0 on success, negative error code on failure.
 *
 * NOTE(review): the ordering of these steps follows hardware bring-up
 * dependencies (scratch before MC, GART before GPU init, IRQs before
 * rings) - do not reorder casually.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);

	/* NOTE(review): when DPM is enabled the MC ucode is presumably
	 * loaded by the DPM code path instead - confirm */
	if (!rdev->pm.dpm_enabled) {
		r = si_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->family == CHIP_VERDE) {
		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size =
			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
	}
	rdev->rlc.cs_data = si_cs_data;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start fence processing for the three CP and two DMA rings */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD/VCE failures are non-fatal; they disable themselves */
	si_uvd_start(rdev);
	si_vce_start(rdev);

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	/* bring the rings up; each uses its own write-back rptr slot */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	si_uvd_resume(rdev);
	si_vce_resume(rdev);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
7181 
7182 int si_resume(struct radeon_device *rdev)
7183 {
7184 	int r;
7185 
7186 	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
7187 	 * posting will perform necessary task to bring back GPU into good
7188 	 * shape.
7189 	 */
7190 	/* post card */
7191 	atom_asic_init(rdev->mode_info.atom_context);
7192 
7193 	/* init golden registers */
7194 	si_init_golden_registers(rdev);
7195 
7196 	if (rdev->pm.pm_method == PM_METHOD_DPM)
7197 		radeon_pm_resume(rdev);
7198 
7199 	rdev->accel_working = true;
7200 	r = si_startup(rdev);
7201 	if (r) {
7202 		DRM_ERROR("si startup failed on resume\n");
7203 		rdev->accel_working = false;
7204 		return r;
7205 	}
7206 
7207 	return r;
7208 
7209 }
7210 
/**
 * si_suspend - quiesce the ASIC for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Tears down in roughly the reverse order of si_startup(): power
 * management and audio first, then the VM manager, CP and DMA engines,
 * UVD/VCE, power/clock gating, interrupts, write-back and finally the
 * GART.  Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* stop command submission engines before touching anything else */
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_suspend(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
7231 
7232 /* Plan is to move initialization in that function and use
7233  * helper function so that radeon_device_init pretty much
7234  * do nothing more than calling asic specific function. This
7235  * should also allow to remove a bunch of callback function
7236  * like vram_info.
7237  */
/**
 * si_init - one-time driver/ASIC initialization
 *
 * @rdev: radeon_device pointer
 *
 * Reads and validates the (ATOM) BIOS, posts the card if needed,
 * initializes clocks, fences, the memory controller and BO manager,
 * loads firmware, sizes all rings, sets up the IH ring and GART, and
 * finally runs si_startup().  A si_startup() failure disables
 * acceleration but is not fatal to init.
 * Returns 0 on success, negative error code on failure.
 */
int si_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	/* NOTE(review): message says "cayman" - inherited from ni.c; SI is
	 * meant here */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	si_init_golden_registers(rdev);
	/* Initialize scratch registers */
	si_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = si_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* load CP/RLC/MC microcode unless already cached */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
	    !rdev->rlc_fw || !rdev->mc_fw) {
		r = si_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	/* Initialize power management */
	radeon_pm_init(rdev);

	/* size the rings; buffers are allocated later by si_startup() */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	si_uvd_init(rdev);
	si_vce_init(rdev);

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = si_startup(rdev);
	if (r) {
		/* startup failure is non-fatal: fall back to no acceleration */
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		si_cp_fini(rdev);
		cayman_dma_fini(rdev);
		si_irq_fini(rdev);
		sumo_rlc_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		si_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not suffient for advanced operations.
	 */
	/* NOTE(review): message says "NI+" - inherited from ni.c */
	if (!rdev->mc_fw) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
7358 
7359 void si_fini(struct radeon_device *rdev)
7360 {
7361 	radeon_pm_fini(rdev);
7362 	si_cp_fini(rdev);
7363 	cayman_dma_fini(rdev);
7364 	si_fini_pg(rdev);
7365 	si_fini_cg(rdev);
7366 	si_irq_fini(rdev);
7367 	sumo_rlc_fini(rdev);
7368 	radeon_wb_fini(rdev);
7369 	radeon_vm_manager_fini(rdev);
7370 	radeon_ib_pool_fini(rdev);
7371 	radeon_irq_kms_fini(rdev);
7372 	if (rdev->has_uvd) {
7373 		uvd_v1_0_fini(rdev);
7374 		radeon_uvd_fini(rdev);
7375 	}
7376 	if (rdev->has_vce)
7377 		radeon_vce_fini(rdev);
7378 	si_pcie_gart_fini(rdev);
7379 	r600_vram_scratch_fini(rdev);
7380 	radeon_gem_fini(rdev);
7381 	radeon_fence_driver_fini(rdev);
7382 	radeon_bo_fini(rdev);
7383 	radeon_atombios_fini(rdev);
7384 	kfree(rdev->bios);
7385 	rdev->bios = NULL;
7386 }
7387 
7388 /**
7389  * si_get_gpu_clock_counter - return GPU clock counter snapshot
7390  *
7391  * @rdev: radeon_device pointer
7392  *
7393  * Fetches a GPU clock counter snapshot (SI).
7394  * Returns the 64 bit clock counter snapshot.
7395  */
7396 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
7397 {
7398 	uint64_t clock;
7399 
7400 	mutex_lock(&rdev->gpu_clock_mutex);
7401 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
7402 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7403 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
7404 	mutex_unlock(&rdev->gpu_clock_mutex);
7405 	return clock;
7406 }
7407 
/**
 * si_set_uvd_clocks - program the UPLL for the requested UVD clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested VCLK in kHz (0 leaves the PLL in bypass)
 * @dclk: requested DCLK in kHz (0 leaves the PLL in bypass)
 *
 * Puts the UPLL in bypass, computes and programs the feedback and
 * post dividers, brings the PLL out of reset and switches VCLK/DCLK
 * back to the PLL outputs.  Returns 0 on success, negative error code
 * on failure.  The register sequence and the mdelay() settle times
 * follow the required UPLL programming order.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode */
		return 0;
	}

	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* disable sleep mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* NOTE(review): 307200 presumably selects a VCO range via the
	 * spare bit - confirm against the UPLL documentation */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
7496 
/**
 * si_pcie_gen3_enable - train the PCIe link to the fastest supported speed
 *
 * @rdev: radeon_device pointer
 *
 * If both the platform and the GPU support gen2/gen3, programs the
 * target link speed (with an equalization redo sequence for gen3) and
 * initiates a link speed change.  Silently returns when the link speed
 * cannot or need not be changed.  Can be disabled with
 * radeon.pcie_gen2=0.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 (per the
	 * comparisons below) */
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* offsets of the PCIe capability in config space, for bridge and GPU */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save link control state on both ends, then force
			 * hardware autonomous width disable while retraining */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate to the full detected link width if the
			 * link came up narrower */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				/* restore the saved HAWD bits on both ends */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				/* restore enter-compliance ((1 << 4)) and
				 * transmit-margin ((7 << 9)) fields */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed field of Link Control 2 */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the speed change to complete (bit self-clears) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
7656 
/*
 * si_program_aspm - program PCIe Active State Power Management (SI)
 * @rdev: radeon_device pointer
 *
 * Configures ASPM link power states (L0s/L1), PHY PLL power-down in L1,
 * and CLKREQ#-based reference clocking.  The disable_* locals act as
 * compile-time policy knobs; with all of them false (the shipped
 * default) L0s and L1 inactivity timers are enabled and PLL power-down
 * in L1 is programmed.  Every register update is read-modify-write and
 * only written back when the value actually changed.
 */
static void si_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* policy knobs: all ASPM features enabled by default */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* honor the radeon_aspm=0 module parameter */
	if (radeon_aspm == 0)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the number of fast training sequences (N_FTS = 0x24)
	 * advertised for L0s exit */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	/* ignore EDB (end-of-burst) errors on the PCIe port */
	orig = data = RREG32_PCIE(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE(PCIE_P_CNTL, data);

	/* set the L0s/L1 inactivity timers; PMI-to-L1 stays disabled
	 * unless L1 is enabled below */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PHY PLLs to power down in the OFF and
			 * TXS2 link states (both PHYs, both PLL banks) */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

			/* zero the PLL ramp-up times on everything except
			 * Oland/Hainan */
			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
			}
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* Oland/Hainan need a non-zero LS2 exit time */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);

			/* CLKREQ# is only usable if the upstream bridge
			 * advertises clock power management in its link caps */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				/* allow power-down in L1/L2-L3-ready and
				 * switch internal clocks off the PCIe refclk
				 * so it can be gated */
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32(THM_CLK_CNTL, data);

				orig = data = RREG32(MISC_CLK_CNTL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32(MISC_CLK_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32(MPLL_BYPASSCLK_SEL, data);

				orig = data = RREG32(SPLL_CNTL_MODE);
				data &= ~SPLL_REFCLK_SEL_MASK;
				if (orig != data)
					WREG32(SPLL_CNTL_MODE, data);
			}
		}
	} else {
		/* L1 disabled: flush the pending LC_CNTL update (L0s timer,
		 * PMI-to-L1 disable) that was staged above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable memory light sleep in the BIF */
	orig = data = RREG32_PCIE(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* if N_FTS is maxed out and the link is reversed in both
		 * directions, L0s is not reliable: clear its inactivity
		 * timer again to keep it disabled */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
7861 
7862 static int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
7863 {
7864 	unsigned i;
7865 
7866 	/* make sure VCEPLL_CTLREQ is deasserted */
7867 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7868 
7869 	mdelay(10);
7870 
7871 	/* assert UPLL_CTLREQ */
7872 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
7873 
7874 	/* wait for CTLACK and CTLACK2 to get asserted */
7875 	for (i = 0; i < 100; ++i) {
7876 		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
7877 		if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
7878 			break;
7879 		mdelay(10);
7880 	}
7881 
7882 	/* deassert UPLL_CTLREQ */
7883 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7884 
7885 	if (i == 100) {
7886 		DRM_ERROR("Timeout setting UVD clocks!\n");
7887 		return -ETIMEDOUT;
7888 	}
7889 
7890 	return 0;
7891 }
7892 
/*
 * si_set_vce_clocks - program the VCE PLL for the requested clocks
 * @rdev: radeon_device pointer
 * @evclk: requested VCE event clock; 0 leaves the PLL bypassed/asleep
 * @ecclk: requested VCE core clock; 0 leaves the PLL bypassed/asleep
 *   (presumably in 10 kHz units like the other radeon clock setters —
 *    confirm against radeon_uvd_calc_upll_dividers callers)
 *
 * Standard PLL reprogramming sequence: route evclk/ecclk from bclk
 * (bypass), put the PLL into bypass/sleep, compute and program the
 * dividers, wake and un-reset the PLL with settle delays, then switch
 * the clock sources back to the PLL outputs.
 *
 * Returns 0 on success, negative error code on failure.
 */
int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
{
	unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
	int r;

	/* bypass evclk and ecclk with bclk */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
		     EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
		     ~VCEPLL_BYPASS_EN_MASK);

	if (!evclk || !ecclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
			     ~VCEPLL_SLEEP_MASK);
		return 0;
	}

	/* NOTE(review): 125000/250000 look like the VCO frequency window and
	 * the remaining args divider limits — see radeon_uvd_calc_upll_dividers */
	r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &evclk_div, &ecclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
		     ~VCEPLL_VCO_MODE_MASK);

	/* toggle VCEPLL_SLEEP to 1 then back to 0 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
		     ~VCEPLL_SLEEP_MASK);
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);

	/* deassert VCEPLL_RESET */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);

	mdelay(1);

	/* handshake so the hardware latches the sleep/reset toggle */
	r = si_vce_send_vcepll_ctlreq(rdev);
	if (r)
		return r;

	/* assert VCEPLL_RESET again */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);

	/* set PDIV_A and PDIV_B */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
		     VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
		     ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);

	/* handshake again so the new divider settings take effect */
	r = si_vce_send_vcepll_ctlreq(rdev);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
		     EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
7982