xref: /openbmc/linux/drivers/gpu/drm/radeon/si.c (revision c819e2cf)
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include <drm/radeon_drm.h>
31 #include "sid.h"
32 #include "atom.h"
33 #include "si_blit_shaders.h"
34 #include "clearstate_si.h"
35 #include "radeon_ucode.h"
36 
37 
38 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
39 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
40 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
41 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
42 MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
43 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
44 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
45 
46 MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
47 MODULE_FIRMWARE("radeon/tahiti_me.bin");
48 MODULE_FIRMWARE("radeon/tahiti_ce.bin");
49 MODULE_FIRMWARE("radeon/tahiti_mc.bin");
50 MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
51 MODULE_FIRMWARE("radeon/tahiti_smc.bin");
52 
53 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
54 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
55 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
56 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
57 MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
58 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
59 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
60 
61 MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
62 MODULE_FIRMWARE("radeon/pitcairn_me.bin");
63 MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
64 MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
65 MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
66 MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
67 
68 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
69 MODULE_FIRMWARE("radeon/VERDE_me.bin");
70 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
71 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
72 MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
73 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
74 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
75 
76 MODULE_FIRMWARE("radeon/verde_pfp.bin");
77 MODULE_FIRMWARE("radeon/verde_me.bin");
78 MODULE_FIRMWARE("radeon/verde_ce.bin");
79 MODULE_FIRMWARE("radeon/verde_mc.bin");
80 MODULE_FIRMWARE("radeon/verde_rlc.bin");
81 MODULE_FIRMWARE("radeon/verde_smc.bin");
82 
83 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
84 MODULE_FIRMWARE("radeon/OLAND_me.bin");
85 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
86 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
87 MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
88 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
89 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
90 
91 MODULE_FIRMWARE("radeon/oland_pfp.bin");
92 MODULE_FIRMWARE("radeon/oland_me.bin");
93 MODULE_FIRMWARE("radeon/oland_ce.bin");
94 MODULE_FIRMWARE("radeon/oland_mc.bin");
95 MODULE_FIRMWARE("radeon/oland_rlc.bin");
96 MODULE_FIRMWARE("radeon/oland_smc.bin");
97 
98 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
99 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
100 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
101 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
102 MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
103 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
104 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
105 
106 MODULE_FIRMWARE("radeon/hainan_pfp.bin");
107 MODULE_FIRMWARE("radeon/hainan_me.bin");
108 MODULE_FIRMWARE("radeon/hainan_ce.bin");
109 MODULE_FIRMWARE("radeon/hainan_mc.bin");
110 MODULE_FIRMWARE("radeon/hainan_rlc.bin");
111 MODULE_FIRMWARE("radeon/hainan_smc.bin");
112 
113 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
114 static void si_pcie_gen3_enable(struct radeon_device *rdev);
115 static void si_program_aspm(struct radeon_device *rdev);
116 extern void sumo_rlc_fini(struct radeon_device *rdev);
117 extern int sumo_rlc_init(struct radeon_device *rdev);
118 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
119 extern void r600_ih_ring_fini(struct radeon_device *rdev);
120 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
121 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
122 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
123 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
124 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
125 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
126 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
127 					 bool enable);
128 static void si_init_pg(struct radeon_device *rdev);
129 static void si_init_cg(struct radeon_device *rdev);
130 static void si_fini_pg(struct radeon_device *rdev);
131 static void si_fini_cg(struct radeon_device *rdev);
132 static void si_rlc_stop(struct radeon_device *rdev);
133 
134 static const u32 verde_rlc_save_restore_register_list[] =
135 {
136 	(0x8000 << 16) | (0x98f4 >> 2),
137 	0x00000000,
138 	(0x8040 << 16) | (0x98f4 >> 2),
139 	0x00000000,
140 	(0x8000 << 16) | (0xe80 >> 2),
141 	0x00000000,
142 	(0x8040 << 16) | (0xe80 >> 2),
143 	0x00000000,
144 	(0x8000 << 16) | (0x89bc >> 2),
145 	0x00000000,
146 	(0x8040 << 16) | (0x89bc >> 2),
147 	0x00000000,
148 	(0x8000 << 16) | (0x8c1c >> 2),
149 	0x00000000,
150 	(0x8040 << 16) | (0x8c1c >> 2),
151 	0x00000000,
152 	(0x9c00 << 16) | (0x98f0 >> 2),
153 	0x00000000,
154 	(0x9c00 << 16) | (0xe7c >> 2),
155 	0x00000000,
156 	(0x8000 << 16) | (0x9148 >> 2),
157 	0x00000000,
158 	(0x8040 << 16) | (0x9148 >> 2),
159 	0x00000000,
160 	(0x9c00 << 16) | (0x9150 >> 2),
161 	0x00000000,
162 	(0x9c00 << 16) | (0x897c >> 2),
163 	0x00000000,
164 	(0x9c00 << 16) | (0x8d8c >> 2),
165 	0x00000000,
166 	(0x9c00 << 16) | (0xac54 >> 2),
167 	0X00000000,
168 	0x3,
169 	(0x9c00 << 16) | (0x98f8 >> 2),
170 	0x00000000,
171 	(0x9c00 << 16) | (0x9910 >> 2),
172 	0x00000000,
173 	(0x9c00 << 16) | (0x9914 >> 2),
174 	0x00000000,
175 	(0x9c00 << 16) | (0x9918 >> 2),
176 	0x00000000,
177 	(0x9c00 << 16) | (0x991c >> 2),
178 	0x00000000,
179 	(0x9c00 << 16) | (0x9920 >> 2),
180 	0x00000000,
181 	(0x9c00 << 16) | (0x9924 >> 2),
182 	0x00000000,
183 	(0x9c00 << 16) | (0x9928 >> 2),
184 	0x00000000,
185 	(0x9c00 << 16) | (0x992c >> 2),
186 	0x00000000,
187 	(0x9c00 << 16) | (0x9930 >> 2),
188 	0x00000000,
189 	(0x9c00 << 16) | (0x9934 >> 2),
190 	0x00000000,
191 	(0x9c00 << 16) | (0x9938 >> 2),
192 	0x00000000,
193 	(0x9c00 << 16) | (0x993c >> 2),
194 	0x00000000,
195 	(0x9c00 << 16) | (0x9940 >> 2),
196 	0x00000000,
197 	(0x9c00 << 16) | (0x9944 >> 2),
198 	0x00000000,
199 	(0x9c00 << 16) | (0x9948 >> 2),
200 	0x00000000,
201 	(0x9c00 << 16) | (0x994c >> 2),
202 	0x00000000,
203 	(0x9c00 << 16) | (0x9950 >> 2),
204 	0x00000000,
205 	(0x9c00 << 16) | (0x9954 >> 2),
206 	0x00000000,
207 	(0x9c00 << 16) | (0x9958 >> 2),
208 	0x00000000,
209 	(0x9c00 << 16) | (0x995c >> 2),
210 	0x00000000,
211 	(0x9c00 << 16) | (0x9960 >> 2),
212 	0x00000000,
213 	(0x9c00 << 16) | (0x9964 >> 2),
214 	0x00000000,
215 	(0x9c00 << 16) | (0x9968 >> 2),
216 	0x00000000,
217 	(0x9c00 << 16) | (0x996c >> 2),
218 	0x00000000,
219 	(0x9c00 << 16) | (0x9970 >> 2),
220 	0x00000000,
221 	(0x9c00 << 16) | (0x9974 >> 2),
222 	0x00000000,
223 	(0x9c00 << 16) | (0x9978 >> 2),
224 	0x00000000,
225 	(0x9c00 << 16) | (0x997c >> 2),
226 	0x00000000,
227 	(0x9c00 << 16) | (0x9980 >> 2),
228 	0x00000000,
229 	(0x9c00 << 16) | (0x9984 >> 2),
230 	0x00000000,
231 	(0x9c00 << 16) | (0x9988 >> 2),
232 	0x00000000,
233 	(0x9c00 << 16) | (0x998c >> 2),
234 	0x00000000,
235 	(0x9c00 << 16) | (0x8c00 >> 2),
236 	0x00000000,
237 	(0x9c00 << 16) | (0x8c14 >> 2),
238 	0x00000000,
239 	(0x9c00 << 16) | (0x8c04 >> 2),
240 	0x00000000,
241 	(0x9c00 << 16) | (0x8c08 >> 2),
242 	0x00000000,
243 	(0x8000 << 16) | (0x9b7c >> 2),
244 	0x00000000,
245 	(0x8040 << 16) | (0x9b7c >> 2),
246 	0x00000000,
247 	(0x8000 << 16) | (0xe84 >> 2),
248 	0x00000000,
249 	(0x8040 << 16) | (0xe84 >> 2),
250 	0x00000000,
251 	(0x8000 << 16) | (0x89c0 >> 2),
252 	0x00000000,
253 	(0x8040 << 16) | (0x89c0 >> 2),
254 	0x00000000,
255 	(0x8000 << 16) | (0x914c >> 2),
256 	0x00000000,
257 	(0x8040 << 16) | (0x914c >> 2),
258 	0x00000000,
259 	(0x8000 << 16) | (0x8c20 >> 2),
260 	0x00000000,
261 	(0x8040 << 16) | (0x8c20 >> 2),
262 	0x00000000,
263 	(0x8000 << 16) | (0x9354 >> 2),
264 	0x00000000,
265 	(0x8040 << 16) | (0x9354 >> 2),
266 	0x00000000,
267 	(0x9c00 << 16) | (0x9060 >> 2),
268 	0x00000000,
269 	(0x9c00 << 16) | (0x9364 >> 2),
270 	0x00000000,
271 	(0x9c00 << 16) | (0x9100 >> 2),
272 	0x00000000,
273 	(0x9c00 << 16) | (0x913c >> 2),
274 	0x00000000,
275 	(0x8000 << 16) | (0x90e0 >> 2),
276 	0x00000000,
277 	(0x8000 << 16) | (0x90e4 >> 2),
278 	0x00000000,
279 	(0x8000 << 16) | (0x90e8 >> 2),
280 	0x00000000,
281 	(0x8040 << 16) | (0x90e0 >> 2),
282 	0x00000000,
283 	(0x8040 << 16) | (0x90e4 >> 2),
284 	0x00000000,
285 	(0x8040 << 16) | (0x90e8 >> 2),
286 	0x00000000,
287 	(0x9c00 << 16) | (0x8bcc >> 2),
288 	0x00000000,
289 	(0x9c00 << 16) | (0x8b24 >> 2),
290 	0x00000000,
291 	(0x9c00 << 16) | (0x88c4 >> 2),
292 	0x00000000,
293 	(0x9c00 << 16) | (0x8e50 >> 2),
294 	0x00000000,
295 	(0x9c00 << 16) | (0x8c0c >> 2),
296 	0x00000000,
297 	(0x9c00 << 16) | (0x8e58 >> 2),
298 	0x00000000,
299 	(0x9c00 << 16) | (0x8e5c >> 2),
300 	0x00000000,
301 	(0x9c00 << 16) | (0x9508 >> 2),
302 	0x00000000,
303 	(0x9c00 << 16) | (0x950c >> 2),
304 	0x00000000,
305 	(0x9c00 << 16) | (0x9494 >> 2),
306 	0x00000000,
307 	(0x9c00 << 16) | (0xac0c >> 2),
308 	0x00000000,
309 	(0x9c00 << 16) | (0xac10 >> 2),
310 	0x00000000,
311 	(0x9c00 << 16) | (0xac14 >> 2),
312 	0x00000000,
313 	(0x9c00 << 16) | (0xae00 >> 2),
314 	0x00000000,
315 	(0x9c00 << 16) | (0xac08 >> 2),
316 	0x00000000,
317 	(0x9c00 << 16) | (0x88d4 >> 2),
318 	0x00000000,
319 	(0x9c00 << 16) | (0x88c8 >> 2),
320 	0x00000000,
321 	(0x9c00 << 16) | (0x88cc >> 2),
322 	0x00000000,
323 	(0x9c00 << 16) | (0x89b0 >> 2),
324 	0x00000000,
325 	(0x9c00 << 16) | (0x8b10 >> 2),
326 	0x00000000,
327 	(0x9c00 << 16) | (0x8a14 >> 2),
328 	0x00000000,
329 	(0x9c00 << 16) | (0x9830 >> 2),
330 	0x00000000,
331 	(0x9c00 << 16) | (0x9834 >> 2),
332 	0x00000000,
333 	(0x9c00 << 16) | (0x9838 >> 2),
334 	0x00000000,
335 	(0x9c00 << 16) | (0x9a10 >> 2),
336 	0x00000000,
337 	(0x8000 << 16) | (0x9870 >> 2),
338 	0x00000000,
339 	(0x8000 << 16) | (0x9874 >> 2),
340 	0x00000000,
341 	(0x8001 << 16) | (0x9870 >> 2),
342 	0x00000000,
343 	(0x8001 << 16) | (0x9874 >> 2),
344 	0x00000000,
345 	(0x8040 << 16) | (0x9870 >> 2),
346 	0x00000000,
347 	(0x8040 << 16) | (0x9874 >> 2),
348 	0x00000000,
349 	(0x8041 << 16) | (0x9870 >> 2),
350 	0x00000000,
351 	(0x8041 << 16) | (0x9874 >> 2),
352 	0x00000000,
353 	0x00000000
354 };
355 
356 static const u32 tahiti_golden_rlc_registers[] =
357 {
358 	0xc424, 0xffffffff, 0x00601005,
359 	0xc47c, 0xffffffff, 0x10104040,
360 	0xc488, 0xffffffff, 0x0100000a,
361 	0xc314, 0xffffffff, 0x00000800,
362 	0xc30c, 0xffffffff, 0x800000f4,
363 	0xf4a8, 0xffffffff, 0x00000000
364 };
365 
366 static const u32 tahiti_golden_registers[] =
367 {
368 	0x9a10, 0x00010000, 0x00018208,
369 	0x9830, 0xffffffff, 0x00000000,
370 	0x9834, 0xf00fffff, 0x00000400,
371 	0x9838, 0x0002021c, 0x00020200,
372 	0xc78, 0x00000080, 0x00000000,
373 	0xd030, 0x000300c0, 0x00800040,
374 	0xd830, 0x000300c0, 0x00800040,
375 	0x5bb0, 0x000000f0, 0x00000070,
376 	0x5bc0, 0x00200000, 0x50100000,
377 	0x7030, 0x31000311, 0x00000011,
378 	0x277c, 0x00000003, 0x000007ff,
379 	0x240c, 0x000007ff, 0x00000000,
380 	0x8a14, 0xf000001f, 0x00000007,
381 	0x8b24, 0xffffffff, 0x00ffffff,
382 	0x8b10, 0x0000ff0f, 0x00000000,
383 	0x28a4c, 0x07ffffff, 0x4e000000,
384 	0x28350, 0x3f3f3fff, 0x2a00126a,
385 	0x30, 0x000000ff, 0x0040,
386 	0x34, 0x00000040, 0x00004040,
387 	0x9100, 0x07ffffff, 0x03000000,
388 	0x8e88, 0x01ff1f3f, 0x00000000,
389 	0x8e84, 0x01ff1f3f, 0x00000000,
390 	0x9060, 0x0000007f, 0x00000020,
391 	0x9508, 0x00010000, 0x00010000,
392 	0xac14, 0x00000200, 0x000002fb,
393 	0xac10, 0xffffffff, 0x0000543b,
394 	0xac0c, 0xffffffff, 0xa9210876,
395 	0x88d0, 0xffffffff, 0x000fff40,
396 	0x88d4, 0x0000001f, 0x00000010,
397 	0x1410, 0x20000000, 0x20fffed8,
398 	0x15c0, 0x000c0fc0, 0x000c0400
399 };
400 
401 static const u32 tahiti_golden_registers2[] =
402 {
403 	0xc64, 0x00000001, 0x00000001
404 };
405 
406 static const u32 pitcairn_golden_rlc_registers[] =
407 {
408 	0xc424, 0xffffffff, 0x00601004,
409 	0xc47c, 0xffffffff, 0x10102020,
410 	0xc488, 0xffffffff, 0x01000020,
411 	0xc314, 0xffffffff, 0x00000800,
412 	0xc30c, 0xffffffff, 0x800000a4
413 };
414 
415 static const u32 pitcairn_golden_registers[] =
416 {
417 	0x9a10, 0x00010000, 0x00018208,
418 	0x9830, 0xffffffff, 0x00000000,
419 	0x9834, 0xf00fffff, 0x00000400,
420 	0x9838, 0x0002021c, 0x00020200,
421 	0xc78, 0x00000080, 0x00000000,
422 	0xd030, 0x000300c0, 0x00800040,
423 	0xd830, 0x000300c0, 0x00800040,
424 	0x5bb0, 0x000000f0, 0x00000070,
425 	0x5bc0, 0x00200000, 0x50100000,
426 	0x7030, 0x31000311, 0x00000011,
427 	0x2ae4, 0x00073ffe, 0x000022a2,
428 	0x240c, 0x000007ff, 0x00000000,
429 	0x8a14, 0xf000001f, 0x00000007,
430 	0x8b24, 0xffffffff, 0x00ffffff,
431 	0x8b10, 0x0000ff0f, 0x00000000,
432 	0x28a4c, 0x07ffffff, 0x4e000000,
433 	0x28350, 0x3f3f3fff, 0x2a00126a,
434 	0x30, 0x000000ff, 0x0040,
435 	0x34, 0x00000040, 0x00004040,
436 	0x9100, 0x07ffffff, 0x03000000,
437 	0x9060, 0x0000007f, 0x00000020,
438 	0x9508, 0x00010000, 0x00010000,
439 	0xac14, 0x000003ff, 0x000000f7,
440 	0xac10, 0xffffffff, 0x00000000,
441 	0xac0c, 0xffffffff, 0x32761054,
442 	0x88d4, 0x0000001f, 0x00000010,
443 	0x15c0, 0x000c0fc0, 0x000c0400
444 };
445 
446 static const u32 verde_golden_rlc_registers[] =
447 {
448 	0xc424, 0xffffffff, 0x033f1005,
449 	0xc47c, 0xffffffff, 0x10808020,
450 	0xc488, 0xffffffff, 0x00800008,
451 	0xc314, 0xffffffff, 0x00001000,
452 	0xc30c, 0xffffffff, 0x80010014
453 };
454 
455 static const u32 verde_golden_registers[] =
456 {
457 	0x9a10, 0x00010000, 0x00018208,
458 	0x9830, 0xffffffff, 0x00000000,
459 	0x9834, 0xf00fffff, 0x00000400,
460 	0x9838, 0x0002021c, 0x00020200,
461 	0xc78, 0x00000080, 0x00000000,
462 	0xd030, 0x000300c0, 0x00800040,
463 	0xd030, 0x000300c0, 0x00800040,
464 	0xd830, 0x000300c0, 0x00800040,
465 	0xd830, 0x000300c0, 0x00800040,
466 	0x5bb0, 0x000000f0, 0x00000070,
467 	0x5bc0, 0x00200000, 0x50100000,
468 	0x7030, 0x31000311, 0x00000011,
469 	0x2ae4, 0x00073ffe, 0x000022a2,
470 	0x2ae4, 0x00073ffe, 0x000022a2,
471 	0x2ae4, 0x00073ffe, 0x000022a2,
472 	0x240c, 0x000007ff, 0x00000000,
473 	0x240c, 0x000007ff, 0x00000000,
474 	0x240c, 0x000007ff, 0x00000000,
475 	0x8a14, 0xf000001f, 0x00000007,
476 	0x8a14, 0xf000001f, 0x00000007,
477 	0x8a14, 0xf000001f, 0x00000007,
478 	0x8b24, 0xffffffff, 0x00ffffff,
479 	0x8b10, 0x0000ff0f, 0x00000000,
480 	0x28a4c, 0x07ffffff, 0x4e000000,
481 	0x28350, 0x3f3f3fff, 0x0000124a,
482 	0x28350, 0x3f3f3fff, 0x0000124a,
483 	0x28350, 0x3f3f3fff, 0x0000124a,
484 	0x30, 0x000000ff, 0x0040,
485 	0x34, 0x00000040, 0x00004040,
486 	0x9100, 0x07ffffff, 0x03000000,
487 	0x9100, 0x07ffffff, 0x03000000,
488 	0x8e88, 0x01ff1f3f, 0x00000000,
489 	0x8e88, 0x01ff1f3f, 0x00000000,
490 	0x8e88, 0x01ff1f3f, 0x00000000,
491 	0x8e84, 0x01ff1f3f, 0x00000000,
492 	0x8e84, 0x01ff1f3f, 0x00000000,
493 	0x8e84, 0x01ff1f3f, 0x00000000,
494 	0x9060, 0x0000007f, 0x00000020,
495 	0x9508, 0x00010000, 0x00010000,
496 	0xac14, 0x000003ff, 0x00000003,
497 	0xac14, 0x000003ff, 0x00000003,
498 	0xac14, 0x000003ff, 0x00000003,
499 	0xac10, 0xffffffff, 0x00000000,
500 	0xac10, 0xffffffff, 0x00000000,
501 	0xac10, 0xffffffff, 0x00000000,
502 	0xac0c, 0xffffffff, 0x00001032,
503 	0xac0c, 0xffffffff, 0x00001032,
504 	0xac0c, 0xffffffff, 0x00001032,
505 	0x88d4, 0x0000001f, 0x00000010,
506 	0x88d4, 0x0000001f, 0x00000010,
507 	0x88d4, 0x0000001f, 0x00000010,
508 	0x15c0, 0x000c0fc0, 0x000c0400
509 };
510 
511 static const u32 oland_golden_rlc_registers[] =
512 {
513 	0xc424, 0xffffffff, 0x00601005,
514 	0xc47c, 0xffffffff, 0x10104040,
515 	0xc488, 0xffffffff, 0x0100000a,
516 	0xc314, 0xffffffff, 0x00000800,
517 	0xc30c, 0xffffffff, 0x800000f4
518 };
519 
520 static const u32 oland_golden_registers[] =
521 {
522 	0x9a10, 0x00010000, 0x00018208,
523 	0x9830, 0xffffffff, 0x00000000,
524 	0x9834, 0xf00fffff, 0x00000400,
525 	0x9838, 0x0002021c, 0x00020200,
526 	0xc78, 0x00000080, 0x00000000,
527 	0xd030, 0x000300c0, 0x00800040,
528 	0xd830, 0x000300c0, 0x00800040,
529 	0x5bb0, 0x000000f0, 0x00000070,
530 	0x5bc0, 0x00200000, 0x50100000,
531 	0x7030, 0x31000311, 0x00000011,
532 	0x2ae4, 0x00073ffe, 0x000022a2,
533 	0x240c, 0x000007ff, 0x00000000,
534 	0x8a14, 0xf000001f, 0x00000007,
535 	0x8b24, 0xffffffff, 0x00ffffff,
536 	0x8b10, 0x0000ff0f, 0x00000000,
537 	0x28a4c, 0x07ffffff, 0x4e000000,
538 	0x28350, 0x3f3f3fff, 0x00000082,
539 	0x30, 0x000000ff, 0x0040,
540 	0x34, 0x00000040, 0x00004040,
541 	0x9100, 0x07ffffff, 0x03000000,
542 	0x9060, 0x0000007f, 0x00000020,
543 	0x9508, 0x00010000, 0x00010000,
544 	0xac14, 0x000003ff, 0x000000f3,
545 	0xac10, 0xffffffff, 0x00000000,
546 	0xac0c, 0xffffffff, 0x00003210,
547 	0x88d4, 0x0000001f, 0x00000010,
548 	0x15c0, 0x000c0fc0, 0x000c0400
549 };
550 
551 static const u32 hainan_golden_registers[] =
552 {
553 	0x9a10, 0x00010000, 0x00018208,
554 	0x9830, 0xffffffff, 0x00000000,
555 	0x9834, 0xf00fffff, 0x00000400,
556 	0x9838, 0x0002021c, 0x00020200,
557 	0xd0c0, 0xff000fff, 0x00000100,
558 	0xd030, 0x000300c0, 0x00800040,
559 	0xd8c0, 0xff000fff, 0x00000100,
560 	0xd830, 0x000300c0, 0x00800040,
561 	0x2ae4, 0x00073ffe, 0x000022a2,
562 	0x240c, 0x000007ff, 0x00000000,
563 	0x8a14, 0xf000001f, 0x00000007,
564 	0x8b24, 0xffffffff, 0x00ffffff,
565 	0x8b10, 0x0000ff0f, 0x00000000,
566 	0x28a4c, 0x07ffffff, 0x4e000000,
567 	0x28350, 0x3f3f3fff, 0x00000000,
568 	0x30, 0x000000ff, 0x0040,
569 	0x34, 0x00000040, 0x00004040,
570 	0x9100, 0x03e00000, 0x03600000,
571 	0x9060, 0x0000007f, 0x00000020,
572 	0x9508, 0x00010000, 0x00010000,
573 	0xac14, 0x000003ff, 0x000000f1,
574 	0xac10, 0xffffffff, 0x00000000,
575 	0xac0c, 0xffffffff, 0x00003210,
576 	0x88d4, 0x0000001f, 0x00000010,
577 	0x15c0, 0x000c0fc0, 0x000c0400
578 };
579 
580 static const u32 hainan_golden_registers2[] =
581 {
582 	0x98f8, 0xffffffff, 0x02010001
583 };
584 
585 static const u32 tahiti_mgcg_cgcg_init[] =
586 {
587 	0xc400, 0xffffffff, 0xfffffffc,
588 	0x802c, 0xffffffff, 0xe0000000,
589 	0x9a60, 0xffffffff, 0x00000100,
590 	0x92a4, 0xffffffff, 0x00000100,
591 	0xc164, 0xffffffff, 0x00000100,
592 	0x9774, 0xffffffff, 0x00000100,
593 	0x8984, 0xffffffff, 0x06000100,
594 	0x8a18, 0xffffffff, 0x00000100,
595 	0x92a0, 0xffffffff, 0x00000100,
596 	0xc380, 0xffffffff, 0x00000100,
597 	0x8b28, 0xffffffff, 0x00000100,
598 	0x9144, 0xffffffff, 0x00000100,
599 	0x8d88, 0xffffffff, 0x00000100,
600 	0x8d8c, 0xffffffff, 0x00000100,
601 	0x9030, 0xffffffff, 0x00000100,
602 	0x9034, 0xffffffff, 0x00000100,
603 	0x9038, 0xffffffff, 0x00000100,
604 	0x903c, 0xffffffff, 0x00000100,
605 	0xad80, 0xffffffff, 0x00000100,
606 	0xac54, 0xffffffff, 0x00000100,
607 	0x897c, 0xffffffff, 0x06000100,
608 	0x9868, 0xffffffff, 0x00000100,
609 	0x9510, 0xffffffff, 0x00000100,
610 	0xaf04, 0xffffffff, 0x00000100,
611 	0xae04, 0xffffffff, 0x00000100,
612 	0x949c, 0xffffffff, 0x00000100,
613 	0x802c, 0xffffffff, 0xe0000000,
614 	0x9160, 0xffffffff, 0x00010000,
615 	0x9164, 0xffffffff, 0x00030002,
616 	0x9168, 0xffffffff, 0x00040007,
617 	0x916c, 0xffffffff, 0x00060005,
618 	0x9170, 0xffffffff, 0x00090008,
619 	0x9174, 0xffffffff, 0x00020001,
620 	0x9178, 0xffffffff, 0x00040003,
621 	0x917c, 0xffffffff, 0x00000007,
622 	0x9180, 0xffffffff, 0x00060005,
623 	0x9184, 0xffffffff, 0x00090008,
624 	0x9188, 0xffffffff, 0x00030002,
625 	0x918c, 0xffffffff, 0x00050004,
626 	0x9190, 0xffffffff, 0x00000008,
627 	0x9194, 0xffffffff, 0x00070006,
628 	0x9198, 0xffffffff, 0x000a0009,
629 	0x919c, 0xffffffff, 0x00040003,
630 	0x91a0, 0xffffffff, 0x00060005,
631 	0x91a4, 0xffffffff, 0x00000009,
632 	0x91a8, 0xffffffff, 0x00080007,
633 	0x91ac, 0xffffffff, 0x000b000a,
634 	0x91b0, 0xffffffff, 0x00050004,
635 	0x91b4, 0xffffffff, 0x00070006,
636 	0x91b8, 0xffffffff, 0x0008000b,
637 	0x91bc, 0xffffffff, 0x000a0009,
638 	0x91c0, 0xffffffff, 0x000d000c,
639 	0x91c4, 0xffffffff, 0x00060005,
640 	0x91c8, 0xffffffff, 0x00080007,
641 	0x91cc, 0xffffffff, 0x0000000b,
642 	0x91d0, 0xffffffff, 0x000a0009,
643 	0x91d4, 0xffffffff, 0x000d000c,
644 	0x91d8, 0xffffffff, 0x00070006,
645 	0x91dc, 0xffffffff, 0x00090008,
646 	0x91e0, 0xffffffff, 0x0000000c,
647 	0x91e4, 0xffffffff, 0x000b000a,
648 	0x91e8, 0xffffffff, 0x000e000d,
649 	0x91ec, 0xffffffff, 0x00080007,
650 	0x91f0, 0xffffffff, 0x000a0009,
651 	0x91f4, 0xffffffff, 0x0000000d,
652 	0x91f8, 0xffffffff, 0x000c000b,
653 	0x91fc, 0xffffffff, 0x000f000e,
654 	0x9200, 0xffffffff, 0x00090008,
655 	0x9204, 0xffffffff, 0x000b000a,
656 	0x9208, 0xffffffff, 0x000c000f,
657 	0x920c, 0xffffffff, 0x000e000d,
658 	0x9210, 0xffffffff, 0x00110010,
659 	0x9214, 0xffffffff, 0x000a0009,
660 	0x9218, 0xffffffff, 0x000c000b,
661 	0x921c, 0xffffffff, 0x0000000f,
662 	0x9220, 0xffffffff, 0x000e000d,
663 	0x9224, 0xffffffff, 0x00110010,
664 	0x9228, 0xffffffff, 0x000b000a,
665 	0x922c, 0xffffffff, 0x000d000c,
666 	0x9230, 0xffffffff, 0x00000010,
667 	0x9234, 0xffffffff, 0x000f000e,
668 	0x9238, 0xffffffff, 0x00120011,
669 	0x923c, 0xffffffff, 0x000c000b,
670 	0x9240, 0xffffffff, 0x000e000d,
671 	0x9244, 0xffffffff, 0x00000011,
672 	0x9248, 0xffffffff, 0x0010000f,
673 	0x924c, 0xffffffff, 0x00130012,
674 	0x9250, 0xffffffff, 0x000d000c,
675 	0x9254, 0xffffffff, 0x000f000e,
676 	0x9258, 0xffffffff, 0x00100013,
677 	0x925c, 0xffffffff, 0x00120011,
678 	0x9260, 0xffffffff, 0x00150014,
679 	0x9264, 0xffffffff, 0x000e000d,
680 	0x9268, 0xffffffff, 0x0010000f,
681 	0x926c, 0xffffffff, 0x00000013,
682 	0x9270, 0xffffffff, 0x00120011,
683 	0x9274, 0xffffffff, 0x00150014,
684 	0x9278, 0xffffffff, 0x000f000e,
685 	0x927c, 0xffffffff, 0x00110010,
686 	0x9280, 0xffffffff, 0x00000014,
687 	0x9284, 0xffffffff, 0x00130012,
688 	0x9288, 0xffffffff, 0x00160015,
689 	0x928c, 0xffffffff, 0x0010000f,
690 	0x9290, 0xffffffff, 0x00120011,
691 	0x9294, 0xffffffff, 0x00000015,
692 	0x9298, 0xffffffff, 0x00140013,
693 	0x929c, 0xffffffff, 0x00170016,
694 	0x9150, 0xffffffff, 0x96940200,
695 	0x8708, 0xffffffff, 0x00900100,
696 	0xc478, 0xffffffff, 0x00000080,
697 	0xc404, 0xffffffff, 0x0020003f,
698 	0x30, 0xffffffff, 0x0000001c,
699 	0x34, 0x000f0000, 0x000f0000,
700 	0x160c, 0xffffffff, 0x00000100,
701 	0x1024, 0xffffffff, 0x00000100,
702 	0x102c, 0x00000101, 0x00000000,
703 	0x20a8, 0xffffffff, 0x00000104,
704 	0x264c, 0x000c0000, 0x000c0000,
705 	0x2648, 0x000c0000, 0x000c0000,
706 	0x55e4, 0xff000fff, 0x00000100,
707 	0x55e8, 0x00000001, 0x00000001,
708 	0x2f50, 0x00000001, 0x00000001,
709 	0x30cc, 0xc0000fff, 0x00000104,
710 	0xc1e4, 0x00000001, 0x00000001,
711 	0xd0c0, 0xfffffff0, 0x00000100,
712 	0xd8c0, 0xfffffff0, 0x00000100
713 };
714 
715 static const u32 pitcairn_mgcg_cgcg_init[] =
716 {
717 	0xc400, 0xffffffff, 0xfffffffc,
718 	0x802c, 0xffffffff, 0xe0000000,
719 	0x9a60, 0xffffffff, 0x00000100,
720 	0x92a4, 0xffffffff, 0x00000100,
721 	0xc164, 0xffffffff, 0x00000100,
722 	0x9774, 0xffffffff, 0x00000100,
723 	0x8984, 0xffffffff, 0x06000100,
724 	0x8a18, 0xffffffff, 0x00000100,
725 	0x92a0, 0xffffffff, 0x00000100,
726 	0xc380, 0xffffffff, 0x00000100,
727 	0x8b28, 0xffffffff, 0x00000100,
728 	0x9144, 0xffffffff, 0x00000100,
729 	0x8d88, 0xffffffff, 0x00000100,
730 	0x8d8c, 0xffffffff, 0x00000100,
731 	0x9030, 0xffffffff, 0x00000100,
732 	0x9034, 0xffffffff, 0x00000100,
733 	0x9038, 0xffffffff, 0x00000100,
734 	0x903c, 0xffffffff, 0x00000100,
735 	0xad80, 0xffffffff, 0x00000100,
736 	0xac54, 0xffffffff, 0x00000100,
737 	0x897c, 0xffffffff, 0x06000100,
738 	0x9868, 0xffffffff, 0x00000100,
739 	0x9510, 0xffffffff, 0x00000100,
740 	0xaf04, 0xffffffff, 0x00000100,
741 	0xae04, 0xffffffff, 0x00000100,
742 	0x949c, 0xffffffff, 0x00000100,
743 	0x802c, 0xffffffff, 0xe0000000,
744 	0x9160, 0xffffffff, 0x00010000,
745 	0x9164, 0xffffffff, 0x00030002,
746 	0x9168, 0xffffffff, 0x00040007,
747 	0x916c, 0xffffffff, 0x00060005,
748 	0x9170, 0xffffffff, 0x00090008,
749 	0x9174, 0xffffffff, 0x00020001,
750 	0x9178, 0xffffffff, 0x00040003,
751 	0x917c, 0xffffffff, 0x00000007,
752 	0x9180, 0xffffffff, 0x00060005,
753 	0x9184, 0xffffffff, 0x00090008,
754 	0x9188, 0xffffffff, 0x00030002,
755 	0x918c, 0xffffffff, 0x00050004,
756 	0x9190, 0xffffffff, 0x00000008,
757 	0x9194, 0xffffffff, 0x00070006,
758 	0x9198, 0xffffffff, 0x000a0009,
759 	0x919c, 0xffffffff, 0x00040003,
760 	0x91a0, 0xffffffff, 0x00060005,
761 	0x91a4, 0xffffffff, 0x00000009,
762 	0x91a8, 0xffffffff, 0x00080007,
763 	0x91ac, 0xffffffff, 0x000b000a,
764 	0x91b0, 0xffffffff, 0x00050004,
765 	0x91b4, 0xffffffff, 0x00070006,
766 	0x91b8, 0xffffffff, 0x0008000b,
767 	0x91bc, 0xffffffff, 0x000a0009,
768 	0x91c0, 0xffffffff, 0x000d000c,
769 	0x9200, 0xffffffff, 0x00090008,
770 	0x9204, 0xffffffff, 0x000b000a,
771 	0x9208, 0xffffffff, 0x000c000f,
772 	0x920c, 0xffffffff, 0x000e000d,
773 	0x9210, 0xffffffff, 0x00110010,
774 	0x9214, 0xffffffff, 0x000a0009,
775 	0x9218, 0xffffffff, 0x000c000b,
776 	0x921c, 0xffffffff, 0x0000000f,
777 	0x9220, 0xffffffff, 0x000e000d,
778 	0x9224, 0xffffffff, 0x00110010,
779 	0x9228, 0xffffffff, 0x000b000a,
780 	0x922c, 0xffffffff, 0x000d000c,
781 	0x9230, 0xffffffff, 0x00000010,
782 	0x9234, 0xffffffff, 0x000f000e,
783 	0x9238, 0xffffffff, 0x00120011,
784 	0x923c, 0xffffffff, 0x000c000b,
785 	0x9240, 0xffffffff, 0x000e000d,
786 	0x9244, 0xffffffff, 0x00000011,
787 	0x9248, 0xffffffff, 0x0010000f,
788 	0x924c, 0xffffffff, 0x00130012,
789 	0x9250, 0xffffffff, 0x000d000c,
790 	0x9254, 0xffffffff, 0x000f000e,
791 	0x9258, 0xffffffff, 0x00100013,
792 	0x925c, 0xffffffff, 0x00120011,
793 	0x9260, 0xffffffff, 0x00150014,
794 	0x9150, 0xffffffff, 0x96940200,
795 	0x8708, 0xffffffff, 0x00900100,
796 	0xc478, 0xffffffff, 0x00000080,
797 	0xc404, 0xffffffff, 0x0020003f,
798 	0x30, 0xffffffff, 0x0000001c,
799 	0x34, 0x000f0000, 0x000f0000,
800 	0x160c, 0xffffffff, 0x00000100,
801 	0x1024, 0xffffffff, 0x00000100,
802 	0x102c, 0x00000101, 0x00000000,
803 	0x20a8, 0xffffffff, 0x00000104,
804 	0x55e4, 0xff000fff, 0x00000100,
805 	0x55e8, 0x00000001, 0x00000001,
806 	0x2f50, 0x00000001, 0x00000001,
807 	0x30cc, 0xc0000fff, 0x00000104,
808 	0xc1e4, 0x00000001, 0x00000001,
809 	0xd0c0, 0xfffffff0, 0x00000100,
810 	0xd8c0, 0xfffffff0, 0x00000100
811 };
812 
813 static const u32 verde_mgcg_cgcg_init[] =
814 {
815 	0xc400, 0xffffffff, 0xfffffffc,
816 	0x802c, 0xffffffff, 0xe0000000,
817 	0x9a60, 0xffffffff, 0x00000100,
818 	0x92a4, 0xffffffff, 0x00000100,
819 	0xc164, 0xffffffff, 0x00000100,
820 	0x9774, 0xffffffff, 0x00000100,
821 	0x8984, 0xffffffff, 0x06000100,
822 	0x8a18, 0xffffffff, 0x00000100,
823 	0x92a0, 0xffffffff, 0x00000100,
824 	0xc380, 0xffffffff, 0x00000100,
825 	0x8b28, 0xffffffff, 0x00000100,
826 	0x9144, 0xffffffff, 0x00000100,
827 	0x8d88, 0xffffffff, 0x00000100,
828 	0x8d8c, 0xffffffff, 0x00000100,
829 	0x9030, 0xffffffff, 0x00000100,
830 	0x9034, 0xffffffff, 0x00000100,
831 	0x9038, 0xffffffff, 0x00000100,
832 	0x903c, 0xffffffff, 0x00000100,
833 	0xad80, 0xffffffff, 0x00000100,
834 	0xac54, 0xffffffff, 0x00000100,
835 	0x897c, 0xffffffff, 0x06000100,
836 	0x9868, 0xffffffff, 0x00000100,
837 	0x9510, 0xffffffff, 0x00000100,
838 	0xaf04, 0xffffffff, 0x00000100,
839 	0xae04, 0xffffffff, 0x00000100,
840 	0x949c, 0xffffffff, 0x00000100,
841 	0x802c, 0xffffffff, 0xe0000000,
842 	0x9160, 0xffffffff, 0x00010000,
843 	0x9164, 0xffffffff, 0x00030002,
844 	0x9168, 0xffffffff, 0x00040007,
845 	0x916c, 0xffffffff, 0x00060005,
846 	0x9170, 0xffffffff, 0x00090008,
847 	0x9174, 0xffffffff, 0x00020001,
848 	0x9178, 0xffffffff, 0x00040003,
849 	0x917c, 0xffffffff, 0x00000007,
850 	0x9180, 0xffffffff, 0x00060005,
851 	0x9184, 0xffffffff, 0x00090008,
852 	0x9188, 0xffffffff, 0x00030002,
853 	0x918c, 0xffffffff, 0x00050004,
854 	0x9190, 0xffffffff, 0x00000008,
855 	0x9194, 0xffffffff, 0x00070006,
856 	0x9198, 0xffffffff, 0x000a0009,
857 	0x919c, 0xffffffff, 0x00040003,
858 	0x91a0, 0xffffffff, 0x00060005,
859 	0x91a4, 0xffffffff, 0x00000009,
860 	0x91a8, 0xffffffff, 0x00080007,
861 	0x91ac, 0xffffffff, 0x000b000a,
862 	0x91b0, 0xffffffff, 0x00050004,
863 	0x91b4, 0xffffffff, 0x00070006,
864 	0x91b8, 0xffffffff, 0x0008000b,
865 	0x91bc, 0xffffffff, 0x000a0009,
866 	0x91c0, 0xffffffff, 0x000d000c,
867 	0x9200, 0xffffffff, 0x00090008,
868 	0x9204, 0xffffffff, 0x000b000a,
869 	0x9208, 0xffffffff, 0x000c000f,
870 	0x920c, 0xffffffff, 0x000e000d,
871 	0x9210, 0xffffffff, 0x00110010,
872 	0x9214, 0xffffffff, 0x000a0009,
873 	0x9218, 0xffffffff, 0x000c000b,
874 	0x921c, 0xffffffff, 0x0000000f,
875 	0x9220, 0xffffffff, 0x000e000d,
876 	0x9224, 0xffffffff, 0x00110010,
877 	0x9228, 0xffffffff, 0x000b000a,
878 	0x922c, 0xffffffff, 0x000d000c,
879 	0x9230, 0xffffffff, 0x00000010,
880 	0x9234, 0xffffffff, 0x000f000e,
881 	0x9238, 0xffffffff, 0x00120011,
882 	0x923c, 0xffffffff, 0x000c000b,
883 	0x9240, 0xffffffff, 0x000e000d,
884 	0x9244, 0xffffffff, 0x00000011,
885 	0x9248, 0xffffffff, 0x0010000f,
886 	0x924c, 0xffffffff, 0x00130012,
887 	0x9250, 0xffffffff, 0x000d000c,
888 	0x9254, 0xffffffff, 0x000f000e,
889 	0x9258, 0xffffffff, 0x00100013,
890 	0x925c, 0xffffffff, 0x00120011,
891 	0x9260, 0xffffffff, 0x00150014,
892 	0x9150, 0xffffffff, 0x96940200,
893 	0x8708, 0xffffffff, 0x00900100,
894 	0xc478, 0xffffffff, 0x00000080,
895 	0xc404, 0xffffffff, 0x0020003f,
896 	0x30, 0xffffffff, 0x0000001c,
897 	0x34, 0x000f0000, 0x000f0000,
898 	0x160c, 0xffffffff, 0x00000100,
899 	0x1024, 0xffffffff, 0x00000100,
900 	0x102c, 0x00000101, 0x00000000,
901 	0x20a8, 0xffffffff, 0x00000104,
902 	0x264c, 0x000c0000, 0x000c0000,
903 	0x2648, 0x000c0000, 0x000c0000,
904 	0x55e4, 0xff000fff, 0x00000100,
905 	0x55e8, 0x00000001, 0x00000001,
906 	0x2f50, 0x00000001, 0x00000001,
907 	0x30cc, 0xc0000fff, 0x00000104,
908 	0xc1e4, 0x00000001, 0x00000001,
909 	0xd0c0, 0xfffffff0, 0x00000100,
910 	0xd8c0, 0xfffffff0, 0x00000100
911 };
912 
/* Oland clock-gating (MGCG/CGCG) init sequence.
 * Consumed by radeon_program_register_sequence(); each line appears to be a
 * { register offset, mask, value } triple — confirm against that helper.
 */
static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
992 
/* Hainan clock-gating (MGCG/CGCG) init sequence.
 * Consumed by radeon_program_register_sequence(); each line appears to be a
 * { register offset, mask, value } triple — confirm against that helper.
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1069 
1070 static u32 verde_pg_init[] =
1071 {
1072 	0x353c, 0xffffffff, 0x40000,
1073 	0x3538, 0xffffffff, 0x200010ff,
1074 	0x353c, 0xffffffff, 0x0,
1075 	0x353c, 0xffffffff, 0x0,
1076 	0x353c, 0xffffffff, 0x0,
1077 	0x353c, 0xffffffff, 0x0,
1078 	0x353c, 0xffffffff, 0x0,
1079 	0x353c, 0xffffffff, 0x7007,
1080 	0x3538, 0xffffffff, 0x300010ff,
1081 	0x353c, 0xffffffff, 0x0,
1082 	0x353c, 0xffffffff, 0x0,
1083 	0x353c, 0xffffffff, 0x0,
1084 	0x353c, 0xffffffff, 0x0,
1085 	0x353c, 0xffffffff, 0x0,
1086 	0x353c, 0xffffffff, 0x400000,
1087 	0x3538, 0xffffffff, 0x100010ff,
1088 	0x353c, 0xffffffff, 0x0,
1089 	0x353c, 0xffffffff, 0x0,
1090 	0x353c, 0xffffffff, 0x0,
1091 	0x353c, 0xffffffff, 0x0,
1092 	0x353c, 0xffffffff, 0x0,
1093 	0x353c, 0xffffffff, 0x120200,
1094 	0x3538, 0xffffffff, 0x500010ff,
1095 	0x353c, 0xffffffff, 0x0,
1096 	0x353c, 0xffffffff, 0x0,
1097 	0x353c, 0xffffffff, 0x0,
1098 	0x353c, 0xffffffff, 0x0,
1099 	0x353c, 0xffffffff, 0x0,
1100 	0x353c, 0xffffffff, 0x1e1e16,
1101 	0x3538, 0xffffffff, 0x600010ff,
1102 	0x353c, 0xffffffff, 0x0,
1103 	0x353c, 0xffffffff, 0x0,
1104 	0x353c, 0xffffffff, 0x0,
1105 	0x353c, 0xffffffff, 0x0,
1106 	0x353c, 0xffffffff, 0x0,
1107 	0x353c, 0xffffffff, 0x171f1e,
1108 	0x3538, 0xffffffff, 0x700010ff,
1109 	0x353c, 0xffffffff, 0x0,
1110 	0x353c, 0xffffffff, 0x0,
1111 	0x353c, 0xffffffff, 0x0,
1112 	0x353c, 0xffffffff, 0x0,
1113 	0x353c, 0xffffffff, 0x0,
1114 	0x353c, 0xffffffff, 0x0,
1115 	0x3538, 0xffffffff, 0x9ff,
1116 	0x3500, 0xffffffff, 0x0,
1117 	0x3504, 0xffffffff, 0x10000800,
1118 	0x3504, 0xffffffff, 0xf,
1119 	0x3504, 0xffffffff, 0xf,
1120 	0x3500, 0xffffffff, 0x4,
1121 	0x3504, 0xffffffff, 0x1000051e,
1122 	0x3504, 0xffffffff, 0xffff,
1123 	0x3504, 0xffffffff, 0xffff,
1124 	0x3500, 0xffffffff, 0x8,
1125 	0x3504, 0xffffffff, 0x80500,
1126 	0x3500, 0xffffffff, 0x12,
1127 	0x3504, 0xffffffff, 0x9050c,
1128 	0x3500, 0xffffffff, 0x1d,
1129 	0x3504, 0xffffffff, 0xb052c,
1130 	0x3500, 0xffffffff, 0x2a,
1131 	0x3504, 0xffffffff, 0x1053e,
1132 	0x3500, 0xffffffff, 0x2d,
1133 	0x3504, 0xffffffff, 0x10546,
1134 	0x3500, 0xffffffff, 0x30,
1135 	0x3504, 0xffffffff, 0xa054e,
1136 	0x3500, 0xffffffff, 0x3c,
1137 	0x3504, 0xffffffff, 0x1055f,
1138 	0x3500, 0xffffffff, 0x3f,
1139 	0x3504, 0xffffffff, 0x10567,
1140 	0x3500, 0xffffffff, 0x42,
1141 	0x3504, 0xffffffff, 0x1056f,
1142 	0x3500, 0xffffffff, 0x45,
1143 	0x3504, 0xffffffff, 0x10572,
1144 	0x3500, 0xffffffff, 0x48,
1145 	0x3504, 0xffffffff, 0x20575,
1146 	0x3500, 0xffffffff, 0x4c,
1147 	0x3504, 0xffffffff, 0x190801,
1148 	0x3500, 0xffffffff, 0x67,
1149 	0x3504, 0xffffffff, 0x1082a,
1150 	0x3500, 0xffffffff, 0x6a,
1151 	0x3504, 0xffffffff, 0x1b082d,
1152 	0x3500, 0xffffffff, 0x87,
1153 	0x3504, 0xffffffff, 0x310851,
1154 	0x3500, 0xffffffff, 0xba,
1155 	0x3504, 0xffffffff, 0x891,
1156 	0x3500, 0xffffffff, 0xbc,
1157 	0x3504, 0xffffffff, 0x893,
1158 	0x3500, 0xffffffff, 0xbe,
1159 	0x3504, 0xffffffff, 0x20895,
1160 	0x3500, 0xffffffff, 0xc2,
1161 	0x3504, 0xffffffff, 0x20899,
1162 	0x3500, 0xffffffff, 0xc6,
1163 	0x3504, 0xffffffff, 0x2089d,
1164 	0x3500, 0xffffffff, 0xca,
1165 	0x3504, 0xffffffff, 0x8a1,
1166 	0x3500, 0xffffffff, 0xcc,
1167 	0x3504, 0xffffffff, 0x8a3,
1168 	0x3500, 0xffffffff, 0xce,
1169 	0x3504, 0xffffffff, 0x308a5,
1170 	0x3500, 0xffffffff, 0xd3,
1171 	0x3504, 0xffffffff, 0x6d08cd,
1172 	0x3500, 0xffffffff, 0x142,
1173 	0x3504, 0xffffffff, 0x2000095a,
1174 	0x3504, 0xffffffff, 0x1,
1175 	0x3500, 0xffffffff, 0x144,
1176 	0x3504, 0xffffffff, 0x301f095b,
1177 	0x3500, 0xffffffff, 0x165,
1178 	0x3504, 0xffffffff, 0xc094d,
1179 	0x3500, 0xffffffff, 0x173,
1180 	0x3504, 0xffffffff, 0xf096d,
1181 	0x3500, 0xffffffff, 0x184,
1182 	0x3504, 0xffffffff, 0x15097f,
1183 	0x3500, 0xffffffff, 0x19b,
1184 	0x3504, 0xffffffff, 0xc0998,
1185 	0x3500, 0xffffffff, 0x1a9,
1186 	0x3504, 0xffffffff, 0x409a7,
1187 	0x3500, 0xffffffff, 0x1af,
1188 	0x3504, 0xffffffff, 0xcdc,
1189 	0x3500, 0xffffffff, 0x1b1,
1190 	0x3504, 0xffffffff, 0x800,
1191 	0x3508, 0xffffffff, 0x6c9b2000,
1192 	0x3510, 0xfc00, 0x2000,
1193 	0x3544, 0xffffffff, 0xfc0,
1194 	0x28d4, 0x00000100, 0x100
1195 };
1196 
/**
 * si_init_golden_registers - program the per-ASIC "golden" register presets
 *
 * @rdev: radeon_device pointer
 *
 * Selects the register tables defined above based on the chip family and
 * programs each one via radeon_program_register_sequence().  Families not
 * listed here are left untouched.
 */
static void si_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_TAHITI:
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers2,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
		break;
	case CHIP_PITCAIRN:
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
		break;
	case CHIP_VERDE:
		radeon_program_register_sequence(rdev,
						 verde_golden_registers,
						 (const u32)ARRAY_SIZE(verde_golden_registers));
		radeon_program_register_sequence(rdev,
						 verde_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 verde_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 verde_pg_init,
						 (const u32)ARRAY_SIZE(verde_pg_init));
		break;
	case CHIP_OLAND:
		radeon_program_register_sequence(rdev,
						 oland_golden_registers,
						 (const u32)ARRAY_SIZE(oland_golden_registers));
		radeon_program_register_sequence(rdev,
						 oland_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 oland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
		break;
	case CHIP_HAINAN:
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers,
						 (const u32)ARRAY_SIZE(hainan_golden_registers));
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers2,
						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
		radeon_program_register_sequence(rdev,
						 hainan_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
		break;
	default:
		break;
	}
}
1265 
1266 #define PCIE_BUS_CLK                10000
1267 #define TCLK                        (PCIE_BUS_CLK / 10)
1268 
1269 /**
1270  * si_get_xclk - get the xclk
1271  *
1272  * @rdev: radeon_device pointer
1273  *
1274  * Returns the reference clock used by the gfx engine
1275  * (SI).
1276  */
1277 u32 si_get_xclk(struct radeon_device *rdev)
1278 {
1279         u32 reference_clock = rdev->clock.spll.reference_freq;
1280 	u32 tmp;
1281 
1282 	tmp = RREG32(CG_CLKPIN_CNTL_2);
1283 	if (tmp & MUX_TCLK_TO_XCLK)
1284 		return TCLK;
1285 
1286 	tmp = RREG32(CG_CLKPIN_CNTL);
1287 	if (tmp & XTALIN_DIVIDE)
1288 		return reference_clock / 4;
1289 
1290 	return reference_clock;
1291 }
1292 
1293 /* get temperature in millidegrees */
1294 int si_get_temp(struct radeon_device *rdev)
1295 {
1296 	u32 temp;
1297 	int actual_temp = 0;
1298 
1299 	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1300 		CTF_TEMP_SHIFT;
1301 
1302 	if (temp & 0x200)
1303 		actual_temp = 255;
1304 	else
1305 		actual_temp = temp & 0x1ff;
1306 
1307 	actual_temp = (actual_temp * 1000);
1308 
1309 	return actual_temp;
1310 }
1311 
1312 #define TAHITI_IO_MC_REGS_SIZE 36
1313 
/* Tahiti MC IO debug init: { MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA }
 * pairs written by si_mc_load_microcode() when legacy firmware is in use. */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1352 
/* Pitcairn MC IO debug init: { MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA }
 * pairs written by si_mc_load_microcode() when legacy firmware is in use. */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1391 
/* Verde MC IO debug init: { MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA }
 * pairs written by si_mc_load_microcode() when legacy firmware is in use. */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1430 
/* Oland MC IO debug init: { MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA }
 * pairs written by si_mc_load_microcode() when legacy firmware is in use. */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1469 
/* Hainan MC IO debug init: { MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA }
 * pairs written by si_mc_load_microcode() when legacy firmware is in use. */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1508 
1509 /* ucode loading */
1510 int si_mc_load_microcode(struct radeon_device *rdev)
1511 {
1512 	const __be32 *fw_data = NULL;
1513 	const __le32 *new_fw_data = NULL;
1514 	u32 running, blackout = 0;
1515 	u32 *io_mc_regs = NULL;
1516 	const __le32 *new_io_mc_regs = NULL;
1517 	int i, regs_size, ucode_size;
1518 
1519 	if (!rdev->mc_fw)
1520 		return -EINVAL;
1521 
1522 	if (rdev->new_fw) {
1523 		const struct mc_firmware_header_v1_0 *hdr =
1524 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1525 
1526 		radeon_ucode_print_mc_hdr(&hdr->header);
1527 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1528 		new_io_mc_regs = (const __le32 *)
1529 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1530 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1531 		new_fw_data = (const __le32 *)
1532 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1533 	} else {
1534 		ucode_size = rdev->mc_fw->size / 4;
1535 
1536 		switch (rdev->family) {
1537 		case CHIP_TAHITI:
1538 			io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1539 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1540 			break;
1541 		case CHIP_PITCAIRN:
1542 			io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1543 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1544 			break;
1545 		case CHIP_VERDE:
1546 		default:
1547 			io_mc_regs = (u32 *)&verde_io_mc_regs;
1548 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1549 			break;
1550 		case CHIP_OLAND:
1551 			io_mc_regs = (u32 *)&oland_io_mc_regs;
1552 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1553 			break;
1554 		case CHIP_HAINAN:
1555 			io_mc_regs = (u32 *)&hainan_io_mc_regs;
1556 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1557 			break;
1558 		}
1559 		fw_data = (const __be32 *)rdev->mc_fw->data;
1560 	}
1561 
1562 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1563 
1564 	if (running == 0) {
1565 		if (running) {
1566 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1567 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1568 		}
1569 
1570 		/* reset the engine and set to writable */
1571 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1572 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1573 
1574 		/* load mc io regs */
1575 		for (i = 0; i < regs_size; i++) {
1576 			if (rdev->new_fw) {
1577 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1578 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1579 			} else {
1580 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1581 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1582 			}
1583 		}
1584 		/* load the MC ucode */
1585 		for (i = 0; i < ucode_size; i++) {
1586 			if (rdev->new_fw)
1587 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1588 			else
1589 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1590 		}
1591 
1592 		/* put the engine back into the active state */
1593 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1594 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1595 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1596 
1597 		/* wait for training to complete */
1598 		for (i = 0; i < rdev->usec_timeout; i++) {
1599 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1600 				break;
1601 			udelay(1);
1602 		}
1603 		for (i = 0; i < rdev->usec_timeout; i++) {
1604 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1605 				break;
1606 			udelay(1);
1607 		}
1608 
1609 		if (running)
1610 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1611 	}
1612 
1613 	return 0;
1614 }
1615 
/**
 * si_init_microcode - fetch the SI microcode images from userspace
 *
 * @rdev: radeon_device pointer
 *
 * Requests the pfp, me, ce, rlc, mc and smc firmware blobs.  For each one
 * the new-style lowercase name is tried first and validated with
 * radeon_ucode_validate(); on failure the legacy uppercase name is tried
 * and size-checked against the expected per-family length.  Mixing new
 * and legacy images is rejected.  A missing smc image is tolerated (err
 * is reset to 0); any other failure releases all images.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int si_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	const char *new_chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
	size_t smc_req_size, mc2_req_size;
	char fw_name[30];
	int err;
	int new_fw = 0;

	DRM_DEBUG("\n");

	/* pick the firmware names and expected legacy-image sizes per family */
	switch (rdev->family) {
	case CHIP_TAHITI:
		chip_name = "TAHITI";
		new_chip_name = "tahiti";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_PITCAIRN:
		chip_name = "PITCAIRN";
		new_chip_name = "pitcairn";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_VERDE:
		chip_name = "VERDE";
		new_chip_name = "verde";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_OLAND:
		chip_name = "OLAND";
		new_chip_name = "oland";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_HAINAN:
		chip_name = "HAINAN";
		new_chip_name = "hainan";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", new_chip_name);

	/* pfp: new-style name first, fall back to legacy with a size check */
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->pfp_fw->size != pfp_req_size) {
			printk(KERN_ERR
			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->pfp_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
	} else {
		err = radeon_ucode_validate(rdev->pfp_fw);
		if (err) {
			printk(KERN_ERR
			       "si_cp: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* me: note there is no goto on a legacy size mismatch here (unlike
	 * pfp) — err stays -EINVAL, the remaining images are still
	 * requested, and the error is handled at out: */
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->me_fw->size != me_req_size) {
			printk(KERN_ERR
			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->me_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->me_fw);
		if (err) {
			printk(KERN_ERR
			       "si_cp: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* ce */
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->ce_fw->size != ce_req_size) {
			printk(KERN_ERR
			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->ce_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->ce_fw);
		if (err) {
			printk(KERN_ERR
			       "si_cp: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* rlc */
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->rlc_fw->size != rlc_req_size) {
			printk(KERN_ERR
			       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->rlc_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->rlc_fw);
		if (err) {
			printk(KERN_ERR
			       "si_cp: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* mc: legacy path tries the mc2 image before the original mc image,
	 * and accepts either expected size */
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err) {
			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
			if (err)
				goto out;
		}
		if ((rdev->mc_fw->size != mc_req_size) &&
		    (rdev->mc_fw->size != mc2_req_size)) {
			printk(KERN_ERR
			       "si_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
		}
		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
	} else {
		err = radeon_ucode_validate(rdev->mc_fw);
		if (err) {
			printk(KERN_ERR
			       "si_cp: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* smc: optional — a load failure is logged but forgiven (err reset
	 * to 0 and smc_fw left NULL) */
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
		if (err) {
			printk(KERN_ERR
			       "smc: error loading firmware \"%s\"\n",
			       fw_name);
			release_firmware(rdev->smc_fw);
			rdev->smc_fw = NULL;
			err = 0;
		} else if (rdev->smc_fw->size != smc_req_size) {
			printk(KERN_ERR
			       "si_smc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->smc_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->smc_fw);
		if (err) {
			printk(KERN_ERR
			       "si_cp: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* either all six images are new-style or none of them are */
	if (new_fw == 0) {
		rdev->new_fw = false;
	} else if (new_fw < 6) {
		printk(KERN_ERR "si_fw: mixing new and old firmware!\n");
		err = -EINVAL;
	} else {
		rdev->new_fw = true;
	}
out:
	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "si_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
		release_firmware(rdev->smc_fw);
		rdev->smc_fw = NULL;
	}
	return err;
}
1878 
1879 /* watermark setup */
/**
 * dce6_line_buffer_adjust - set up the line buffer allocation for a crtc
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the crtc being configured
 * @mode: the mode on this crtc, or NULL if disabled
 * @other_mode: the mode on the paired crtc sharing the line buffer, or NULL
 *
 * Programs DC_LB_MEMORY_SPLIT and the DMIF buffer allocation for this
 * crtc, then polls until the hardware acknowledges the DMIF allocation.
 * Returns the resulting line buffer size for watermark calculations
 * (0 if the crtc is disabled).
 */
static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode,
				   struct drm_display_mode *other_mode)
{
	u32 tmp, buffer_alloc, i;
	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
	/*
	 * Line Buffer Setup
	 * There are 3 line buffers, each one shared by 2 display controllers.
	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
	 * the display controllers.  The partitioning is done via one of four
	 * preset allocations specified in bits 21:20:
	 *  0 - half lb
	 *  2 - whole lb, other crtc must be disabled
	 */
	/* this can get tricky if we have two large displays on a paired group
	 * of crtcs.  Ideally for multiple large displays we'd assign them to
	 * non-linked crtcs for maximum line buffer allocation.
	 */
	if (radeon_crtc->base.enabled && mode) {
		if (other_mode) {
			tmp = 0; /* 1/2 */
			buffer_alloc = 1;
		} else {
			tmp = 2; /* whole */
			buffer_alloc = 2;
		}
	} else {
		/* crtc disabled: no line buffer and no DMIF buffers */
		tmp = 0;
		buffer_alloc = 0;
	}

	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
	       DC_LB_MEMORY_CONFIG(tmp));

	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
	/* poll until the hw reports the DMIF allocation completed */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
			break;
		udelay(1);
	}

	/* translate the chosen split mode into a line buffer size */
	if (radeon_crtc->base.enabled && mode) {
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 2:
			return 8192 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}
1938 
1939 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1940 {
1941 	u32 tmp = RREG32(MC_SHARED_CHMAP);
1942 
1943 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1944 	case 0:
1945 	default:
1946 		return 1;
1947 	case 1:
1948 		return 2;
1949 	case 2:
1950 		return 4;
1951 	case 3:
1952 		return 8;
1953 	case 4:
1954 		return 3;
1955 	case 5:
1956 		return 6;
1957 	case 6:
1958 		return 10;
1959 	case 7:
1960 		return 12;
1961 	case 8:
1962 		return 16;
1963 	}
1964 }
1965 
/* Inputs to the DCE6 display watermark calculations below. */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1981 
1982 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1983 {
1984 	/* Calculate raw DRAM Bandwidth */
1985 	fixed20_12 dram_efficiency; /* 0.7 */
1986 	fixed20_12 yclk, dram_channels, bandwidth;
1987 	fixed20_12 a;
1988 
1989 	a.full = dfixed_const(1000);
1990 	yclk.full = dfixed_const(wm->yclk);
1991 	yclk.full = dfixed_div(yclk, a);
1992 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1993 	a.full = dfixed_const(10);
1994 	dram_efficiency.full = dfixed_const(7);
1995 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
1996 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1997 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1998 
1999 	return dfixed_trunc(bandwidth);
2000 }
2001 
2002 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2003 {
2004 	/* Calculate DRAM Bandwidth and the part allocated to display. */
2005 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2006 	fixed20_12 yclk, dram_channels, bandwidth;
2007 	fixed20_12 a;
2008 
2009 	a.full = dfixed_const(1000);
2010 	yclk.full = dfixed_const(wm->yclk);
2011 	yclk.full = dfixed_div(yclk, a);
2012 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2013 	a.full = dfixed_const(10);
2014 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
2015 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2016 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2017 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2018 
2019 	return dfixed_trunc(bandwidth);
2020 }
2021 
2022 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2023 {
2024 	/* Calculate the display Data return Bandwidth */
2025 	fixed20_12 return_efficiency; /* 0.8 */
2026 	fixed20_12 sclk, bandwidth;
2027 	fixed20_12 a;
2028 
2029 	a.full = dfixed_const(1000);
2030 	sclk.full = dfixed_const(wm->sclk);
2031 	sclk.full = dfixed_div(sclk, a);
2032 	a.full = dfixed_const(10);
2033 	return_efficiency.full = dfixed_const(8);
2034 	return_efficiency.full = dfixed_div(return_efficiency, a);
2035 	a.full = dfixed_const(32);
2036 	bandwidth.full = dfixed_mul(a, sclk);
2037 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2038 
2039 	return dfixed_trunc(bandwidth);
2040 }
2041 
/* Size of a DMIF request in bytes; always 32 here. */
static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
{
	return 32;
}
2046 
2047 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2048 {
2049 	/* Calculate the DMIF Request Bandwidth */
2050 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2051 	fixed20_12 disp_clk, sclk, bandwidth;
2052 	fixed20_12 a, b1, b2;
2053 	u32 min_bandwidth;
2054 
2055 	a.full = dfixed_const(1000);
2056 	disp_clk.full = dfixed_const(wm->disp_clk);
2057 	disp_clk.full = dfixed_div(disp_clk, a);
2058 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2059 	b1.full = dfixed_mul(a, disp_clk);
2060 
2061 	a.full = dfixed_const(1000);
2062 	sclk.full = dfixed_const(wm->sclk);
2063 	sclk.full = dfixed_div(sclk, a);
2064 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2065 	b2.full = dfixed_mul(a, sclk);
2066 
2067 	a.full = dfixed_const(10);
2068 	disp_clk_request_efficiency.full = dfixed_const(8);
2069 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2070 
2071 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2072 
2073 	a.full = dfixed_const(min_bandwidth);
2074 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2075 
2076 	return dfixed_trunc(bandwidth);
2077 }
2078 
2079 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2080 {
2081 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
2082 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2083 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2084 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2085 
2086 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2087 }
2088 
2089 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2090 {
2091 	/* Calculate the display mode Average Bandwidth
2092 	 * DisplayMode should contain the source and destination dimensions,
2093 	 * timing, etc.
2094 	 */
2095 	fixed20_12 bpp;
2096 	fixed20_12 line_time;
2097 	fixed20_12 src_width;
2098 	fixed20_12 bandwidth;
2099 	fixed20_12 a;
2100 
2101 	a.full = dfixed_const(1000);
2102 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2103 	line_time.full = dfixed_div(line_time, a);
2104 	bpp.full = dfixed_const(wm->bytes_per_pixel);
2105 	src_width.full = dfixed_const(wm->src_width);
2106 	bandwidth.full = dfixed_mul(src_width, bpp);
2107 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2108 	bandwidth.full = dfixed_div(bandwidth, line_time);
2109 
2110 	return dfixed_trunc(bandwidth);
2111 }
2112 
/**
 * dce6_latency_watermark - compute the latency watermark for a head
 * @wm: watermark parameters for the head
 *
 * First computes the worst-case latency (in ns) of fetching data from
 * memory with all heads contending, then extends that by any extra
 * time needed to fill one line of the line buffer.
 * Returns 0 if no heads are active.
 */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* scaling (vsc > 1 with >= 3 taps, or vsc > 2), many vertical
	 * taps, or interlacing may need up to 4 source lines per
	 * destination line; otherwise 2 are enough */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* this head's even share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* bandwidth needed to drain the dmif buffer within the latency */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* cap the fill rate at bytes_per_pixel per display clock */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill one destination line's worth of source data */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
2175 
2176 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2177 {
2178 	if (dce6_average_bandwidth(wm) <=
2179 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2180 		return true;
2181 	else
2182 		return false;
2183 };
2184 
2185 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2186 {
2187 	if (dce6_average_bandwidth(wm) <=
2188 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2189 		return true;
2190 	else
2191 		return false;
2192 };
2193 
2194 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2195 {
2196 	u32 lb_partitions = wm->lb_size / wm->src_width;
2197 	u32 line_time = wm->active_time + wm->blank_time;
2198 	u32 latency_tolerant_lines;
2199 	u32 latency_hiding;
2200 	fixed20_12 a;
2201 
2202 	a.full = dfixed_const(1);
2203 	if (wm->vsc.full > a.full)
2204 		latency_tolerant_lines = 1;
2205 	else {
2206 		if (lb_partitions <= (wm->vtaps + 1))
2207 			latency_tolerant_lines = 1;
2208 		else
2209 			latency_tolerant_lines = 2;
2210 	}
2211 
2212 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2213 
2214 	if (dce6_latency_watermark(wm) <= latency_hiding)
2215 		return true;
2216 	else
2217 		return false;
2218 }
2219 
/**
 * dce6_program_watermarks - program display watermarks for a crtc
 * @rdev: radeon_device pointer
 * @radeon_crtc: the display controller to program
 * @lb_size: line buffer pixels allocated to this crtc
 * @num_heads: number of active display heads
 *
 * Computes latency watermarks for the high (wm A) and low (wm B)
 * clock states and programs them, along with the priority marks,
 * into the crtc's DPG registers.  Display priority is forced high
 * when the mode's bandwidth needs cannot be satisfied.  The computed
 * line time and watermarks are saved on @radeon_crtc for DPM.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm_low, wm_high;
	u32 dram_channels;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		if (rdev->family == CHIP_ARUBA)
			dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			dram_channels = si_get_number_of_dram_channels(rdev);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = dram_channels;
		wm_high.num_heads = num_heads;

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = dram_channels;
		wm_low.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
		/* set for low clocks */
		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce6_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce6_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A: watermark_a * clock * hsc / (1000 * 1000 * 16) */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same calculation with watermark_b */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
2379 
/**
 * dce6_bandwidth_update - recompute display watermarks (DCE6)
 * @rdev: radeon_device pointer
 *
 * Recomputes the line buffer split and latency watermarks for every
 * crtc.  Crtcs are processed two at a time because each line buffer
 * is shared by two display controllers (so rdev->num_crtc is assumed
 * to be even).
 */
void dce6_bandwidth_update(struct radeon_device *rdev)
{
	struct drm_display_mode *mode0 = NULL;
	struct drm_display_mode *mode1 = NULL;
	u32 num_heads = 0, lb_size;
	int i;

	/* nothing to do before modesetting is initialized */
	if (!rdev->mode_info.mode_config_initialized)
		return;

	radeon_update_display_priority(rdev);

	/* count the active heads */
	for (i = 0; i < rdev->num_crtc; i++) {
		if (rdev->mode_info.crtcs[i]->base.enabled)
			num_heads++;
	}
	for (i = 0; i < rdev->num_crtc; i += 2) {
		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
	}
}
2405 
2406 /*
2407  * Core functions
2408  */
2409 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2410 {
2411 	const u32 num_tile_mode_states = 32;
2412 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2413 
2414 	switch (rdev->config.si.mem_row_size_in_kb) {
2415 	case 1:
2416 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2417 		break;
2418 	case 2:
2419 	default:
2420 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2421 		break;
2422 	case 4:
2423 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2424 		break;
2425 	}
2426 
2427 	if ((rdev->family == CHIP_TAHITI) ||
2428 	    (rdev->family == CHIP_PITCAIRN)) {
2429 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2430 			switch (reg_offset) {
2431 			case 0:  /* non-AA compressed depth or any compressed stencil */
2432 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2434 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2435 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2436 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2437 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2439 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2440 				break;
2441 			case 1:  /* 2xAA/4xAA compressed depth only */
2442 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2443 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2444 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2445 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2446 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2447 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2448 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2449 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2450 				break;
2451 			case 2:  /* 8xAA compressed depth only */
2452 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2453 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2454 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2455 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2456 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2457 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2459 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2460 				break;
2461 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2462 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2463 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2464 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2465 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2466 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2467 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2468 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2469 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2470 				break;
2471 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2472 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2473 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2474 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2475 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2476 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2477 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2479 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2480 				break;
2481 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2482 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2483 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2484 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2485 						 TILE_SPLIT(split_equal_to_row_size) |
2486 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2487 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2488 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2489 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2490 				break;
2491 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2492 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2493 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2494 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2495 						 TILE_SPLIT(split_equal_to_row_size) |
2496 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2497 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2500 				break;
2501 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2502 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2503 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2504 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2505 						 TILE_SPLIT(split_equal_to_row_size) |
2506 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2507 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2508 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2509 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2510 				break;
2511 			case 8:  /* 1D and 1D Array Surfaces */
2512 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2513 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2514 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2515 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2516 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2517 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2518 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2519 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2520 				break;
2521 			case 9:  /* Displayable maps. */
2522 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2523 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2524 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2525 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2526 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2527 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2529 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2530 				break;
2531 			case 10:  /* Display 8bpp. */
2532 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2533 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2534 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2535 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2536 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2537 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2538 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2539 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2540 				break;
2541 			case 11:  /* Display 16bpp. */
2542 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2543 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2544 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2545 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2546 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2547 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2548 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2549 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2550 				break;
2551 			case 12:  /* Display 32bpp. */
2552 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2553 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2554 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2555 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2556 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2557 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2558 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2559 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2560 				break;
2561 			case 13:  /* Thin. */
2562 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2563 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2564 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2565 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2566 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2567 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2568 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2569 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2570 				break;
2571 			case 14:  /* Thin 8 bpp. */
2572 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2573 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2574 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2575 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2576 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2577 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2578 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2579 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2580 				break;
2581 			case 15:  /* Thin 16 bpp. */
2582 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2583 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2584 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2585 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2586 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2587 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2588 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2589 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2590 				break;
2591 			case 16:  /* Thin 32 bpp. */
2592 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2593 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2594 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2595 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2596 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2597 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2598 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2599 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2600 				break;
2601 			case 17:  /* Thin 64 bpp. */
2602 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2603 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2604 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2605 						 TILE_SPLIT(split_equal_to_row_size) |
2606 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2607 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2609 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2610 				break;
2611 			case 21:  /* 8 bpp PRT. */
2612 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2613 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2614 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2615 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2616 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2617 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2618 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2619 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2620 				break;
2621 			case 22:  /* 16 bpp PRT */
2622 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2623 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2624 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2625 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2626 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2627 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2629 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2630 				break;
2631 			case 23:  /* 32 bpp PRT */
2632 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2633 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2634 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2635 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2636 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2637 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2638 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2639 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2640 				break;
2641 			case 24:  /* 64 bpp PRT */
2642 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2643 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2644 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2645 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2646 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2647 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2649 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2650 				break;
2651 			case 25:  /* 128 bpp PRT */
2652 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2653 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2654 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2655 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2656 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2657 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2658 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2659 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2660 				break;
2661 			default:
2662 				gb_tile_moden = 0;
2663 				break;
2664 			}
2665 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2666 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2667 		}
2668 	} else if ((rdev->family == CHIP_VERDE) ||
2669 		   (rdev->family == CHIP_OLAND) ||
2670 		   (rdev->family == CHIP_HAINAN)) {
2671 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2672 			switch (reg_offset) {
2673 			case 0:  /* non-AA compressed depth or any compressed stencil */
2674 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2675 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2676 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2677 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2678 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2679 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2680 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2681 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2682 				break;
2683 			case 1:  /* 2xAA/4xAA compressed depth only */
2684 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2685 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2686 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2687 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2688 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2689 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2690 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2691 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2692 				break;
2693 			case 2:  /* 8xAA compressed depth only */
2694 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2695 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2696 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2697 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2698 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2699 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2700 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2701 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2702 				break;
2703 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2704 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2705 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2706 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2707 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2708 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2709 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2710 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2711 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2712 				break;
2713 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2714 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2715 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2716 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2717 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2718 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2719 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2720 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2721 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2722 				break;
2723 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2724 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2725 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2726 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2727 						 TILE_SPLIT(split_equal_to_row_size) |
2728 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2729 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2730 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2731 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2732 				break;
2733 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2734 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2735 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2736 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2737 						 TILE_SPLIT(split_equal_to_row_size) |
2738 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2739 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2740 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2741 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2742 				break;
2743 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2744 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2745 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2746 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2747 						 TILE_SPLIT(split_equal_to_row_size) |
2748 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2749 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2750 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2751 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2752 				break;
2753 			case 8:  /* 1D and 1D Array Surfaces */
2754 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2755 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2756 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2757 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2758 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2759 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2760 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2761 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2762 				break;
2763 			case 9:  /* Displayable maps. */
2764 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2765 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2766 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2768 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2769 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2770 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2771 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2772 				break;
2773 			case 10:  /* Display 8bpp. */
2774 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2775 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2776 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2777 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2778 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2779 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2780 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2781 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2782 				break;
2783 			case 11:  /* Display 16bpp. */
2784 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2786 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2788 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2789 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2790 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2791 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2792 				break;
2793 			case 12:  /* Display 32bpp. */
2794 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2795 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2796 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2797 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2798 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2799 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2800 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2801 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2802 				break;
2803 			case 13:  /* Thin. */
2804 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2805 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2806 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2807 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2808 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2809 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2810 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2811 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2812 				break;
2813 			case 14:  /* Thin 8 bpp. */
2814 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2815 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2816 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2817 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2818 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2819 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2820 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2821 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2822 				break;
2823 			case 15:  /* Thin 16 bpp. */
2824 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2825 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2826 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2827 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2828 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2829 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2831 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2832 				break;
2833 			case 16:  /* Thin 32 bpp. */
2834 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2835 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2836 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2837 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2838 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2839 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2840 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2841 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2842 				break;
2843 			case 17:  /* Thin 64 bpp. */
2844 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2845 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2846 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2847 						 TILE_SPLIT(split_equal_to_row_size) |
2848 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2849 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2850 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2851 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2852 				break;
2853 			case 21:  /* 8 bpp PRT. */
2854 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2855 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2856 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2857 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2858 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2859 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2860 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2861 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2862 				break;
2863 			case 22:  /* 16 bpp PRT */
2864 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2865 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2866 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2867 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2868 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2869 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2870 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2871 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2872 				break;
2873 			case 23:  /* 32 bpp PRT */
2874 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2875 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2876 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2877 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2878 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2879 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2880 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2881 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2882 				break;
2883 			case 24:  /* 64 bpp PRT */
2884 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2885 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2886 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2887 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2888 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2889 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2890 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2891 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2892 				break;
2893 			case 25:  /* 128 bpp PRT */
2894 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2895 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2896 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2897 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2898 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2899 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2900 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2901 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2902 				break;
2903 			default:
2904 				gb_tile_moden = 0;
2905 				break;
2906 			}
2907 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2908 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2909 		}
2910 	} else
2911 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2912 }
2913 
2914 static void si_select_se_sh(struct radeon_device *rdev,
2915 			    u32 se_num, u32 sh_num)
2916 {
2917 	u32 data = INSTANCE_BROADCAST_WRITES;
2918 
2919 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2920 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2921 	else if (se_num == 0xffffffff)
2922 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2923 	else if (sh_num == 0xffffffff)
2924 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2925 	else
2926 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2927 	WREG32(GRBM_GFX_INDEX, data);
2928 }
2929 
/*
 * si_create_bitmask - build a mask with the low @bit_width bits set
 *
 * E.g. bit_width == 3 yields 0x7.  Implemented as a loop (rather than
 * (1 << bit_width) - 1) so that bit_width == 32 yields 0xffffffff
 * without an undefined full-width shift.
 */
static u32 si_create_bitmask(u32 bit_width)
{
	u32 bit, mask = 0;

	for (bit = 0; bit < bit_width; bit++)
		mask = (mask << 1) | 1;

	return mask;
}
2940 
2941 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2942 {
2943 	u32 data, mask;
2944 
2945 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2946 	if (data & 1)
2947 		data &= INACTIVE_CUS_MASK;
2948 	else
2949 		data = 0;
2950 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2951 
2952 	data >>= INACTIVE_CUS_SHIFT;
2953 
2954 	mask = si_create_bitmask(cu_per_sh);
2955 
2956 	return ~data & mask;
2957 }
2958 
2959 static void si_setup_spi(struct radeon_device *rdev,
2960 			 u32 se_num, u32 sh_per_se,
2961 			 u32 cu_per_sh)
2962 {
2963 	int i, j, k;
2964 	u32 data, mask, active_cu;
2965 
2966 	for (i = 0; i < se_num; i++) {
2967 		for (j = 0; j < sh_per_se; j++) {
2968 			si_select_se_sh(rdev, i, j);
2969 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2970 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2971 
2972 			mask = 1;
2973 			for (k = 0; k < 16; k++) {
2974 				mask <<= k;
2975 				if (active_cu & mask) {
2976 					data &= ~mask;
2977 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2978 					break;
2979 				}
2980 			}
2981 		}
2982 	}
2983 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2984 }
2985 
2986 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2987 			      u32 max_rb_num_per_se,
2988 			      u32 sh_per_se)
2989 {
2990 	u32 data, mask;
2991 
2992 	data = RREG32(CC_RB_BACKEND_DISABLE);
2993 	if (data & 1)
2994 		data &= BACKEND_DISABLE_MASK;
2995 	else
2996 		data = 0;
2997 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2998 
2999 	data >>= BACKEND_DISABLE_SHIFT;
3000 
3001 	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3002 
3003 	return data & mask;
3004 }
3005 
/*
 * si_setup_rb - set up the render backends (RBs)
 *
 * Collects the per-SE/SH disabled-RB bitmaps into one global bitmap,
 * inverts it into an enabled-RB mask (stored in
 * rdev->config.si.backend_enable_mask), and programs
 * PA_SC_RASTER_CONFIG for each shader engine based on which RBs in
 * each two-bit group are enabled.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* gather the disabled-RB bits of every SE/SH pair into one bitmap,
	 * TAHITI_RB_BITMAP_WIDTH_PER_SH bits per shader array */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* back to broadcast so later register writes hit all SEs/SHs */
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert: every RB not flagged disabled is enabled */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	rdev->config.si.backend_enable_mask = enabled_rbs;

	/* program each SE's raster config from its two-bit RB groups;
	 * note enabled_rbs is consumed (shifted) two bits at a time */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1: /* only the first RB of the pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2: /* only the second RB of the pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3: /* both enabled (also the fallback) */
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
3055 
/*
 * si_gpu_init - one-time gfx engine configuration for SI parts
 *
 * Fills in rdev->config.si with per-family hardware limits, derives
 * the tiling/address configuration from the MC registers, programs the
 * tiling mode table, render backends and SPI, counts the active CUs,
 * and applies the HW defaults for the 3D engine.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-family limits and "golden" GB_ADDR_CONFIG value */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		/* VERDE doubles as the fallback for unrecognized families */
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read here but never used below */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* derive DRAM row size (in KB) from the column count, capped at 4 */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* mirror the address config to every block that needs it */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);

	/* count the active CUs over all SE/SH pairs */
	rdev->config.si.active_cus = 0;
	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
			rdev->config.si.active_cus +=
				hweight32(si_get_cu_active_bitmap(rdev, i, j));
		}
	}

	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* read-modify-write with no modification; presumably just a sync
	 * of the register - NOTE(review): confirm intent */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	/* FIFO sizes were chosen per family in the switch above */
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* disable the CB performance counters */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* let the programming settle before continuing */
	udelay(50);
}
3319 
3320 /*
3321  * GPU scratch registers helpers function.
3322  */
3323 static void si_scratch_init(struct radeon_device *rdev)
3324 {
3325 	int i;
3326 
3327 	rdev->scratch.num_reg = 7;
3328 	rdev->scratch.reg_base = SCRATCH_REG0;
3329 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3330 		rdev->scratch.free[i] = true;
3331 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3332 	}
3333 }
3334 
/**
 * si_fence_ring_emit - emit a fence on the requested ring
 * @rdev: radeon_device pointer
 * @fence: fence to emit (selects the ring and the sequence number)
 *
 * Emits a cache flush over GART followed by an EVENT_WRITE_EOP packet
 * that writes fence->seq to the fence driver's GPU address and raises
 * an interrupt once the preceding work on the ring has completed.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	/* flush L1 texture, texture, and shader instruction/constant caches */
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, lower_32_bits(addr));
	/* NOTE(review): DATA_SEL/INT_SEL presumably select "write fence
	 * value" and "send interrupt" - confirm against the PM4 spec */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3361 
3362 /*
3363  * IB stuff
3364  */
/**
 * si_ring_ib_execute - schedule an indirect buffer on a CP ring
 * @rdev: radeon_device pointer
 * @ib: the IB to schedule
 *
 * Emits an INDIRECT_BUFFER (or INDIRECT_BUFFER_CONST for const IBs)
 * packet referencing @ib.  For non-const IBs this is preceded by a
 * write that publishes the expected ring read pointer (so the CPU can
 * track ring consumption) and followed by a GART cache flush for the
 * IB's VM id.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* the +3/+4/+8 offsets count the dwords emitted by this
			 * packet, the IB packet below, and the trailing flush */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			/* NOTE(review): (1 << 8) selects the write destination -
			 * confirm against the WRITE_DATA packet definition */
			radeon_ring_write(ring, (1 << 8));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, vm_id);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3421 
3422 /*
3423  * CP.
3424  */
3425 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3426 {
3427 	if (enable)
3428 		WREG32(CP_ME_CNTL, 0);
3429 	else {
3430 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3431 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3432 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3433 		WREG32(SCRATCH_UMSK, 0);
3434 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3435 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3436 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3437 	}
3438 	udelay(50);
3439 }
3440 
3441 static int si_cp_load_microcode(struct radeon_device *rdev)
3442 {
3443 	int i;
3444 
3445 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3446 		return -EINVAL;
3447 
3448 	si_cp_enable(rdev, false);
3449 
3450 	if (rdev->new_fw) {
3451 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3452 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3453 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3454 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3455 		const struct gfx_firmware_header_v1_0 *me_hdr =
3456 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3457 		const __le32 *fw_data;
3458 		u32 fw_size;
3459 
3460 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3461 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3462 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3463 
3464 		/* PFP */
3465 		fw_data = (const __le32 *)
3466 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3467 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3468 		WREG32(CP_PFP_UCODE_ADDR, 0);
3469 		for (i = 0; i < fw_size; i++)
3470 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3471 		WREG32(CP_PFP_UCODE_ADDR, 0);
3472 
3473 		/* CE */
3474 		fw_data = (const __le32 *)
3475 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3476 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3477 		WREG32(CP_CE_UCODE_ADDR, 0);
3478 		for (i = 0; i < fw_size; i++)
3479 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3480 		WREG32(CP_CE_UCODE_ADDR, 0);
3481 
3482 		/* ME */
3483 		fw_data = (const __be32 *)
3484 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3485 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3486 		WREG32(CP_ME_RAM_WADDR, 0);
3487 		for (i = 0; i < fw_size; i++)
3488 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3489 		WREG32(CP_ME_RAM_WADDR, 0);
3490 	} else {
3491 		const __be32 *fw_data;
3492 
3493 		/* PFP */
3494 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3495 		WREG32(CP_PFP_UCODE_ADDR, 0);
3496 		for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3497 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3498 		WREG32(CP_PFP_UCODE_ADDR, 0);
3499 
3500 		/* CE */
3501 		fw_data = (const __be32 *)rdev->ce_fw->data;
3502 		WREG32(CP_CE_UCODE_ADDR, 0);
3503 		for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3504 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3505 		WREG32(CP_CE_UCODE_ADDR, 0);
3506 
3507 		/* ME */
3508 		fw_data = (const __be32 *)rdev->me_fw->data;
3509 		WREG32(CP_ME_RAM_WADDR, 0);
3510 		for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3511 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3512 		WREG32(CP_ME_RAM_WADDR, 0);
3513 	}
3514 
3515 	WREG32(CP_PFP_UCODE_ADDR, 0);
3516 	WREG32(CP_CE_UCODE_ADDR, 0);
3517 	WREG32(CP_ME_RAM_WADDR, 0);
3518 	WREG32(CP_ME_RAM_RADDR, 0);
3519 	return 0;
3520 }
3521 
3522 static int si_cp_start(struct radeon_device *rdev)
3523 {
3524 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3525 	int r, i;
3526 
3527 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3528 	if (r) {
3529 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3530 		return r;
3531 	}
3532 	/* init the CP */
3533 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3534 	radeon_ring_write(ring, 0x1);
3535 	radeon_ring_write(ring, 0x0);
3536 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3537 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3538 	radeon_ring_write(ring, 0);
3539 	radeon_ring_write(ring, 0);
3540 
3541 	/* init the CE partitions */
3542 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3543 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3544 	radeon_ring_write(ring, 0xc000);
3545 	radeon_ring_write(ring, 0xe000);
3546 	radeon_ring_unlock_commit(rdev, ring, false);
3547 
3548 	si_cp_enable(rdev, true);
3549 
3550 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3551 	if (r) {
3552 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3553 		return r;
3554 	}
3555 
3556 	/* setup clear context state */
3557 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3558 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3559 
3560 	for (i = 0; i < si_default_size; i++)
3561 		radeon_ring_write(ring, si_default_state[i]);
3562 
3563 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3564 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3565 
3566 	/* set clear context state */
3567 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3568 	radeon_ring_write(ring, 0);
3569 
3570 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3571 	radeon_ring_write(ring, 0x00000316);
3572 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3573 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3574 
3575 	radeon_ring_unlock_commit(rdev, ring, false);
3576 
3577 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3578 		ring = &rdev->ring[i];
3579 		r = radeon_ring_lock(rdev, ring, 2);
3580 
3581 		/* clear the compute context state */
3582 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3583 		radeon_ring_write(ring, 0);
3584 
3585 		radeon_ring_unlock_commit(rdev, ring, false);
3586 	}
3587 
3588 	return 0;
3589 }
3590 
3591 static void si_cp_fini(struct radeon_device *rdev)
3592 {
3593 	struct radeon_ring *ring;
3594 	si_cp_enable(rdev, false);
3595 
3596 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3597 	radeon_ring_fini(rdev, ring);
3598 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3599 
3600 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3601 	radeon_ring_fini(rdev, ring);
3602 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3603 
3604 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3605 	radeon_ring_fini(rdev, ring);
3606 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3607 }
3608 
/**
 * si_cp_resume - program and start the three CP ring buffers
 *
 * @rdev: radeon_device pointer
 *
 * Programs the ring buffer registers (size, read/write pointers, rptr
 * writeback address, base) for the gfx ring (RB0) and the two compute
 * rings (RB1/RB2), starts the CP via si_cp_start() and ring-tests all
 * three rings.  A gfx ring test failure is fatal; a failing compute
 * ring is merely left marked not ready.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	si_enable_gui_idle_interrupt(rdev, false);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		/* no writeback: don't let the CP update rptr in memory */
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	/* settle before dropping RB_RPTR_WR_ENA */
	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		/* gfx ring broken: mark everything not ready and bail */
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	si_enable_gui_idle_interrupt(rdev, true);

	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
3734 
/**
 * si_gpu_check_soft_reset - build a mask of busy/hung GPU blocks
 *
 * @rdev: radeon_device pointer
 *
 * Reads the GRBM/SRBM/DMA/VM status registers and collects a
 * RADEON_RESET_* flag for every block that still reports busy.
 *
 * Returns the reset mask; 0 means the GPU appears idle.
 */
u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's most likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3815 
/**
 * si_gpu_soft_reset - soft-reset the GPU blocks named in @reset_mask
 *
 * @rdev: radeon_device pointer
 * @reset_mask: RADEON_RESET_* mask of blocks to reset
 *
 * Quiesces the engines (CG/PG off, RLC stopped, CP halted, DMA ring
 * buffers disabled), stops the MC, then pulses the matching bits in
 * GRBM_SOFT_RESET / SRBM_SOFT_RESET and restores the MC.  The order of
 * the steps below is deliberate; do not reorder.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable PG/CG */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* stop the rlc */
	si_rlc_stop(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	if (grbm_soft_reset) {
		/* assert, read back to post the write, wait, deassert */
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3947 
/**
 * si_set_clk_bypass_mode - switch sclk/mclk to bypass clocks
 *
 * @rdev: radeon_device pointer
 *
 * Puts the SPLL into bypass, requests the clock change and waits (up
 * to rdev->usec_timeout) for SPLL_CHG_STATUS to confirm it, then
 * deselects the MPLL as the mclk source.  Used before a PCI config
 * reset so the chip is not running off the PLLs.
 */
static void si_set_clk_bypass_mode(struct radeon_device *rdev)
{
	u32 tmp, i;

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_BYPASS_EN;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
	tmp |= SPLL_CTLREQ_CHG;
	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);

	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
			break;
		udelay(1);
	}

	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);

	tmp = RREG32(MPLL_CNTL_MODE);
	tmp &= ~MPLL_MCLK_SEL;
	WREG32(MPLL_CNTL_MODE, tmp);
}
3974 
/**
 * si_spll_powerdown - power down the SPLL
 *
 * @rdev: radeon_device pointer
 *
 * Takes software control of the SPLL, asserts reset and sleep, then
 * returns control to the hardware.  Called after switching to bypass
 * clocks, prior to a PCI config reset.
 */
static void si_spll_powerdown(struct radeon_device *rdev)
{
	u32 tmp;

	tmp = RREG32(SPLL_CNTL_MODE);
	tmp |= SPLL_SW_DIR_CONTROL;
	WREG32(SPLL_CNTL_MODE, tmp);

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_RESET;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_SLEEP;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	tmp = RREG32(SPLL_CNTL_MODE);
	tmp &= ~SPLL_SW_DIR_CONTROL;
	WREG32(SPLL_CNTL_MODE, tmp);
}
3995 
/**
 * si_gpu_pci_config_reset - reset the asic via the PCI config space
 *
 * @rdev: radeon_device pointer
 *
 * Heavier-weight fallback when soft reset fails: quiesces the engines,
 * stops memory access, drops the clocks to bypass, powers down the
 * SPLL, disables bus mastering and triggers a PCI config reset, then
 * waits for CONFIG_MEMSIZE to become readable again as the sign the
 * asic is back.
 */
static void si_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
	/* dma0 */
	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	/* dma1 */
	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	si_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* set mclk/sclk to bypass */
	si_set_clk_bypass_mode(rdev);
	/* powerdown spll */
	si_spll_powerdown(rdev);
	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);
	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}
}
4047 
/**
 * si_asic_reset - reset the asic
 *
 * @rdev: radeon_device pointer
 *
 * Tries a soft reset of the blocks that report busy; if some are
 * still busy afterwards and hard reset is allowed, falls back to a
 * PCI config reset.  The BIOS scratch "engine hung" flag is set while
 * a reset is pending and cleared once the GPU reports idle.
 *
 * Always returns 0 (callers re-check the hang state themselves).
 */
int si_asic_reset(struct radeon_device *rdev)
{
	u32 reset_mask;

	reset_mask = si_gpu_check_soft_reset(rdev);

	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	/* try soft reset */
	si_gpu_soft_reset(rdev, reset_mask);

	reset_mask = si_gpu_check_soft_reset(rdev);

	/* try pci config reset */
	if (reset_mask && radeon_hard_reset)
		si_gpu_pci_config_reset(rdev);

	reset_mask = si_gpu_check_soft_reset(rdev);

	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
4073 
4074 /**
4075  * si_gfx_is_lockup - Check if the GFX engine is locked up
4076  *
4077  * @rdev: radeon_device pointer
4078  * @ring: radeon_ring structure holding ring information
4079  *
4080  * Check if the GFX engine is locked up.
4081  * Returns true if the engine appears to be locked up, false if not.
4082  */
4083 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4084 {
4085 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
4086 
4087 	if (!(reset_mask & (RADEON_RESET_GFX |
4088 			    RADEON_RESET_COMPUTE |
4089 			    RADEON_RESET_CP))) {
4090 		radeon_ring_lockup_update(rdev, ring);
4091 		return false;
4092 	}
4093 	return radeon_ring_test_lockup(rdev, ring);
4094 }
4095 
4096 /* MC */
/**
 * si_mc_program - program the memory controller aperture registers
 *
 * @rdev: radeon_device pointer
 *
 * Clears the 0x2c14..0x2c24 register groups (HDP-related init — see
 * the identical sequence on other radeon asics), stops the MC, locks
 * out VGA access, programs the system/FB aperture and HDP nonsurface
 * registers, disables the AGP aperture, and resumes the MC.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: top 16 bits = end, bottom 16 bits = start (in 16MB units) */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* disable the AGP aperture (BOT > TOP) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
4147 
4148 void si_vram_gtt_location(struct radeon_device *rdev,
4149 			  struct radeon_mc *mc)
4150 {
4151 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
4152 		/* leave room for at least 1024M GTT */
4153 		dev_warn(rdev->dev, "limiting VRAM\n");
4154 		mc->real_vram_size = 0xFFC0000000ULL;
4155 		mc->mc_vram_size = 0xFFC0000000ULL;
4156 	}
4157 	radeon_vram_location(rdev, &rdev->mc, 0);
4158 	rdev->mc.gtt_base_align = 0;
4159 	radeon_gtt_location(rdev, mc);
4160 }
4161 
/**
 * si_mc_init - probe VRAM configuration
 *
 * @rdev: radeon_device pointer
 *
 * Derives the memory bus width from the channel size and channel
 * count registers, reads the VRAM size (in MB) from CONFIG_MEMSIZE
 * (sanitizing boards that report garbage in the upper 16 bits), and
 * sets up the VRAM/GTT placement.
 *
 * Returns 0 (currently cannot fail).
 */
static int si_mc_init(struct radeon_device *rdev)
{
	u32 tmp;
	int chansize, numchan;

	/* Get VRAM informations */
	rdev->mc.vram_is_ddr = true;
	tmp = RREG32(MC_ARB_RAMCFG);
	if (tmp & CHANSIZE_OVERRIDE) {
		chansize = 16;
	} else if (tmp & CHANSIZE_MASK) {
		chansize = 64;
	} else {
		chansize = 32;
	}
	tmp = RREG32(MC_SHARED_CHMAP);
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		numchan = 1;
		break;
	case 1:
		numchan = 2;
		break;
	case 2:
		numchan = 4;
		break;
	case 3:
		numchan = 8;
		break;
	case 4:
		numchan = 3;
		break;
	case 5:
		numchan = 6;
		break;
	case 6:
		numchan = 10;
		break;
	case 7:
		numchan = 12;
		break;
	case 8:
		numchan = 16;
		break;
	}
	rdev->mc.vram_width = numchan * chansize;
	/* Could aper size report 0 ? */
	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
	/* size in MB on si */
	tmp = RREG32(CONFIG_MEMSIZE);
	/* some boards may have garbage in the upper 16 bits */
	if (tmp & 0xffff0000) {
		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
		if (tmp & 0xffff)
			tmp &= 0xffff;
	}
	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
	rdev->mc.visible_vram_size = rdev->mc.aper_size;
	si_vram_gtt_location(rdev, &rdev->mc);
	radeon_update_bandwidth_info(rdev);

	return 0;
}
4228 
4229 /*
4230  * GART
4231  */
/**
 * si_pcie_gart_tlb_flush - flush the HDP cache and VM TLBs
 *
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP read/write cache and invalidates the TLBs of all
 * 16 VM contexts so subsequent GPU accesses see updated page tables.
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
4240 
/**
 * si_pcie_gart_enable - set up the PCIE GART / VM
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table, programs the L1 TLB and L2 cache, sets up
 * VM context 0 as the system GART mapping and enables contexts 1-15
 * for per-process VMs (restoring any previously saved page table
 * bases), then flushes the TLBs.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faults in context0 are redirected to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
4328 
/**
 * si_pcie_gart_disable - tear down the PCIE GART / VM
 *
 * @rdev: radeon_device pointer
 *
 * Saves the page table base of VM contexts 1-15 (restored on the next
 * si_pcie_gart_enable()), disables all VM contexts, puts the TLB and
 * L2 cache into their disabled configuration, and unpins the GART
 * table.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
4358 
/**
 * si_pcie_gart_fini - final GART teardown
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART hardware before freeing the table memory and
 * releasing the common GART state; the order matters.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4365 
4366 /* vm parser */
/**
 * si_vm_reg_valid - check if a register may be written from a VM IB
 *
 * @reg: register byte offset
 *
 * Whitelist used by the VM command-stream checker: all context
 * registers (>= 0x28000) are allowed, plus the specific config
 * registers listed below.  Anything else is rejected with an error.
 *
 * Returns true if the register is safe to write, false otherwise.
 */
static bool si_vm_reg_valid(u32 reg)
{
	/* context regs are fine */
	if (reg >= 0x28000)
		return true;

	/* check config regs */
	switch (reg) {
	case GRBM_GFX_INDEX:
	case CP_STRMOUT_CNTL:
	case VGT_VTX_VECT_EJECT_REG:
	case VGT_CACHE_INVALIDATION:
	case VGT_ESGS_RING_SIZE:
	case VGT_GSVS_RING_SIZE:
	case VGT_GS_VERTEX_REUSE:
	case VGT_PRIMITIVE_TYPE:
	case VGT_INDEX_TYPE:
	case VGT_NUM_INDICES:
	case VGT_NUM_INSTANCES:
	case VGT_TF_RING_SIZE:
	case VGT_HS_OFFCHIP_PARAM:
	case VGT_TF_MEMORY_BASE:
	case PA_CL_ENHANCE:
	case PA_SU_LINE_STIPPLE_VALUE:
	case PA_SC_LINE_STIPPLE_STATE:
	case PA_SC_ENHANCE:
	case SQC_CACHES:
	case SPI_STATIC_THREAD_MGMT_1:
	case SPI_STATIC_THREAD_MGMT_2:
	case SPI_STATIC_THREAD_MGMT_3:
	case SPI_PS_MAX_WAVE_ID:
	case SPI_CONFIG_CNTL:
	case SPI_CONFIG_CNTL_1:
	case TA_CNTL_AUX:
		return true;
	default:
		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
		return false;
	}
}
4407 
/**
 * si_vm_packet3_ce_check - validate a PACKET3 on the CE (constant engine)
 *
 * @rdev: radeon_device pointer (unused here, kept for checker signature)
 * @ib: indirect buffer dwords
 * @pkt: parsed packet header
 *
 * Only a small whitelist of CE opcodes is permitted in VM IBs.
 *
 * Returns 0 if the opcode is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_ce_check(struct radeon_device *rdev,
				  u32 *ib, struct radeon_cs_packet *pkt)
{
	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_SET_CE_DE_COUNTERS:
	case PACKET3_LOAD_CONST_RAM:
	case PACKET3_WRITE_CONST_RAM:
	case PACKET3_WRITE_CONST_RAM_OFFSET:
	case PACKET3_DUMP_CONST_RAM:
	case PACKET3_INCREMENT_CE_COUNTER:
	case PACKET3_WAIT_ON_DE_COUNTER:
	case PACKET3_CE_WRITE:
		break;
	default:
		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4429 
4430 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4431 {
4432 	u32 start_reg, reg, i;
4433 	u32 command = ib[idx + 4];
4434 	u32 info = ib[idx + 1];
4435 	u32 idx_value = ib[idx];
4436 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4437 		/* src address space is register */
4438 		if (((info & 0x60000000) >> 29) == 0) {
4439 			start_reg = idx_value << 2;
4440 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4441 				reg = start_reg;
4442 				if (!si_vm_reg_valid(reg)) {
4443 					DRM_ERROR("CP DMA Bad SRC register\n");
4444 					return -EINVAL;
4445 				}
4446 			} else {
4447 				for (i = 0; i < (command & 0x1fffff); i++) {
4448 					reg = start_reg + (4 * i);
4449 					if (!si_vm_reg_valid(reg)) {
4450 						DRM_ERROR("CP DMA Bad SRC register\n");
4451 						return -EINVAL;
4452 					}
4453 				}
4454 			}
4455 		}
4456 	}
4457 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4458 		/* dst address space is register */
4459 		if (((info & 0x00300000) >> 20) == 0) {
4460 			start_reg = ib[idx + 2];
4461 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4462 				reg = start_reg;
4463 				if (!si_vm_reg_valid(reg)) {
4464 					DRM_ERROR("CP DMA Bad DST register\n");
4465 					return -EINVAL;
4466 				}
4467 			} else {
4468 				for (i = 0; i < (command & 0x1fffff); i++) {
4469 					reg = start_reg + (4 * i);
4470 				if (!si_vm_reg_valid(reg)) {
4471 						DRM_ERROR("CP DMA Bad DST register\n");
4472 						return -EINVAL;
4473 					}
4474 				}
4475 			}
4476 		}
4477 	}
4478 	return 0;
4479 }
4480 
/**
 * si_vm_packet3_gfx_check - validate a PACKET3 on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer dwords
 * @pkt: parsed packet header
 *
 * Whitelists the PACKET3 opcodes permitted in VM IBs on the gfx ring.
 * Opcodes that can write registers (COPY_DATA, WRITE_DATA, COND_WRITE,
 * COPY_DW, SET_CONFIG_REG, CP_DMA) additionally have each target
 * register checked against si_vm_reg_valid().
 *
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;

	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dest select 0 = register */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* single-register (no increment) mode */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4598 
/**
 * si_vm_packet3_compute_check - validate a PACKET3 from a VM compute ring IB
 *
 * @rdev: radeon_device pointer
 * @ib: IB dword buffer
 * @pkt: decoded packet (idx of header, opcode, payload count)
 *
 * Compute-ring counterpart of si_vm_packet3_gfx_check(): a smaller set of
 * allowed opcodes (no draw packets), with the same register-destination
 * checks via si_vm_reg_valid().  Returns 0 on success, -EINVAL on a
 * rejected register or unknown opcode.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;		/* first dword after the packet header */
	u32 idx_value = ib[idx];	/* control word of the packet */
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	/* opcodes that cannot write registers; allowed as-is */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dst-sel field 0 means a mem-mapped register destination */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dst-sel field 0 => register write */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* one-register mode */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* count - 2 data dwords, consecutive registers */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* bit 8 set => write target is a register (at idx + 5) */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* bit 1 set => destination is a register */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		/* CP DMA has its own dedicated validator */
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4686 
/**
 * si_ib_parse - validate an indirect buffer submitted for a VM
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to validate
 *
 * Walks the IB packet by packet and dispatches each PACKET3 to the
 * checker matching its ring (CE for const IBs, GFX or compute otherwise).
 * PACKET0 is rejected outright (and the IB dumped for debugging);
 * PACKET2 is a 1-dword filler.  Returns 0 if every packet passed,
 * otherwise the first checker's error code.
 */
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	int ret = 0;
	u32 idx = 0, i;
	struct radeon_cs_packet pkt;

	do {
		pkt.idx = idx;
		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
		pkt.one_reg_wr = 0;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			/* register writes via PACKET0 are forbidden in VM IBs;
			 * dump the whole IB with the offender marked */
			dev_err(rdev->dev, "Packet0 not allowed!\n");
			for (i = 0; i < ib->length_dw; i++) {
				if (i == idx)
					printk("\t0x%08x <---\n", ib->ptr[i]);
				else
					printk("\t0x%08x\n", ib->ptr[i]);
			}
			ret = -EINVAL;
			break;
		case RADEON_PACKET_TYPE2:
			/* type-2 packets are single-dword padding */
			idx += 1;
			break;
		case RADEON_PACKET_TYPE3:
			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
			if (ib->is_const_ib)
				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
			else {
				switch (ib->ring) {
				case RADEON_RING_TYPE_GFX_INDEX:
					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
					break;
				case CAYMAN_RING_TYPE_CP1_INDEX:
				case CAYMAN_RING_TYPE_CP2_INDEX:
					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
					break;
				default:
					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
					ret = -EINVAL;
					break;
				}
			}
			/* header dword + count+1 payload dwords */
			idx += pkt.count + 2;
			break;
		default:
			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
			ret = -EINVAL;
			break;
		}
		if (ret)
			break;
	} while (idx < ib->length_dw);

	return ret;
}
4744 
4745 /*
4746  * vm
4747  */
4748 int si_vm_init(struct radeon_device *rdev)
4749 {
4750 	/* number of VMs */
4751 	rdev->vm_manager.nvm = 16;
4752 	/* base offset of vram pages */
4753 	rdev->vm_manager.vram_base_offset = 0;
4754 
4755 	return 0;
4756 }
4757 
void si_vm_fini(struct radeon_device *rdev)
{
	/* nothing to tear down: si_vm_init() allocates no resources */
}
4761 
4762 /**
4763  * si_vm_decode_fault - print human readable fault info
4764  *
4765  * @rdev: radeon_device pointer
4766  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4767  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4768  *
4769  * Print human readable fault information (SI).
4770  */
static void si_vm_decode_fault(struct radeon_device *rdev,
			       u32 status, u32 addr)
{
	/* unpack the fault status fields */
	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	char *block;

	/* the mc_id -> client-block mapping differs between Tahiti and the
	 * other SI parts (e.g. an extra TC id 64/128/192, VGT id 190) */
	if (rdev->family == CHIP_TAHITI) {
		switch (mc_id) {
		case 160:
		case 144:
		case 96:
		case 80:
		case 224:
		case 208:
		case 32:
		case 16:
			block = "CB";
			break;
		case 161:
		case 145:
		case 97:
		case 81:
		case 225:
		case 209:
		case 33:
		case 17:
			block = "CB_FMASK";
			break;
		case 162:
		case 146:
		case 98:
		case 82:
		case 226:
		case 210:
		case 34:
		case 18:
			block = "CB_CMASK";
			break;
		case 163:
		case 147:
		case 99:
		case 83:
		case 227:
		case 211:
		case 35:
		case 19:
			block = "CB_IMMED";
			break;
		case 164:
		case 148:
		case 100:
		case 84:
		case 228:
		case 212:
		case 36:
		case 20:
			block = "DB";
			break;
		case 165:
		case 149:
		case 101:
		case 85:
		case 229:
		case 213:
		case 37:
		case 21:
			block = "DB_HTILE";
			break;
		case 167:
		case 151:
		case 103:
		case 87:
		case 231:
		case 215:
		case 39:
		case 23:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 64:
		case 8:
		case 4:
		case 0:
		case 136:
		case 132:
		case 128:
		case 200:
		case 196:
		case 192:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
		case 190:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	} else {
		/* Pitcairn/Verde/Oland/Hainan mapping */
		switch (mc_id) {
		case 32:
		case 16:
		case 96:
		case 80:
		case 160:
		case 144:
		case 224:
		case 208:
			block = "CB";
			break;
		case 33:
		case 17:
		case 97:
		case 81:
		case 161:
		case 145:
		case 225:
		case 209:
			block = "CB_FMASK";
			break;
		case 34:
		case 18:
		case 98:
		case 82:
		case 162:
		case 146:
		case 226:
		case 210:
			block = "CB_CMASK";
			break;
		case 35:
		case 19:
		case 99:
		case 83:
		case 163:
		case 147:
		case 227:
		case 211:
			block = "CB_IMMED";
			break;
		case 36:
		case 20:
		case 100:
		case 84:
		case 164:
		case 148:
		case 228:
		case 212:
			block = "DB";
			break;
		case 37:
		case 21:
		case 101:
		case 85:
		case 165:
		case 149:
		case 229:
		case 213:
			block = "DB_HTILE";
			break;
		case 39:
		case 23:
		case 103:
		case 87:
		case 167:
		case 151:
		case 231:
		case 215:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 8:
		case 4:
		case 136:
		case 132:
		case 200:
		case 196:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	}

	/* addr is the faulting page number, not a byte address */
	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_id);
}
5025 
/**
 * si_vm_flush - emit the ring commands that flush a VM's TLB
 *
 * @rdev: radeon_device pointer
 * @ring: ring to emit on
 * @vm_id: VM context to flush (0-15)
 * @pd_addr: GPU address of the page directory
 *
 * Updates the per-VM page table base register, flushes the HDP cache,
 * requests a TLB invalidate for the VM and waits for it to complete.
 * All register writes use WRITE_DATA with engine sel 1 (PFP).
 */
void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		 unsigned vm_id, uint64_t pd_addr)
{
	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));

	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
5074 
5075 /*
5076  *  Power and clock gating
5077  */
5078 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5079 {
5080 	int i;
5081 
5082 	for (i = 0; i < rdev->usec_timeout; i++) {
5083 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5084 			break;
5085 		udelay(1);
5086 	}
5087 
5088 	for (i = 0; i < rdev->usec_timeout; i++) {
5089 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5090 			break;
5091 		udelay(1);
5092 	}
5093 }
5094 
/**
 * si_enable_gui_idle_interrupt - gate the GUI busy/empty interrupts
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the context busy/empty interrupts
 *
 * When disabling, additionally waits for the RLC to report the gfx
 * block clocked and powered (needed before touching gfx registers
 * with clockgating active).
 */
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable)
{
	u32 tmp = RREG32(CP_INT_CNTL_RING0);
	u32 mask;
	int i;

	if (enable)
		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	else
		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);

	if (!enable) {
		/* read a gfx register */
		tmp = RREG32(DB_DEPTH_INFO);

		/* wait until the RLC reports gfx clocked and powered up */
		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
				break;
			udelay(1);
		}
	}
}
5120 
5121 static void si_set_uvd_dcm(struct radeon_device *rdev,
5122 			   bool sw_mode)
5123 {
5124 	u32 tmp, tmp2;
5125 
5126 	tmp = RREG32(UVD_CGC_CTRL);
5127 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5128 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
5129 
5130 	if (sw_mode) {
5131 		tmp &= ~0x7ffff800;
5132 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5133 	} else {
5134 		tmp |= 0x7ffff800;
5135 		tmp2 = 0;
5136 	}
5137 
5138 	WREG32(UVD_CGC_CTRL, tmp);
5139 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5140 }
5141 
5142 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5143 {
5144 	bool hw_mode = true;
5145 
5146 	if (hw_mode) {
5147 		si_set_uvd_dcm(rdev, false);
5148 	} else {
5149 		u32 tmp = RREG32(UVD_CGC_CTRL);
5150 		tmp &= ~DCM;
5151 		WREG32(UVD_CGC_CTRL, tmp);
5152 	}
5153 }
5154 
5155 static u32 si_halt_rlc(struct radeon_device *rdev)
5156 {
5157 	u32 data, orig;
5158 
5159 	orig = data = RREG32(RLC_CNTL);
5160 
5161 	if (data & RLC_ENABLE) {
5162 		data &= ~RLC_ENABLE;
5163 		WREG32(RLC_CNTL, data);
5164 
5165 		si_wait_for_rlc_serdes(rdev);
5166 	}
5167 
5168 	return orig;
5169 }
5170 
5171 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5172 {
5173 	u32 tmp;
5174 
5175 	tmp = RREG32(RLC_CNTL);
5176 	if (tmp != rlc)
5177 		WREG32(RLC_CNTL, rlc);
5178 }
5179 
5180 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5181 {
5182 	u32 data, orig;
5183 
5184 	orig = data = RREG32(DMA_PG);
5185 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5186 		data |= PG_CNTL_ENABLE;
5187 	else
5188 		data &= ~PG_CNTL_ENABLE;
5189 	if (orig != data)
5190 		WREG32(DMA_PG, data);
5191 }
5192 
5193 static void si_init_dma_pg(struct radeon_device *rdev)
5194 {
5195 	u32 tmp;
5196 
5197 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
5198 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5199 
5200 	for (tmp = 0; tmp < 5; tmp++)
5201 		WREG32(DMA_PGFSM_WRITE, 0);
5202 }
5203 
/**
 * si_enable_gfx_cgpg - enable/disable gfx clock/power gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable gfx powergating
 *
 * On enable (and when the board supports GFX PG): programs the RLC
 * power-up/down thresholds, turns on GFX_PG and automatic powergating.
 * On disable: clears auto powergating and reads a gfx register
 * (DB_RENDER_CONTROL) — presumably to force the gfx block awake;
 * TODO confirm intent against AMD docs.
 */
static void si_enable_gfx_cgpg(struct radeon_device *rdev,
			       bool enable)
{
	u32 tmp;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		/* power up/down/thread/mgcg delay thresholds */
		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
		WREG32(RLC_TTOP_D, tmp);

		tmp = RREG32(RLC_PG_CNTL);
		tmp |= GFX_PG_ENABLE;
		WREG32(RLC_PG_CNTL, tmp);

		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp |= AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);
	} else {
		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp &= ~AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);

		tmp = RREG32(DB_RENDER_CONTROL);
	}
}
5228 
/* Initialize the RLC registers used by gfx clock/power gating:
 * save/restore and clear-state buffer addresses plus the auto-PG
 * grbm-idle threshold. */
static void si_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 tmp;

	/* RLC save/restore buffer, 256-byte aligned GPU address */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);

	tmp = RREG32(RLC_PG_CNTL);
	tmp |= GFX_PG_SRC;
	WREG32(RLC_PG_CNTL, tmp);

	/* clear state buffer, 256-byte aligned GPU address */
	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	tmp = RREG32(RLC_AUTO_PG_CTRL);

	/* idle threshold before the RLC powergates the gfx block */
	tmp &= ~GRBM_REG_SGIT_MASK;
	tmp |= GRBM_REG_SGIT(0x700);
	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
	WREG32(RLC_AUTO_PG_CTRL, tmp);
}
5248 
5249 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5250 {
5251 	u32 mask = 0, tmp, tmp1;
5252 	int i;
5253 
5254 	si_select_se_sh(rdev, se, sh);
5255 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5256 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5257 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5258 
5259 	tmp &= 0xffff0000;
5260 
5261 	tmp |= tmp1;
5262 	tmp >>= 16;
5263 
5264 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5265 		mask <<= 1;
5266 		mask |= 1;
5267 	}
5268 
5269 	return (~tmp) & mask;
5270 }
5271 
5272 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5273 {
5274 	u32 i, j, k, active_cu_number = 0;
5275 	u32 mask, counter, cu_bitmap;
5276 	u32 tmp = 0;
5277 
5278 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5279 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5280 			mask = 1;
5281 			cu_bitmap = 0;
5282 			counter  = 0;
5283 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5284 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5285 					if (counter < 2)
5286 						cu_bitmap |= mask;
5287 					counter++;
5288 				}
5289 				mask <<= 1;
5290 			}
5291 
5292 			active_cu_number += counter;
5293 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5294 		}
5295 	}
5296 
5297 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5298 
5299 	tmp = RREG32(RLC_MAX_PG_CU);
5300 	tmp &= ~MAX_PU_CU_MASK;
5301 	tmp |= MAX_PU_CU(active_cu_number);
5302 	WREG32(RLC_MAX_PG_CU, tmp);
5303 }
5304 
/**
 * si_enable_cgcg - enable/disable coarse-grain clockgating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable CGCG/CGLS
 *
 * The serdes write sequence (halt RLC, broadcast masks, write ctrl,
 * wait, restore RLC) follows AMD's prescribed ordering — do not
 * reorder these register accesses.
 */
static void si_enable_cgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		si_enable_gui_idle_interrupt(rdev, true);

		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

		/* halt the RLC while poking the serdes */
		tmp = si_halt_rlc(rdev);

		/* broadcast to all serdes instances */
		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

		si_wait_for_rlc_serdes(rdev);

		/* restore the saved RLC state */
		si_update_rlc(rdev, tmp);

		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

		data |= CGCG_EN | CGLS_EN;
	} else {
		si_enable_gui_idle_interrupt(rdev, false);

		/* dummy reads to flush pending CB clockgating state */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
5344 
/**
 * si_enable_mgcg - enable/disable medium-grain clockgating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable MGCG
 *
 * Programs CGTS/CP memory light sleep and the MGCG override bits, then
 * pushes the new state out through the RLC serdes (halt / broadcast /
 * restore).  Register access order follows AMD's sequence.
 */
static void si_enable_mgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data = 0x96940200;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* CP memory light sleep, if supported */
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
			orig = data = RREG32(CP_MEM_SLP_CNTL);
			data |= CP_MEM_LS_EN;
			if (orig != data)
				WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* clear the MGCG override bits (low 6 bits) */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xffffffc0;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

		si_update_rlc(rdev, tmp);
	} else {
		/* force the override bits on to disable MGCG */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= LS_OVERRIDE | OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

		si_update_rlc(rdev, tmp);
	}
}
5400 
/**
 * si_enable_uvd_mgcg - enable/disable UVD medium-grain clockgating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable UVD MGCG
 *
 * Toggles the UVD memory gating bits, the DCM bit in UVD_CGC_CTRL and
 * the SMC-side CGTT local gating registers.
 */
static void si_enable_uvd_mgcg(struct radeon_device *rdev,
			       bool enable)
{
	u32 orig, data, tmp;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		/* enable all 14 UVD memory gating bits */
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp |= 0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		/* 0 = gating enabled on the SMC side */
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
	} else {
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp &= ~0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		/* all-ones = gating overridden (disabled) */
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
	}
}
5432 
/* Memory-controller / ATC / VM clock-gating control registers shared
 * by the MC MGCG and MC LS enable paths below. */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5445 
5446 static void si_enable_mc_ls(struct radeon_device *rdev,
5447 			    bool enable)
5448 {
5449 	int i;
5450 	u32 orig, data;
5451 
5452 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5453 		orig = data = RREG32(mc_cg_registers[i]);
5454 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5455 			data |= MC_LS_ENABLE;
5456 		else
5457 			data &= ~MC_LS_ENABLE;
5458 		if (data != orig)
5459 			WREG32(mc_cg_registers[i], data);
5460 	}
5461 }
5462 
5463 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5464 			       bool enable)
5465 {
5466 	int i;
5467 	u32 orig, data;
5468 
5469 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5470 		orig = data = RREG32(mc_cg_registers[i]);
5471 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5472 			data |= MC_CG_ENABLE;
5473 		else
5474 			data &= ~MC_CG_ENABLE;
5475 		if (data != orig)
5476 			WREG32(mc_cg_registers[i], data);
5477 	}
5478 }
5479 
5480 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5481 			       bool enable)
5482 {
5483 	u32 orig, data, offset;
5484 	int i;
5485 
5486 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5487 		for (i = 0; i < 2; i++) {
5488 			if (i == 0)
5489 				offset = DMA0_REGISTER_OFFSET;
5490 			else
5491 				offset = DMA1_REGISTER_OFFSET;
5492 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5493 			data &= ~MEM_POWER_OVERRIDE;
5494 			if (data != orig)
5495 				WREG32(DMA_POWER_CNTL + offset, data);
5496 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5497 		}
5498 	} else {
5499 		for (i = 0; i < 2; i++) {
5500 			if (i == 0)
5501 				offset = DMA0_REGISTER_OFFSET;
5502 			else
5503 				offset = DMA1_REGISTER_OFFSET;
5504 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5505 			data |= MEM_POWER_OVERRIDE;
5506 			if (data != orig)
5507 				WREG32(DMA_POWER_CNTL + offset, data);
5508 
5509 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5510 			data = 0xff000000;
5511 			if (data != orig)
5512 				WREG32(DMA_CLK_CTRL + offset, data);
5513 		}
5514 	}
5515 }
5516 
5517 static void si_enable_bif_mgls(struct radeon_device *rdev,
5518 			       bool enable)
5519 {
5520 	u32 orig, data;
5521 
5522 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5523 
5524 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5525 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5526 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5527 	else
5528 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5529 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5530 
5531 	if (orig != data)
5532 		WREG32_PCIE(PCIE_CNTL2, data);
5533 }
5534 
5535 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5536 			       bool enable)
5537 {
5538 	u32 orig, data;
5539 
5540 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5541 
5542 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5543 		data &= ~CLOCK_GATING_DIS;
5544 	else
5545 		data |= CLOCK_GATING_DIS;
5546 
5547 	if (orig != data)
5548 		WREG32(HDP_HOST_PATH_CNTL, data);
5549 }
5550 
5551 static void si_enable_hdp_ls(struct radeon_device *rdev,
5552 			     bool enable)
5553 {
5554 	u32 orig, data;
5555 
5556 	orig = data = RREG32(HDP_MEM_POWER_LS);
5557 
5558 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5559 		data |= HDP_LS_ENABLE;
5560 	else
5561 		data &= ~HDP_LS_ENABLE;
5562 
5563 	if (orig != data)
5564 		WREG32(HDP_MEM_POWER_LS, data);
5565 }
5566 
/**
 * si_update_cg - enable/disable clockgating for a set of IP blocks
 *
 * @rdev: radeon_device pointer
 * @block: bitmask of RADEON_CG_BLOCK_* blocks to update
 * @enable: enable or disable clockgating
 *
 * For the GFX block, MGCG must be enabled before CGCG and disabled
 * after it, with the GUI idle interrupt masked around the transition.
 */
static void si_update_cg(struct radeon_device *rdev,
			 u32 block, bool enable)
{
	if (block & RADEON_CG_BLOCK_GFX) {
		si_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			si_enable_mgcg(rdev, true);
			si_enable_cgcg(rdev, true);
		} else {
			si_enable_cgcg(rdev, false);
			si_enable_mgcg(rdev, false);
		}
		si_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		si_enable_mc_mgcg(rdev, enable);
		si_enable_mc_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		si_enable_dma_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		si_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		/* skip silently on boards without UVD */
		if (rdev->has_uvd) {
			si_enable_uvd_mgcg(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		si_enable_hdp_mgcg(rdev, enable);
		si_enable_hdp_ls(rdev, enable);
	}
}
5607 
/* Enable clockgating on all supported blocks; UVD is handled last and
 * only when present, followed by its internal CG setup. */
static void si_init_cg(struct radeon_device *rdev)
{
	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
			    RADEON_CG_BLOCK_MC |
			    RADEON_CG_BLOCK_SDMA |
			    RADEON_CG_BLOCK_BIF |
			    RADEON_CG_BLOCK_HDP), true);
	if (rdev->has_uvd) {
		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
		si_init_uvd_internal_cg(rdev);
	}
}
5620 
/* Disable clockgating; UVD first (reverse of si_init_cg() order). */
static void si_fini_cg(struct radeon_device *rdev)
{
	if (rdev->has_uvd) {
		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
	}
	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
			    RADEON_CG_BLOCK_MC |
			    RADEON_CG_BLOCK_SDMA |
			    RADEON_CG_BLOCK_BIF |
			    RADEON_CG_BLOCK_HDP), false);
}
5632 
5633 u32 si_get_csb_size(struct radeon_device *rdev)
5634 {
5635 	u32 count = 0;
5636 	const struct cs_section_def *sect = NULL;
5637 	const struct cs_extent_def *ext = NULL;
5638 
5639 	if (rdev->rlc.cs_data == NULL)
5640 		return 0;
5641 
5642 	/* begin clear state */
5643 	count += 2;
5644 	/* context control state */
5645 	count += 3;
5646 
5647 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5648 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5649 			if (sect->id == SECT_CONTEXT)
5650 				count += 2 + ext->reg_count;
5651 			else
5652 				return 0;
5653 		}
5654 	}
5655 	/* pa_sc_raster_config */
5656 	count += 3;
5657 	/* end clear state */
5658 	count += 2;
5659 	/* clear state */
5660 	count += 2;
5661 
5662 	return count;
5663 }
5664 
/**
 * si_get_csb_buffer - fill the clear-state buffer with PM4 packets
 *
 * @rdev: radeon_device pointer
 * @buffer: destination buffer (little-endian dwords)
 *
 * Emits the preamble/context-control/SET_CONTEXT_REG stream described
 * by rdev->rlc.cs_data, a per-family PA_SC_RASTER_CONFIG value, the
 * end-clear-state preamble and a CLEAR_STATE packet.  The layout must
 * match the dword count returned by si_get_csb_size().
 */
void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	/* context control */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* one SET_CONTEXT_REG packet per extent */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				/* reg_index is a dword offset; context regs start at 0xa000 */
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* only context sections are supported */
				return;
			}
		}
	}

	/* per-family raster configuration */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
		buffer[count++] = cpu_to_le32(0x2a00126a);
		break;
	case CHIP_VERDE:
		buffer[count++] = cpu_to_le32(0x0000124a);
		break;
	case CHIP_OLAND:
		buffer[count++] = cpu_to_le32(0x00000082);
		break;
	case CHIP_HAINAN:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	/* end clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
5724 
/* Initialize powergating.  Regardless of PG support, the RLC
 * save/restore and clear-state buffer addresses must be programmed;
 * when PG is supported the DMA PGFSM, always-on CU mask and gfx CGPG
 * setup run first. */
static void si_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
			si_init_dma_pg(rdev);
		}
		si_init_ao_cu_mask(rdev);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			/* also programs the save/restore + clear-state bases */
			si_init_gfx_cgpg(rdev);
		} else {
			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
		}
		si_enable_dma_pg(rdev, true);
		si_enable_gfx_cgpg(rdev, true);
	} else {
		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
	}
}
5745 
5746 static void si_fini_pg(struct radeon_device *rdev)
5747 {
5748 	if (rdev->pg_flags) {
5749 		si_enable_dma_pg(rdev, false);
5750 		si_enable_gfx_cgpg(rdev, false);
5751 	}
5752 }
5753 
5754 /*
5755  * RLC
5756  */
5757 void si_rlc_reset(struct radeon_device *rdev)
5758 {
5759 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5760 
5761 	tmp |= SOFT_RESET_RLC;
5762 	WREG32(GRBM_SOFT_RESET, tmp);
5763 	udelay(50);
5764 	tmp &= ~SOFT_RESET_RLC;
5765 	WREG32(GRBM_SOFT_RESET, tmp);
5766 	udelay(50);
5767 }
5768 
/**
 * si_rlc_stop - halt the RLC microcontroller
 * @rdev: radeon device
 *
 * Disables the RLC, masks the GUI idle interrupt it services, and waits
 * for the RLC serdes to go idle before returning.
 */
static void si_rlc_stop(struct radeon_device *rdev)
{
	/* clearing RLC_CNTL stops the microcontroller */
	WREG32(RLC_CNTL, 0);

	si_enable_gui_idle_interrupt(rdev, false);

	si_wait_for_rlc_serdes(rdev);
}
5777 
/**
 * si_rlc_start - start the RLC microcontroller
 * @rdev: radeon device
 *
 * Enables the RLC, re-enables the GUI idle interrupt, and gives the
 * controller a short delay to come up.
 */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	si_enable_gui_idle_interrupt(rdev, true);

	/* let the RLC settle before callers touch it */
	udelay(50);
}
5786 
5787 static bool si_lbpw_supported(struct radeon_device *rdev)
5788 {
5789 	u32 tmp;
5790 
5791 	/* Enable LBPW only for DDR3 */
5792 	tmp = RREG32(MC_SEQ_MISC0);
5793 	if ((tmp & 0xF0000000) == 0xB0000000)
5794 		return true;
5795 	return false;
5796 }
5797 
5798 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5799 {
5800 	u32 tmp;
5801 
5802 	tmp = RREG32(RLC_LB_CNTL);
5803 	if (enable)
5804 		tmp |= LOAD_BALANCE_ENABLE;
5805 	else
5806 		tmp &= ~LOAD_BALANCE_ENABLE;
5807 	WREG32(RLC_LB_CNTL, tmp);
5808 
5809 	if (!enable) {
5810 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5811 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5812 	}
5813 }
5814 
/**
 * si_rlc_resume - reset the RLC, load its microcode, and start it
 * @rdev: radeon device
 *
 * Stops and soft-resets the RLC, initializes powergating and
 * clockgating state, programs the RLC control registers, uploads the
 * RLC firmware (new-format little-endian or legacy big-endian image),
 * enables LBPW where supported, and restarts the RLC.
 *
 * Returns 0 on success, -EINVAL if no RLC firmware is loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	si_rlc_reset(rdev);

	si_init_pg(rdev);

	si_init_cg(rdev);

	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* new firmware format: header + little-endian ucode payload */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		const __le32 *fw_data = (const __le32 *)
			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		for (i = 0; i < fw_size; i++) {
			WREG32(RLC_UCODE_ADDR, i);
			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
		}
	} else {
		/* legacy firmware image: raw big-endian words, fixed size */
		const __be32 *fw_data =
			(const __be32 *)rdev->rlc_fw->data;
		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
			WREG32(RLC_UCODE_ADDR, i);
			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
		}
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
5869 
5870 static void si_enable_interrupts(struct radeon_device *rdev)
5871 {
5872 	u32 ih_cntl = RREG32(IH_CNTL);
5873 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5874 
5875 	ih_cntl |= ENABLE_INTR;
5876 	ih_rb_cntl |= IH_RB_ENABLE;
5877 	WREG32(IH_CNTL, ih_cntl);
5878 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5879 	rdev->ih.enabled = true;
5880 }
5881 
5882 static void si_disable_interrupts(struct radeon_device *rdev)
5883 {
5884 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5885 	u32 ih_cntl = RREG32(IH_CNTL);
5886 
5887 	ih_rb_cntl &= ~IH_RB_ENABLE;
5888 	ih_cntl &= ~ENABLE_INTR;
5889 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5890 	WREG32(IH_CNTL, ih_cntl);
5891 	/* set rptr, wptr to 0 */
5892 	WREG32(IH_RB_RPTR, 0);
5893 	WREG32(IH_RB_WPTR, 0);
5894 	rdev->ih.enabled = false;
5895 	rdev->ih.rptr = 0;
5896 }
5897 
/**
 * si_disable_interrupt_state - force every interrupt source off
 * @rdev: radeon device
 *
 * Clears the hardware enable bits for the CP rings, both DMA engines,
 * GRBM, CRTC vblank/pageflip, and HPD hotplug interrupts, preserving
 * only the bits that must survive (CP context busy/empty enables and
 * the HPD polarity bits).
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* keep only the context busy/empty enables on ring 0 */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	/* mask the trap interrupt on both DMA engines */
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	/* vblank/vline masks, per populated CRTC pair */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* pageflip interrupt masks */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* ASICs without display (NODCE) have no DAC/HPD blocks */
	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

		/* keep only the polarity bit in each HPD control register */
		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
5955 
/**
 * si_irq_init - bring up the interrupt handler (IH) ring
 * @rdev: radeon device
 *
 * Allocates the IH ring, loads and starts the RLC, programs the IH
 * ring base/size, write-back address and control registers, forces all
 * interrupt sources off, and finally enables the IH block.
 *
 * Returns 0 on success or a negative error code from ring allocation
 * or RLC resume.
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
6026 
/**
 * si_irq_set - program hardware interrupt enables from driver state
 * @rdev: radeon device
 *
 * Translates the driver's requested interrupt state (ring fence irqs,
 * CRTC vblank, pageflip, HPD hotplug, DMA traps, dpm thermal) into the
 * corresponding hardware enable bits and writes them all out.  If the
 * IH block is disabled, everything is forced off instead.
 *
 * Returns 0 on success, -EINVAL if no irq handler is installed.
 */
int si_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;
	u32 thermal_int = 0;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* start from the current registers with the enable bits cleared */
	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);

	if (!ASIC_IS_NODCE(rdev)) {
		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
	}

	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	thermal_int = RREG32(CG_THERMAL_INT) &
		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}
	/* vblank irqs: requested either by drm vblank or pending pageflip */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("si_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("si_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("si_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("si_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("si_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("si_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("si_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("si_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("si_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("si_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("si_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("si_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	/* write everything back out */
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	if (rdev->irq.dpm_thermal) {
		DRM_DEBUG("dpm thermal\n");
		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
	}

	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	/* pageflip interrupts are always unmasked on populated CRTCs */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DC_HPD1_INT_CONTROL, hpd1);
		WREG32(DC_HPD2_INT_CONTROL, hpd2);
		WREG32(DC_HPD3_INT_CONTROL, hpd3);
		WREG32(DC_HPD4_INT_CONTROL, hpd4);
		WREG32(DC_HPD5_INT_CONTROL, hpd5);
		WREG32(DC_HPD6_INT_CONTROL, hpd6);
	}

	WREG32(CG_THERMAL_INT, thermal_int);

	return 0;
}
6203 
6204 static inline void si_irq_ack(struct radeon_device *rdev)
6205 {
6206 	u32 tmp;
6207 
6208 	if (ASIC_IS_NODCE(rdev))
6209 		return;
6210 
6211 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6212 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6213 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6214 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6215 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6216 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6217 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
6218 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
6219 	if (rdev->num_crtc >= 4) {
6220 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
6221 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
6222 	}
6223 	if (rdev->num_crtc >= 6) {
6224 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
6225 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
6226 	}
6227 
6228 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
6229 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6230 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
6231 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6232 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
6233 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6234 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
6235 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6236 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6237 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6238 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6239 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6240 
6241 	if (rdev->num_crtc >= 4) {
6242 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
6243 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6244 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
6245 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6246 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6247 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6248 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6249 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6250 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6251 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6252 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6253 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6254 	}
6255 
6256 	if (rdev->num_crtc >= 6) {
6257 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6258 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6259 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6260 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6261 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6262 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6263 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6264 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6265 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6266 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6267 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6268 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6269 	}
6270 
6271 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6272 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6273 		tmp |= DC_HPDx_INT_ACK;
6274 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6275 	}
6276 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6277 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6278 		tmp |= DC_HPDx_INT_ACK;
6279 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6280 	}
6281 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6282 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6283 		tmp |= DC_HPDx_INT_ACK;
6284 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6285 	}
6286 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6287 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6288 		tmp |= DC_HPDx_INT_ACK;
6289 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6290 	}
6291 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6292 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6293 		tmp |= DC_HPDx_INT_ACK;
6294 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6295 	}
6296 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6297 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6298 		tmp |= DC_HPDx_INT_ACK;
6299 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6300 	}
6301 }
6302 
/**
 * si_irq_disable - disable interrupts and clear anything already pending
 * @rdev: radeon device
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
6311 
/**
 * si_irq_suspend - quiesce interrupts and the RLC for suspend
 * @rdev: radeon device
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
6317 
/**
 * si_irq_fini - tear down the interrupt handler and free the IH ring
 * @rdev: radeon device
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6323 
/**
 * si_get_ih_wptr - fetch the current IH ring write pointer
 * @rdev: radeon device
 *
 * Reads the IH write pointer from the writeback buffer if enabled,
 * otherwise from the register.  On ring overflow, warns, clears the
 * overflow bit in IH_RB_CNTL and moves rptr just past the last
 * non-overwritten entry so processing can resync.
 *
 * Returns the write pointer masked to the ring size.
 */
static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
{
	u32 wptr, tmp;

	if (rdev->wb.enabled)
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
	else
		wptr = RREG32(IH_RB_WPTR);

	if (wptr & RB_OVERFLOW) {
		wptr &= ~RB_OVERFLOW;
		/* When a ring buffer overflow happen start parsing interrupt
		 * from the last not overwritten vector (wptr + 16). Hopefully
		 * this should allow us to catchup.
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
		tmp = RREG32(IH_RB_CNTL);
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
		WREG32(IH_RB_CNTL, tmp);
	}
	return (wptr & rdev->ih.ptr_mask);
}
6348 
6349 /*        SI IV Ring
6350  * Each IV ring entry is 128 bits:
6351  * [7:0]    - interrupt source id
6352  * [31:8]   - reserved
6353  * [59:32]  - interrupt source data
6354  * [63:60]  - reserved
6355  * [71:64]  - RINGID
6356  * [79:72]  - VMID
6357  * [127:80] - reserved
6358  */
6359 int si_irq_process(struct radeon_device *rdev)
6360 {
6361 	u32 wptr;
6362 	u32 rptr;
6363 	u32 src_id, src_data, ring_id;
6364 	u32 ring_index;
6365 	bool queue_hotplug = false;
6366 	bool queue_thermal = false;
6367 	u32 status, addr;
6368 
6369 	if (!rdev->ih.enabled || rdev->shutdown)
6370 		return IRQ_NONE;
6371 
6372 	wptr = si_get_ih_wptr(rdev);
6373 
6374 restart_ih:
6375 	/* is somebody else already processing irqs? */
6376 	if (atomic_xchg(&rdev->ih.lock, 1))
6377 		return IRQ_NONE;
6378 
6379 	rptr = rdev->ih.rptr;
6380 	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6381 
6382 	/* Order reading of wptr vs. reading of IH ring data */
6383 	rmb();
6384 
6385 	/* display interrupts */
6386 	si_irq_ack(rdev);
6387 
6388 	while (rptr != wptr) {
6389 		/* wptr/rptr are in bytes! */
6390 		ring_index = rptr / 4;
6391 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6392 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6393 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6394 
6395 		switch (src_id) {
6396 		case 1: /* D1 vblank/vline */
6397 			switch (src_data) {
6398 			case 0: /* D1 vblank */
6399 				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
6400 					if (rdev->irq.crtc_vblank_int[0]) {
6401 						drm_handle_vblank(rdev->ddev, 0);
6402 						rdev->pm.vblank_sync = true;
6403 						wake_up(&rdev->irq.vblank_queue);
6404 					}
6405 					if (atomic_read(&rdev->irq.pflip[0]))
6406 						radeon_crtc_handle_vblank(rdev, 0);
6407 					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6408 					DRM_DEBUG("IH: D1 vblank\n");
6409 				}
6410 				break;
6411 			case 1: /* D1 vline */
6412 				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
6413 					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6414 					DRM_DEBUG("IH: D1 vline\n");
6415 				}
6416 				break;
6417 			default:
6418 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6419 				break;
6420 			}
6421 			break;
6422 		case 2: /* D2 vblank/vline */
6423 			switch (src_data) {
6424 			case 0: /* D2 vblank */
6425 				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6426 					if (rdev->irq.crtc_vblank_int[1]) {
6427 						drm_handle_vblank(rdev->ddev, 1);
6428 						rdev->pm.vblank_sync = true;
6429 						wake_up(&rdev->irq.vblank_queue);
6430 					}
6431 					if (atomic_read(&rdev->irq.pflip[1]))
6432 						radeon_crtc_handle_vblank(rdev, 1);
6433 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6434 					DRM_DEBUG("IH: D2 vblank\n");
6435 				}
6436 				break;
6437 			case 1: /* D2 vline */
6438 				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6439 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6440 					DRM_DEBUG("IH: D2 vline\n");
6441 				}
6442 				break;
6443 			default:
6444 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6445 				break;
6446 			}
6447 			break;
6448 		case 3: /* D3 vblank/vline */
6449 			switch (src_data) {
6450 			case 0: /* D3 vblank */
6451 				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6452 					if (rdev->irq.crtc_vblank_int[2]) {
6453 						drm_handle_vblank(rdev->ddev, 2);
6454 						rdev->pm.vblank_sync = true;
6455 						wake_up(&rdev->irq.vblank_queue);
6456 					}
6457 					if (atomic_read(&rdev->irq.pflip[2]))
6458 						radeon_crtc_handle_vblank(rdev, 2);
6459 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6460 					DRM_DEBUG("IH: D3 vblank\n");
6461 				}
6462 				break;
6463 			case 1: /* D3 vline */
6464 				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6465 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6466 					DRM_DEBUG("IH: D3 vline\n");
6467 				}
6468 				break;
6469 			default:
6470 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6471 				break;
6472 			}
6473 			break;
6474 		case 4: /* D4 vblank/vline */
6475 			switch (src_data) {
6476 			case 0: /* D4 vblank */
6477 				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6478 					if (rdev->irq.crtc_vblank_int[3]) {
6479 						drm_handle_vblank(rdev->ddev, 3);
6480 						rdev->pm.vblank_sync = true;
6481 						wake_up(&rdev->irq.vblank_queue);
6482 					}
6483 					if (atomic_read(&rdev->irq.pflip[3]))
6484 						radeon_crtc_handle_vblank(rdev, 3);
6485 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6486 					DRM_DEBUG("IH: D4 vblank\n");
6487 				}
6488 				break;
6489 			case 1: /* D4 vline */
6490 				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6491 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6492 					DRM_DEBUG("IH: D4 vline\n");
6493 				}
6494 				break;
6495 			default:
6496 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6497 				break;
6498 			}
6499 			break;
6500 		case 5: /* D5 vblank/vline */
6501 			switch (src_data) {
6502 			case 0: /* D5 vblank */
6503 				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6504 					if (rdev->irq.crtc_vblank_int[4]) {
6505 						drm_handle_vblank(rdev->ddev, 4);
6506 						rdev->pm.vblank_sync = true;
6507 						wake_up(&rdev->irq.vblank_queue);
6508 					}
6509 					if (atomic_read(&rdev->irq.pflip[4]))
6510 						radeon_crtc_handle_vblank(rdev, 4);
6511 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6512 					DRM_DEBUG("IH: D5 vblank\n");
6513 				}
6514 				break;
6515 			case 1: /* D5 vline */
6516 				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6517 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6518 					DRM_DEBUG("IH: D5 vline\n");
6519 				}
6520 				break;
6521 			default:
6522 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6523 				break;
6524 			}
6525 			break;
6526 		case 6: /* D6 vblank/vline */
6527 			switch (src_data) {
6528 			case 0: /* D6 vblank */
6529 				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6530 					if (rdev->irq.crtc_vblank_int[5]) {
6531 						drm_handle_vblank(rdev->ddev, 5);
6532 						rdev->pm.vblank_sync = true;
6533 						wake_up(&rdev->irq.vblank_queue);
6534 					}
6535 					if (atomic_read(&rdev->irq.pflip[5]))
6536 						radeon_crtc_handle_vblank(rdev, 5);
6537 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6538 					DRM_DEBUG("IH: D6 vblank\n");
6539 				}
6540 				break;
6541 			case 1: /* D6 vline */
6542 				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6543 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6544 					DRM_DEBUG("IH: D6 vline\n");
6545 				}
6546 				break;
6547 			default:
6548 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6549 				break;
6550 			}
6551 			break;
6552 		case 8: /* D1 page flip */
6553 		case 10: /* D2 page flip */
6554 		case 12: /* D3 page flip */
6555 		case 14: /* D4 page flip */
6556 		case 16: /* D5 page flip */
6557 		case 18: /* D6 page flip */
6558 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6559 			if (radeon_use_pflipirq > 0)
6560 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6561 			break;
6562 		case 42: /* HPD hotplug */
6563 			switch (src_data) {
6564 			case 0:
6565 				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6566 					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6567 					queue_hotplug = true;
6568 					DRM_DEBUG("IH: HPD1\n");
6569 				}
6570 				break;
6571 			case 1:
6572 				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6573 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6574 					queue_hotplug = true;
6575 					DRM_DEBUG("IH: HPD2\n");
6576 				}
6577 				break;
6578 			case 2:
6579 				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6580 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6581 					queue_hotplug = true;
6582 					DRM_DEBUG("IH: HPD3\n");
6583 				}
6584 				break;
6585 			case 3:
6586 				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6587 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6588 					queue_hotplug = true;
6589 					DRM_DEBUG("IH: HPD4\n");
6590 				}
6591 				break;
6592 			case 4:
6593 				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6594 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6595 					queue_hotplug = true;
6596 					DRM_DEBUG("IH: HPD5\n");
6597 				}
6598 				break;
6599 			case 5:
6600 				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6601 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6602 					queue_hotplug = true;
6603 					DRM_DEBUG("IH: HPD6\n");
6604 				}
6605 				break;
6606 			default:
6607 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6608 				break;
6609 			}
6610 			break;
6611 		case 124: /* UVD */
6612 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6613 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6614 			break;
6615 		case 146:
6616 		case 147:
6617 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6618 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6619 			/* reset addr and status */
6620 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6621 			if (addr == 0x0 && status == 0x0)
6622 				break;
6623 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6624 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6625 				addr);
6626 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6627 				status);
6628 			si_vm_decode_fault(rdev, status, addr);
6629 			break;
6630 		case 176: /* RINGID0 CP_INT */
6631 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6632 			break;
6633 		case 177: /* RINGID1 CP_INT */
6634 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6635 			break;
6636 		case 178: /* RINGID2 CP_INT */
6637 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6638 			break;
6639 		case 181: /* CP EOP event */
6640 			DRM_DEBUG("IH: CP EOP\n");
6641 			switch (ring_id) {
6642 			case 0:
6643 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6644 				break;
6645 			case 1:
6646 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6647 				break;
6648 			case 2:
6649 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6650 				break;
6651 			}
6652 			break;
6653 		case 224: /* DMA trap event */
6654 			DRM_DEBUG("IH: DMA trap\n");
6655 			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6656 			break;
6657 		case 230: /* thermal low to high */
6658 			DRM_DEBUG("IH: thermal low to high\n");
6659 			rdev->pm.dpm.thermal.high_to_low = false;
6660 			queue_thermal = true;
6661 			break;
6662 		case 231: /* thermal high to low */
6663 			DRM_DEBUG("IH: thermal high to low\n");
6664 			rdev->pm.dpm.thermal.high_to_low = true;
6665 			queue_thermal = true;
6666 			break;
6667 		case 233: /* GUI IDLE */
6668 			DRM_DEBUG("IH: GUI idle\n");
6669 			break;
6670 		case 244: /* DMA trap event */
6671 			DRM_DEBUG("IH: DMA1 trap\n");
6672 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6673 			break;
6674 		default:
6675 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6676 			break;
6677 		}
6678 
6679 		/* wptr/rptr are in bytes! */
6680 		rptr += 16;
6681 		rptr &= rdev->ih.ptr_mask;
6682 		WREG32(IH_RB_RPTR, rptr);
6683 	}
6684 	if (queue_hotplug)
6685 		schedule_work(&rdev->hotplug_work);
6686 	if (queue_thermal && rdev->pm.dpm_enabled)
6687 		schedule_work(&rdev->pm.dpm.thermal.work);
6688 	rdev->ih.rptr = rptr;
6689 	atomic_set(&rdev->ih.lock, 0);
6690 
6691 	/* make sure wptr hasn't changed while processing */
6692 	wptr = si_get_ih_wptr(rdev);
6693 	if (wptr != rptr)
6694 		goto restart_ih;
6695 
6696 	return IRQ_HANDLED;
6697 }
6698 
6699 /*
6700  * startup/shutdown callbacks
6701  */
/**
 * si_startup - program the hw and bring up all rings (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Hardware init sequence shared by si_init() and si_resume(): PCIE link
 * speed/ASPM setup, MC programming, GART enable, RLC and writeback
 * buffers, fence drivers, IRQ init, CP/DMA/UVD ring bring-up, IB pool,
 * VM manager and audio.  The ordering of the steps below is
 * significant; do not reorder them.
 * Returns 0 on success, a negative error code on failure.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);

	/* NOTE(review): when dpm is enabled the MC microcode is presumably
	 * loaded elsewhere by the dpm code -- confirm before relying on it */
	if (!rdev->pm.dpm_enabled) {
		r = si_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->family == CHIP_VERDE) {
		/* only Verde supplies an explicit save/restore register list */
		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size =
			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
	}
	rdev->rlc.cs_data = si_cs_data;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start the fence driver on every ring: GFX, the two compute CPs,
	 * the two DMA engines and (optionally) UVD */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	if (rdev->has_uvd) {
		r = uvd_v2_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
		/* UVD failure is not fatal: zero the ring size so the ring
		 * is skipped later, and carry on */
		if (r)
			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	/* initialize the rings: the three CP rings use their writeback rptr
	 * offsets and CP NOPs, the DMA rings use DMA NOP packets */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* UVD ring only if uvd_v2_2_resume() above succeeded (size != 0) */
	if (rdev->has_uvd) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		if (ring->ring_size) {
			r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
					     RADEON_CP_PACKET2);
			if (!r)
				r = uvd_v1_0_init(rdev);
			if (r)
				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
		}
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
6878 
/**
 * si_resume - resume from suspend (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Re-posts the card via the ATOM BIOS, restores the golden registers,
 * resumes dpm power management if in use, and re-runs the full
 * si_startup() sequence.
 * Returns 0 on success, a negative error code on failure.
 */
int si_resume(struct radeon_device *rdev)
{
	int r;

	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
	 * posting will perform necessary task to bring back GPU into good
	 * shape.
	 * NOTE(review): comment inherited from older asics -- confirm it
	 * still applies to SI.
	 */
	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);

	/* init golden registers */
	si_init_golden_registers(rdev);

	if (rdev->pm.pm_method == PM_METHOD_DPM)
		radeon_pm_resume(rdev);

	/* mark acceleration usable; cleared again below if startup fails */
	rdev->accel_working = true;
	r = si_startup(rdev);
	if (r) {
		DRM_ERROR("si startup failed on resume\n");
		rdev->accel_working = false;
		return r;
	}

	return r;

}
6907 
/**
 * si_suspend - quiesce the hw for suspend (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Tears the hw down in roughly the reverse order of si_startup():
 * power management, audio, VM manager, CP and DMA engines, UVD,
 * PG/CG (si_fini_pg/si_fini_cg), IRQs, writeback, and finally the
 * GART.  Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
6926 
/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does pretty much
 * nothing more than call asic-specific functions. This should
 * also allow us to remove a bunch of callback functions
 * like vram_info.
 */
6933 int si_init(struct radeon_device *rdev)
6934 {
6935 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6936 	int r;
6937 
6938 	/* Read BIOS */
6939 	if (!radeon_get_bios(rdev)) {
6940 		if (ASIC_IS_AVIVO(rdev))
6941 			return -EINVAL;
6942 	}
6943 	/* Must be an ATOMBIOS */
6944 	if (!rdev->is_atom_bios) {
6945 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6946 		return -EINVAL;
6947 	}
6948 	r = radeon_atombios_init(rdev);
6949 	if (r)
6950 		return r;
6951 
6952 	/* Post card if necessary */
6953 	if (!radeon_card_posted(rdev)) {
6954 		if (!rdev->bios) {
6955 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6956 			return -EINVAL;
6957 		}
6958 		DRM_INFO("GPU not posted. posting now...\n");
6959 		atom_asic_init(rdev->mode_info.atom_context);
6960 	}
6961 	/* init golden registers */
6962 	si_init_golden_registers(rdev);
6963 	/* Initialize scratch registers */
6964 	si_scratch_init(rdev);
6965 	/* Initialize surface registers */
6966 	radeon_surface_init(rdev);
6967 	/* Initialize clocks */
6968 	radeon_get_clock_info(rdev->ddev);
6969 
6970 	/* Fence driver */
6971 	r = radeon_fence_driver_init(rdev);
6972 	if (r)
6973 		return r;
6974 
6975 	/* initialize memory controller */
6976 	r = si_mc_init(rdev);
6977 	if (r)
6978 		return r;
6979 	/* Memory manager */
6980 	r = radeon_bo_init(rdev);
6981 	if (r)
6982 		return r;
6983 
6984 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6985 	    !rdev->rlc_fw || !rdev->mc_fw) {
6986 		r = si_init_microcode(rdev);
6987 		if (r) {
6988 			DRM_ERROR("Failed to load firmware!\n");
6989 			return r;
6990 		}
6991 	}
6992 
6993 	/* Initialize power management */
6994 	radeon_pm_init(rdev);
6995 
6996 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6997 	ring->ring_obj = NULL;
6998 	r600_ring_init(rdev, ring, 1024 * 1024);
6999 
7000 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7001 	ring->ring_obj = NULL;
7002 	r600_ring_init(rdev, ring, 1024 * 1024);
7003 
7004 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7005 	ring->ring_obj = NULL;
7006 	r600_ring_init(rdev, ring, 1024 * 1024);
7007 
7008 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7009 	ring->ring_obj = NULL;
7010 	r600_ring_init(rdev, ring, 64 * 1024);
7011 
7012 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7013 	ring->ring_obj = NULL;
7014 	r600_ring_init(rdev, ring, 64 * 1024);
7015 
7016 	if (rdev->has_uvd) {
7017 		r = radeon_uvd_init(rdev);
7018 		if (!r) {
7019 			ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7020 			ring->ring_obj = NULL;
7021 			r600_ring_init(rdev, ring, 4096);
7022 		}
7023 	}
7024 
7025 	rdev->ih.ring_obj = NULL;
7026 	r600_ih_ring_init(rdev, 64 * 1024);
7027 
7028 	r = r600_pcie_gart_init(rdev);
7029 	if (r)
7030 		return r;
7031 
7032 	rdev->accel_working = true;
7033 	r = si_startup(rdev);
7034 	if (r) {
7035 		dev_err(rdev->dev, "disabling GPU acceleration\n");
7036 		si_cp_fini(rdev);
7037 		cayman_dma_fini(rdev);
7038 		si_irq_fini(rdev);
7039 		sumo_rlc_fini(rdev);
7040 		radeon_wb_fini(rdev);
7041 		radeon_ib_pool_fini(rdev);
7042 		radeon_vm_manager_fini(rdev);
7043 		radeon_irq_kms_fini(rdev);
7044 		si_pcie_gart_fini(rdev);
7045 		rdev->accel_working = false;
7046 	}
7047 
7048 	/* Don't start up if the MC ucode is missing.
7049 	 * The default clocks and voltages before the MC ucode
7050 	 * is loaded are not suffient for advanced operations.
7051 	 */
7052 	if (!rdev->mc_fw) {
7053 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
7054 		return -EINVAL;
7055 	}
7056 
7057 	return 0;
7058 }
7059 
/**
 * si_fini - final driver and hw teardown (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Frees everything si_init()/si_startup() set up: power management,
 * CP and DMA engines, PG/CG, IRQs, RLC, writeback, VM manager, IB
 * pool, UVD, GART, vram scratch, GEM, fence driver, buffer manager,
 * atombios state, and finally the cached BIOS image.  Order is the
 * rough inverse of init; do not reorder.
 */
void si_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_fini(rdev);
	}
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* free the cached BIOS copy and clear the pointer so a stale
	 * reference cannot be used after teardown */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
7086 
7087 /**
7088  * si_get_gpu_clock_counter - return GPU clock counter snapshot
7089  *
7090  * @rdev: radeon_device pointer
7091  *
7092  * Fetches a GPU clock counter snapshot (SI).
7093  * Returns the 64 bit clock counter snapshot.
7094  */
7095 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
7096 {
7097 	uint64_t clock;
7098 
7099 	mutex_lock(&rdev->gpu_clock_mutex);
7100 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
7101 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7102 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
7103 	mutex_unlock(&rdev->gpu_clock_mutex);
7104 	return clock;
7105 }
7106 
/**
 * si_set_uvd_clocks - reprogram the UVD PLL for the requested clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD vclk (units per radeon_uvd_calc_upll_dividers)
 * @dclk: requested UVD dclk (same units as @vclk)
 *
 * If either requested clock is zero, the UPLL is left in bypass mode
 * and put to sleep.  Otherwise the divider values are computed and
 * the PLL is reprogrammed following the required bypass/reset/settle
 * sequence below; the ordering of the register writes and delays is
 * part of the sequence and must not change.
 * Returns 0 on success, a negative error code on failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* compute fb/vclk/dclk dividers for the requested frequencies */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
7197 
/**
 * si_pcie_gen3_enable - try to raise the PCIE link speed to gen2/gen3
 *
 * @rdev: radeon_device pointer
 *
 * Silently returns when the GPU sits on the root bus, when the user
 * disabled the feature (radeon.pcie_gen2=0), on IGP/non-PCIE parts,
 * or when neither gen2 nor gen3 speed is supported by the link.  For
 * gen3 a software equalization retry loop is run first, then the
 * target link speed is written and a speed change is initiated and
 * polled for completion.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	/* module parameter covers both gen2 and gen3 */
	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 per the
	 * comparisons below */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the current LNKCTL values, then force HAWD on
			 * both ends; restored inside the retry loop below */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate up to the detected maximum link width
			 * if the hw supports it */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* equalization retry loop */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* write the target link speed into the low nibble of LNKCTL2 */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hw to clear the speed-change bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
7357 
/**
 * si_program_aspm - configure PCIE ASPM (L0s/L1) related settings
 *
 * @rdev: radeon_device pointer
 *
 * No-op when ASPM is disabled via radeon.aspm=0 or the asic is not
 * PCIE.  Programs N_FTS, the L0s/L1 inactivity timers, PLL powerdown
 * behavior in L1, and -- when the root port advertises CLKPM support
 * -- the various clock selection registers.  The disable_* locals are
 * compile-time policy knobs (all currently false).
 */
static void si_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* throughout: read-modify-write, only writing back when the value
	 * actually changed */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE(PCIE_P_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* power down the PLLs on both PIF PHYs while in L1 */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

			/* clear the PLL ramp-up times on everything except
			 * Oland and Hainan */
			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
			}
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);

			/* CLKREQ handling only makes sense when there is an
			 * upstream bridge to query */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32(THM_CLK_CNTL, data);

				orig = data = RREG32(MISC_CLK_CNTL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32(MISC_CLK_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32(MPLL_BYPASSCLK_SEL, data);

				orig = data = RREG32(SPLL_CNTL_MODE);
				data &= ~SPLL_REFCLK_SEL_MASK;
				if (orig != data)
					WREG32(SPLL_CNTL_MODE, data);
			}
		}
	} else {
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE(PCIE_CNTL2, data);

	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				/* undo the L0s inactivity timer set above */
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
7562