xref: /openbmc/linux/drivers/gpu/drm/radeon/si.c (revision 609e478b)
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include <drm/radeon_drm.h>
31 #include "sid.h"
32 #include "atom.h"
33 #include "si_blit_shaders.h"
34 #include "clearstate_si.h"
35 #include "radeon_ucode.h"
36 
37 
/*
 * Firmware images required by the SI (Southern Islands) family.
 * Per chip: pfp/me/ce (command-processor microcode), mc (memory
 * controller), rlc (run-list controller) and smc (power management).
 *
 * Two naming schemes are declared for each chip: the legacy UPPERCASE
 * names and the newer lowercase names shipped in linux-firmware.
 * NOTE(review): only the UPPERCASE sets carry an "mc2" image —
 * presumably the fetch code falls back between the two schemes;
 * confirm against si_init_microcode().
 */
MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/TAHITI_smc.bin");

MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
MODULE_FIRMWARE("radeon/tahiti_me.bin");
MODULE_FIRMWARE("radeon/tahiti_ce.bin");
MODULE_FIRMWARE("radeon/tahiti_mc.bin");
MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
MODULE_FIRMWARE("radeon/tahiti_smc.bin");

MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");

MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
MODULE_FIRMWARE("radeon/pitcairn_me.bin");
MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
MODULE_FIRMWARE("radeon/pitcairn_smc.bin");

MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_smc.bin");

MODULE_FIRMWARE("radeon/verde_pfp.bin");
MODULE_FIRMWARE("radeon/verde_me.bin");
MODULE_FIRMWARE("radeon/verde_ce.bin");
MODULE_FIRMWARE("radeon/verde_mc.bin");
MODULE_FIRMWARE("radeon/verde_rlc.bin");
MODULE_FIRMWARE("radeon/verde_smc.bin");

MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_smc.bin");

MODULE_FIRMWARE("radeon/oland_pfp.bin");
MODULE_FIRMWARE("radeon/oland_me.bin");
MODULE_FIRMWARE("radeon/oland_ce.bin");
MODULE_FIRMWARE("radeon/oland_mc.bin");
MODULE_FIRMWARE("radeon/oland_rlc.bin");
MODULE_FIRMWARE("radeon/oland_smc.bin");

MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
MODULE_FIRMWARE("radeon/HAINAN_me.bin");
MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
MODULE_FIRMWARE("radeon/HAINAN_smc.bin");

MODULE_FIRMWARE("radeon/hainan_pfp.bin");
MODULE_FIRMWARE("radeon/hainan_me.bin");
MODULE_FIRMWARE("radeon/hainan_ce.bin");
MODULE_FIRMWARE("radeon/hainan_mc.bin");
MODULE_FIRMWARE("radeon/hainan_rlc.bin");
MODULE_FIRMWARE("radeon/hainan_smc.bin");
112 
/* Forward declarations for SI-local helpers defined later in this file. */
static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
static void si_pcie_gen3_enable(struct radeon_device *rdev);
static void si_program_aspm(struct radeon_device *rdev);

/* Helpers shared with other ASIC files (sumo/r600/evergreen). */
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);

/* Clock/power-gating and RLC control, defined later in this file. */
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable);
static void si_init_pg(struct radeon_device *rdev);
static void si_init_cg(struct radeon_device *rdev);
static void si_fini_pg(struct radeon_device *rdev);
static void si_fini_cg(struct radeon_device *rdev);
static void si_rlc_stop(struct radeon_device *rdev);
133 
134 static const u32 verde_rlc_save_restore_register_list[] =
135 {
136 	(0x8000 << 16) | (0x98f4 >> 2),
137 	0x00000000,
138 	(0x8040 << 16) | (0x98f4 >> 2),
139 	0x00000000,
140 	(0x8000 << 16) | (0xe80 >> 2),
141 	0x00000000,
142 	(0x8040 << 16) | (0xe80 >> 2),
143 	0x00000000,
144 	(0x8000 << 16) | (0x89bc >> 2),
145 	0x00000000,
146 	(0x8040 << 16) | (0x89bc >> 2),
147 	0x00000000,
148 	(0x8000 << 16) | (0x8c1c >> 2),
149 	0x00000000,
150 	(0x8040 << 16) | (0x8c1c >> 2),
151 	0x00000000,
152 	(0x9c00 << 16) | (0x98f0 >> 2),
153 	0x00000000,
154 	(0x9c00 << 16) | (0xe7c >> 2),
155 	0x00000000,
156 	(0x8000 << 16) | (0x9148 >> 2),
157 	0x00000000,
158 	(0x8040 << 16) | (0x9148 >> 2),
159 	0x00000000,
160 	(0x9c00 << 16) | (0x9150 >> 2),
161 	0x00000000,
162 	(0x9c00 << 16) | (0x897c >> 2),
163 	0x00000000,
164 	(0x9c00 << 16) | (0x8d8c >> 2),
165 	0x00000000,
166 	(0x9c00 << 16) | (0xac54 >> 2),
167 	0X00000000,
168 	0x3,
169 	(0x9c00 << 16) | (0x98f8 >> 2),
170 	0x00000000,
171 	(0x9c00 << 16) | (0x9910 >> 2),
172 	0x00000000,
173 	(0x9c00 << 16) | (0x9914 >> 2),
174 	0x00000000,
175 	(0x9c00 << 16) | (0x9918 >> 2),
176 	0x00000000,
177 	(0x9c00 << 16) | (0x991c >> 2),
178 	0x00000000,
179 	(0x9c00 << 16) | (0x9920 >> 2),
180 	0x00000000,
181 	(0x9c00 << 16) | (0x9924 >> 2),
182 	0x00000000,
183 	(0x9c00 << 16) | (0x9928 >> 2),
184 	0x00000000,
185 	(0x9c00 << 16) | (0x992c >> 2),
186 	0x00000000,
187 	(0x9c00 << 16) | (0x9930 >> 2),
188 	0x00000000,
189 	(0x9c00 << 16) | (0x9934 >> 2),
190 	0x00000000,
191 	(0x9c00 << 16) | (0x9938 >> 2),
192 	0x00000000,
193 	(0x9c00 << 16) | (0x993c >> 2),
194 	0x00000000,
195 	(0x9c00 << 16) | (0x9940 >> 2),
196 	0x00000000,
197 	(0x9c00 << 16) | (0x9944 >> 2),
198 	0x00000000,
199 	(0x9c00 << 16) | (0x9948 >> 2),
200 	0x00000000,
201 	(0x9c00 << 16) | (0x994c >> 2),
202 	0x00000000,
203 	(0x9c00 << 16) | (0x9950 >> 2),
204 	0x00000000,
205 	(0x9c00 << 16) | (0x9954 >> 2),
206 	0x00000000,
207 	(0x9c00 << 16) | (0x9958 >> 2),
208 	0x00000000,
209 	(0x9c00 << 16) | (0x995c >> 2),
210 	0x00000000,
211 	(0x9c00 << 16) | (0x9960 >> 2),
212 	0x00000000,
213 	(0x9c00 << 16) | (0x9964 >> 2),
214 	0x00000000,
215 	(0x9c00 << 16) | (0x9968 >> 2),
216 	0x00000000,
217 	(0x9c00 << 16) | (0x996c >> 2),
218 	0x00000000,
219 	(0x9c00 << 16) | (0x9970 >> 2),
220 	0x00000000,
221 	(0x9c00 << 16) | (0x9974 >> 2),
222 	0x00000000,
223 	(0x9c00 << 16) | (0x9978 >> 2),
224 	0x00000000,
225 	(0x9c00 << 16) | (0x997c >> 2),
226 	0x00000000,
227 	(0x9c00 << 16) | (0x9980 >> 2),
228 	0x00000000,
229 	(0x9c00 << 16) | (0x9984 >> 2),
230 	0x00000000,
231 	(0x9c00 << 16) | (0x9988 >> 2),
232 	0x00000000,
233 	(0x9c00 << 16) | (0x998c >> 2),
234 	0x00000000,
235 	(0x9c00 << 16) | (0x8c00 >> 2),
236 	0x00000000,
237 	(0x9c00 << 16) | (0x8c14 >> 2),
238 	0x00000000,
239 	(0x9c00 << 16) | (0x8c04 >> 2),
240 	0x00000000,
241 	(0x9c00 << 16) | (0x8c08 >> 2),
242 	0x00000000,
243 	(0x8000 << 16) | (0x9b7c >> 2),
244 	0x00000000,
245 	(0x8040 << 16) | (0x9b7c >> 2),
246 	0x00000000,
247 	(0x8000 << 16) | (0xe84 >> 2),
248 	0x00000000,
249 	(0x8040 << 16) | (0xe84 >> 2),
250 	0x00000000,
251 	(0x8000 << 16) | (0x89c0 >> 2),
252 	0x00000000,
253 	(0x8040 << 16) | (0x89c0 >> 2),
254 	0x00000000,
255 	(0x8000 << 16) | (0x914c >> 2),
256 	0x00000000,
257 	(0x8040 << 16) | (0x914c >> 2),
258 	0x00000000,
259 	(0x8000 << 16) | (0x8c20 >> 2),
260 	0x00000000,
261 	(0x8040 << 16) | (0x8c20 >> 2),
262 	0x00000000,
263 	(0x8000 << 16) | (0x9354 >> 2),
264 	0x00000000,
265 	(0x8040 << 16) | (0x9354 >> 2),
266 	0x00000000,
267 	(0x9c00 << 16) | (0x9060 >> 2),
268 	0x00000000,
269 	(0x9c00 << 16) | (0x9364 >> 2),
270 	0x00000000,
271 	(0x9c00 << 16) | (0x9100 >> 2),
272 	0x00000000,
273 	(0x9c00 << 16) | (0x913c >> 2),
274 	0x00000000,
275 	(0x8000 << 16) | (0x90e0 >> 2),
276 	0x00000000,
277 	(0x8000 << 16) | (0x90e4 >> 2),
278 	0x00000000,
279 	(0x8000 << 16) | (0x90e8 >> 2),
280 	0x00000000,
281 	(0x8040 << 16) | (0x90e0 >> 2),
282 	0x00000000,
283 	(0x8040 << 16) | (0x90e4 >> 2),
284 	0x00000000,
285 	(0x8040 << 16) | (0x90e8 >> 2),
286 	0x00000000,
287 	(0x9c00 << 16) | (0x8bcc >> 2),
288 	0x00000000,
289 	(0x9c00 << 16) | (0x8b24 >> 2),
290 	0x00000000,
291 	(0x9c00 << 16) | (0x88c4 >> 2),
292 	0x00000000,
293 	(0x9c00 << 16) | (0x8e50 >> 2),
294 	0x00000000,
295 	(0x9c00 << 16) | (0x8c0c >> 2),
296 	0x00000000,
297 	(0x9c00 << 16) | (0x8e58 >> 2),
298 	0x00000000,
299 	(0x9c00 << 16) | (0x8e5c >> 2),
300 	0x00000000,
301 	(0x9c00 << 16) | (0x9508 >> 2),
302 	0x00000000,
303 	(0x9c00 << 16) | (0x950c >> 2),
304 	0x00000000,
305 	(0x9c00 << 16) | (0x9494 >> 2),
306 	0x00000000,
307 	(0x9c00 << 16) | (0xac0c >> 2),
308 	0x00000000,
309 	(0x9c00 << 16) | (0xac10 >> 2),
310 	0x00000000,
311 	(0x9c00 << 16) | (0xac14 >> 2),
312 	0x00000000,
313 	(0x9c00 << 16) | (0xae00 >> 2),
314 	0x00000000,
315 	(0x9c00 << 16) | (0xac08 >> 2),
316 	0x00000000,
317 	(0x9c00 << 16) | (0x88d4 >> 2),
318 	0x00000000,
319 	(0x9c00 << 16) | (0x88c8 >> 2),
320 	0x00000000,
321 	(0x9c00 << 16) | (0x88cc >> 2),
322 	0x00000000,
323 	(0x9c00 << 16) | (0x89b0 >> 2),
324 	0x00000000,
325 	(0x9c00 << 16) | (0x8b10 >> 2),
326 	0x00000000,
327 	(0x9c00 << 16) | (0x8a14 >> 2),
328 	0x00000000,
329 	(0x9c00 << 16) | (0x9830 >> 2),
330 	0x00000000,
331 	(0x9c00 << 16) | (0x9834 >> 2),
332 	0x00000000,
333 	(0x9c00 << 16) | (0x9838 >> 2),
334 	0x00000000,
335 	(0x9c00 << 16) | (0x9a10 >> 2),
336 	0x00000000,
337 	(0x8000 << 16) | (0x9870 >> 2),
338 	0x00000000,
339 	(0x8000 << 16) | (0x9874 >> 2),
340 	0x00000000,
341 	(0x8001 << 16) | (0x9870 >> 2),
342 	0x00000000,
343 	(0x8001 << 16) | (0x9874 >> 2),
344 	0x00000000,
345 	(0x8040 << 16) | (0x9870 >> 2),
346 	0x00000000,
347 	(0x8040 << 16) | (0x9874 >> 2),
348 	0x00000000,
349 	(0x8041 << 16) | (0x9870 >> 2),
350 	0x00000000,
351 	(0x8041 << 16) | (0x9874 >> 2),
352 	0x00000000,
353 	0x00000000
354 };
355 
/*
 * TAHITI "golden" register settings: triples of
 *	{ register byte offset, mask, value }
 * NOTE(review): presumably applied by radeon_program_register_sequence()
 * during si_init_golden_registers() — read-modify-write of the masked
 * bits; confirm the mask/value semantics against that helper.
 */
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};

static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};
405 
/*
 * PITCAIRN golden register settings — same { offset, mask, value }
 * triple format as the TAHITI tables above.
 */
static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};

static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
445 
/*
 * VERDE golden register settings — same { offset, mask, value } triple
 * format as the tables above.
 * NOTE(review): several entries below are exact duplicates (e.g. 0xd030,
 * 0x2ae4, 0x8a14); re-applying the same masked value is harmless and the
 * duplicates are kept byte-for-byte.
 */
static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};

static const u32 verde_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
510 
/*
 * OLAND golden register settings — same { offset, mask, value } triple
 * format as the tables above.
 */
static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};

static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
550 
/*
 * HAINAN golden register settings — same { offset, mask, value } triple
 * format as the tables above.  HAINAN has no display block, hence the
 * slightly different register set (no RLC golden table either).
 * NOTE(review): the "no display" rationale is inferred from the ASIC,
 * not from this file — confirm before relying on it.
 */
static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};
584 
/*
 * TAHITI medium-grain / coarse-grain clock-gating (MGCG/CGCG) init
 * sequence.  Same { offset, mask, value } triple format as the golden
 * register tables above.
 */
static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
714 
/*
 * PITCAIRN MGCG/CGCG clock-gating init sequence — same triple format as
 * the TAHITI table above; shorter because PITCAIRN has fewer shader
 * engines/gating domains.
 * NOTE(review): the "fewer domains" rationale is inferred from the
 * shorter 0x91xx/0x92xx run, not stated in this file — confirm.
 */
static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
812 
/*
 * VERDE MGCG/CGCG clock-gating init sequence — same triple format as
 * the TAHITI table above.  Identical to the PITCAIRN sequence except
 * for the additional 0x264c/0x2648 entries.
 */
static const u32 verde_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
912 
/*
 * MGCG/CGCG (clock gating) init sequence for Oland.
 * Entries are {register offset, and-mask, value} triples, applied by
 * radeon_program_register_sequence() from si_init_golden_registers().
 */
static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
992 
/*
 * MGCG/CGCG (clock gating) init sequence for Hainan.
 * Entries are {register offset, and-mask, value} triples, applied by
 * radeon_program_register_sequence() from si_init_golden_registers().
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1069 
1070 static u32 verde_pg_init[] =
1071 {
1072 	0x353c, 0xffffffff, 0x40000,
1073 	0x3538, 0xffffffff, 0x200010ff,
1074 	0x353c, 0xffffffff, 0x0,
1075 	0x353c, 0xffffffff, 0x0,
1076 	0x353c, 0xffffffff, 0x0,
1077 	0x353c, 0xffffffff, 0x0,
1078 	0x353c, 0xffffffff, 0x0,
1079 	0x353c, 0xffffffff, 0x7007,
1080 	0x3538, 0xffffffff, 0x300010ff,
1081 	0x353c, 0xffffffff, 0x0,
1082 	0x353c, 0xffffffff, 0x0,
1083 	0x353c, 0xffffffff, 0x0,
1084 	0x353c, 0xffffffff, 0x0,
1085 	0x353c, 0xffffffff, 0x0,
1086 	0x353c, 0xffffffff, 0x400000,
1087 	0x3538, 0xffffffff, 0x100010ff,
1088 	0x353c, 0xffffffff, 0x0,
1089 	0x353c, 0xffffffff, 0x0,
1090 	0x353c, 0xffffffff, 0x0,
1091 	0x353c, 0xffffffff, 0x0,
1092 	0x353c, 0xffffffff, 0x0,
1093 	0x353c, 0xffffffff, 0x120200,
1094 	0x3538, 0xffffffff, 0x500010ff,
1095 	0x353c, 0xffffffff, 0x0,
1096 	0x353c, 0xffffffff, 0x0,
1097 	0x353c, 0xffffffff, 0x0,
1098 	0x353c, 0xffffffff, 0x0,
1099 	0x353c, 0xffffffff, 0x0,
1100 	0x353c, 0xffffffff, 0x1e1e16,
1101 	0x3538, 0xffffffff, 0x600010ff,
1102 	0x353c, 0xffffffff, 0x0,
1103 	0x353c, 0xffffffff, 0x0,
1104 	0x353c, 0xffffffff, 0x0,
1105 	0x353c, 0xffffffff, 0x0,
1106 	0x353c, 0xffffffff, 0x0,
1107 	0x353c, 0xffffffff, 0x171f1e,
1108 	0x3538, 0xffffffff, 0x700010ff,
1109 	0x353c, 0xffffffff, 0x0,
1110 	0x353c, 0xffffffff, 0x0,
1111 	0x353c, 0xffffffff, 0x0,
1112 	0x353c, 0xffffffff, 0x0,
1113 	0x353c, 0xffffffff, 0x0,
1114 	0x353c, 0xffffffff, 0x0,
1115 	0x3538, 0xffffffff, 0x9ff,
1116 	0x3500, 0xffffffff, 0x0,
1117 	0x3504, 0xffffffff, 0x10000800,
1118 	0x3504, 0xffffffff, 0xf,
1119 	0x3504, 0xffffffff, 0xf,
1120 	0x3500, 0xffffffff, 0x4,
1121 	0x3504, 0xffffffff, 0x1000051e,
1122 	0x3504, 0xffffffff, 0xffff,
1123 	0x3504, 0xffffffff, 0xffff,
1124 	0x3500, 0xffffffff, 0x8,
1125 	0x3504, 0xffffffff, 0x80500,
1126 	0x3500, 0xffffffff, 0x12,
1127 	0x3504, 0xffffffff, 0x9050c,
1128 	0x3500, 0xffffffff, 0x1d,
1129 	0x3504, 0xffffffff, 0xb052c,
1130 	0x3500, 0xffffffff, 0x2a,
1131 	0x3504, 0xffffffff, 0x1053e,
1132 	0x3500, 0xffffffff, 0x2d,
1133 	0x3504, 0xffffffff, 0x10546,
1134 	0x3500, 0xffffffff, 0x30,
1135 	0x3504, 0xffffffff, 0xa054e,
1136 	0x3500, 0xffffffff, 0x3c,
1137 	0x3504, 0xffffffff, 0x1055f,
1138 	0x3500, 0xffffffff, 0x3f,
1139 	0x3504, 0xffffffff, 0x10567,
1140 	0x3500, 0xffffffff, 0x42,
1141 	0x3504, 0xffffffff, 0x1056f,
1142 	0x3500, 0xffffffff, 0x45,
1143 	0x3504, 0xffffffff, 0x10572,
1144 	0x3500, 0xffffffff, 0x48,
1145 	0x3504, 0xffffffff, 0x20575,
1146 	0x3500, 0xffffffff, 0x4c,
1147 	0x3504, 0xffffffff, 0x190801,
1148 	0x3500, 0xffffffff, 0x67,
1149 	0x3504, 0xffffffff, 0x1082a,
1150 	0x3500, 0xffffffff, 0x6a,
1151 	0x3504, 0xffffffff, 0x1b082d,
1152 	0x3500, 0xffffffff, 0x87,
1153 	0x3504, 0xffffffff, 0x310851,
1154 	0x3500, 0xffffffff, 0xba,
1155 	0x3504, 0xffffffff, 0x891,
1156 	0x3500, 0xffffffff, 0xbc,
1157 	0x3504, 0xffffffff, 0x893,
1158 	0x3500, 0xffffffff, 0xbe,
1159 	0x3504, 0xffffffff, 0x20895,
1160 	0x3500, 0xffffffff, 0xc2,
1161 	0x3504, 0xffffffff, 0x20899,
1162 	0x3500, 0xffffffff, 0xc6,
1163 	0x3504, 0xffffffff, 0x2089d,
1164 	0x3500, 0xffffffff, 0xca,
1165 	0x3504, 0xffffffff, 0x8a1,
1166 	0x3500, 0xffffffff, 0xcc,
1167 	0x3504, 0xffffffff, 0x8a3,
1168 	0x3500, 0xffffffff, 0xce,
1169 	0x3504, 0xffffffff, 0x308a5,
1170 	0x3500, 0xffffffff, 0xd3,
1171 	0x3504, 0xffffffff, 0x6d08cd,
1172 	0x3500, 0xffffffff, 0x142,
1173 	0x3504, 0xffffffff, 0x2000095a,
1174 	0x3504, 0xffffffff, 0x1,
1175 	0x3500, 0xffffffff, 0x144,
1176 	0x3504, 0xffffffff, 0x301f095b,
1177 	0x3500, 0xffffffff, 0x165,
1178 	0x3504, 0xffffffff, 0xc094d,
1179 	0x3500, 0xffffffff, 0x173,
1180 	0x3504, 0xffffffff, 0xf096d,
1181 	0x3500, 0xffffffff, 0x184,
1182 	0x3504, 0xffffffff, 0x15097f,
1183 	0x3500, 0xffffffff, 0x19b,
1184 	0x3504, 0xffffffff, 0xc0998,
1185 	0x3500, 0xffffffff, 0x1a9,
1186 	0x3504, 0xffffffff, 0x409a7,
1187 	0x3500, 0xffffffff, 0x1af,
1188 	0x3504, 0xffffffff, 0xcdc,
1189 	0x3500, 0xffffffff, 0x1b1,
1190 	0x3504, 0xffffffff, 0x800,
1191 	0x3508, 0xffffffff, 0x6c9b2000,
1192 	0x3510, 0xfc00, 0x2000,
1193 	0x3544, 0xffffffff, 0xfc0,
1194 	0x28d4, 0x00000100, 0x100
1195 };
1196 
/**
 * si_init_golden_registers - program per-ASIC "golden" register settings
 *
 * @rdev: radeon_device pointer
 *
 * Applies the recommended register value tables defined above for the
 * detected SI family: base golden registers, RLC registers, clock
 * gating (mgcg/cgcg) init and, for Verde, the powergating init
 * sequence.  Families not listed are left untouched.
 */
static void si_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_TAHITI:
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers2,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
		break;
	case CHIP_PITCAIRN:
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
		break;
	case CHIP_VERDE:
		radeon_program_register_sequence(rdev,
						 verde_golden_registers,
						 (const u32)ARRAY_SIZE(verde_golden_registers));
		radeon_program_register_sequence(rdev,
						 verde_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 verde_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 verde_pg_init,
						 (const u32)ARRAY_SIZE(verde_pg_init));
		break;
	case CHIP_OLAND:
		radeon_program_register_sequence(rdev,
						 oland_golden_registers,
						 (const u32)ARRAY_SIZE(oland_golden_registers));
		radeon_program_register_sequence(rdev,
						 oland_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 oland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
		break;
	case CHIP_HAINAN:
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers,
						 (const u32)ARRAY_SIZE(hainan_golden_registers));
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers2,
						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
		radeon_program_register_sequence(rdev,
						 hainan_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
		break;
	default:
		break;
	}
}
1265 
1266 #define PCIE_BUS_CLK                10000
1267 #define TCLK                        (PCIE_BUS_CLK / 10)
1268 
1269 /**
1270  * si_get_xclk - get the xclk
1271  *
1272  * @rdev: radeon_device pointer
1273  *
1274  * Returns the reference clock used by the gfx engine
1275  * (SI).
1276  */
1277 u32 si_get_xclk(struct radeon_device *rdev)
1278 {
1279         u32 reference_clock = rdev->clock.spll.reference_freq;
1280 	u32 tmp;
1281 
1282 	tmp = RREG32(CG_CLKPIN_CNTL_2);
1283 	if (tmp & MUX_TCLK_TO_XCLK)
1284 		return TCLK;
1285 
1286 	tmp = RREG32(CG_CLKPIN_CNTL);
1287 	if (tmp & XTALIN_DIVIDE)
1288 		return reference_clock / 4;
1289 
1290 	return reference_clock;
1291 }
1292 
1293 /* get temperature in millidegrees */
1294 int si_get_temp(struct radeon_device *rdev)
1295 {
1296 	u32 temp;
1297 	int actual_temp = 0;
1298 
1299 	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1300 		CTF_TEMP_SHIFT;
1301 
1302 	if (temp & 0x200)
1303 		actual_temp = 255;
1304 	else
1305 		actual_temp = temp & 0x1ff;
1306 
1307 	actual_temp = (actual_temp * 1000);
1308 
1309 	return actual_temp;
1310 }
1311 
/* number of {index, data} pairs in each legacy MC io debug table below */
#define TAHITI_IO_MC_REGS_SIZE 36

/*
 * Legacy MC_SEQ_IO_DEBUG {index, data} overrides for Tahiti, written
 * by si_mc_load_microcode() before uploading the MC ucode.
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1352 
/*
 * Legacy MC_SEQ_IO_DEBUG {index, data} overrides for Pitcairn, written
 * by si_mc_load_microcode() before uploading the MC ucode.
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1391 
/*
 * Legacy MC_SEQ_IO_DEBUG {index, data} overrides for Verde, written
 * by si_mc_load_microcode() before uploading the MC ucode.
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1430 
/*
 * Legacy MC_SEQ_IO_DEBUG {index, data} overrides for Oland, written
 * by si_mc_load_microcode() before uploading the MC ucode.
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1469 
/*
 * Legacy MC_SEQ_IO_DEBUG {index, data} overrides for Hainan, written
 * by si_mc_load_microcode() before uploading the MC ucode.
 */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1508 
1509 /* ucode loading */
1510 int si_mc_load_microcode(struct radeon_device *rdev)
1511 {
1512 	const __be32 *fw_data = NULL;
1513 	const __le32 *new_fw_data = NULL;
1514 	u32 running, blackout = 0;
1515 	u32 *io_mc_regs = NULL;
1516 	const __le32 *new_io_mc_regs = NULL;
1517 	int i, regs_size, ucode_size;
1518 
1519 	if (!rdev->mc_fw)
1520 		return -EINVAL;
1521 
1522 	if (rdev->new_fw) {
1523 		const struct mc_firmware_header_v1_0 *hdr =
1524 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1525 
1526 		radeon_ucode_print_mc_hdr(&hdr->header);
1527 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1528 		new_io_mc_regs = (const __le32 *)
1529 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1530 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1531 		new_fw_data = (const __le32 *)
1532 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1533 	} else {
1534 		ucode_size = rdev->mc_fw->size / 4;
1535 
1536 		switch (rdev->family) {
1537 		case CHIP_TAHITI:
1538 			io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1539 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1540 			break;
1541 		case CHIP_PITCAIRN:
1542 			io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1543 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1544 			break;
1545 		case CHIP_VERDE:
1546 		default:
1547 			io_mc_regs = (u32 *)&verde_io_mc_regs;
1548 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1549 			break;
1550 		case CHIP_OLAND:
1551 			io_mc_regs = (u32 *)&oland_io_mc_regs;
1552 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1553 			break;
1554 		case CHIP_HAINAN:
1555 			io_mc_regs = (u32 *)&hainan_io_mc_regs;
1556 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1557 			break;
1558 		}
1559 		fw_data = (const __be32 *)rdev->mc_fw->data;
1560 	}
1561 
1562 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1563 
1564 	if (running == 0) {
1565 		if (running) {
1566 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1567 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1568 		}
1569 
1570 		/* reset the engine and set to writable */
1571 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1572 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1573 
1574 		/* load mc io regs */
1575 		for (i = 0; i < regs_size; i++) {
1576 			if (rdev->new_fw) {
1577 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1578 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1579 			} else {
1580 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1581 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1582 			}
1583 		}
1584 		/* load the MC ucode */
1585 		for (i = 0; i < ucode_size; i++) {
1586 			if (rdev->new_fw)
1587 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1588 			else
1589 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1590 		}
1591 
1592 		/* put the engine back into the active state */
1593 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1594 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1595 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1596 
1597 		/* wait for training to complete */
1598 		for (i = 0; i < rdev->usec_timeout; i++) {
1599 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1600 				break;
1601 			udelay(1);
1602 		}
1603 		for (i = 0; i < rdev->usec_timeout; i++) {
1604 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1605 				break;
1606 			udelay(1);
1607 		}
1608 
1609 		if (running)
1610 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1611 	}
1612 
1613 	return 0;
1614 }
1615 
1616 static int si_init_microcode(struct radeon_device *rdev)
1617 {
1618 	const char *chip_name;
1619 	const char *new_chip_name;
1620 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1621 	size_t smc_req_size, mc2_req_size;
1622 	char fw_name[30];
1623 	int err;
1624 	int new_fw = 0;
1625 
1626 	DRM_DEBUG("\n");
1627 
1628 	switch (rdev->family) {
1629 	case CHIP_TAHITI:
1630 		chip_name = "TAHITI";
1631 		new_chip_name = "tahiti";
1632 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1633 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1634 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1635 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1636 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1637 		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1638 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1639 		break;
1640 	case CHIP_PITCAIRN:
1641 		chip_name = "PITCAIRN";
1642 		new_chip_name = "pitcairn";
1643 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1644 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1645 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1646 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1647 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1648 		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1649 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1650 		break;
1651 	case CHIP_VERDE:
1652 		chip_name = "VERDE";
1653 		new_chip_name = "verde";
1654 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1655 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1656 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1657 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1658 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1659 		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1660 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1661 		break;
1662 	case CHIP_OLAND:
1663 		chip_name = "OLAND";
1664 		new_chip_name = "oland";
1665 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1666 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1667 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1668 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1669 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1670 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1671 		break;
1672 	case CHIP_HAINAN:
1673 		chip_name = "HAINAN";
1674 		new_chip_name = "hainan";
1675 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1676 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1677 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1678 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1679 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1680 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1681 		break;
1682 	default: BUG();
1683 	}
1684 
1685 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
1686 
1687 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1688 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1689 	if (err) {
1690 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1691 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1692 		if (err)
1693 			goto out;
1694 		if (rdev->pfp_fw->size != pfp_req_size) {
1695 			printk(KERN_ERR
1696 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1697 			       rdev->pfp_fw->size, fw_name);
1698 			err = -EINVAL;
1699 			goto out;
1700 		}
1701 	} else {
1702 		err = radeon_ucode_validate(rdev->pfp_fw);
1703 		if (err) {
1704 			printk(KERN_ERR
1705 			       "si_cp: validation failed for firmware \"%s\"\n",
1706 			       fw_name);
1707 			goto out;
1708 		} else {
1709 			new_fw++;
1710 		}
1711 	}
1712 
1713 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
1714 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1715 	if (err) {
1716 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1717 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1718 		if (err)
1719 			goto out;
1720 		if (rdev->me_fw->size != me_req_size) {
1721 			printk(KERN_ERR
1722 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1723 			       rdev->me_fw->size, fw_name);
1724 			err = -EINVAL;
1725 		}
1726 	} else {
1727 		err = radeon_ucode_validate(rdev->me_fw);
1728 		if (err) {
1729 			printk(KERN_ERR
1730 			       "si_cp: validation failed for firmware \"%s\"\n",
1731 			       fw_name);
1732 			goto out;
1733 		} else {
1734 			new_fw++;
1735 		}
1736 	}
1737 
1738 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
1739 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1740 	if (err) {
1741 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1742 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1743 		if (err)
1744 			goto out;
1745 		if (rdev->ce_fw->size != ce_req_size) {
1746 			printk(KERN_ERR
1747 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1748 			       rdev->ce_fw->size, fw_name);
1749 			err = -EINVAL;
1750 		}
1751 	} else {
1752 		err = radeon_ucode_validate(rdev->ce_fw);
1753 		if (err) {
1754 			printk(KERN_ERR
1755 			       "si_cp: validation failed for firmware \"%s\"\n",
1756 			       fw_name);
1757 			goto out;
1758 		} else {
1759 			new_fw++;
1760 		}
1761 	}
1762 
1763 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
1764 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1765 	if (err) {
1766 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1767 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1768 		if (err)
1769 			goto out;
1770 		if (rdev->rlc_fw->size != rlc_req_size) {
1771 			printk(KERN_ERR
1772 			       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1773 			       rdev->rlc_fw->size, fw_name);
1774 			err = -EINVAL;
1775 		}
1776 	} else {
1777 		err = radeon_ucode_validate(rdev->rlc_fw);
1778 		if (err) {
1779 			printk(KERN_ERR
1780 			       "si_cp: validation failed for firmware \"%s\"\n",
1781 			       fw_name);
1782 			goto out;
1783 		} else {
1784 			new_fw++;
1785 		}
1786 	}
1787 
1788 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
1789 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1790 	if (err) {
1791 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1792 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1793 		if (err) {
1794 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1795 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1796 			if (err)
1797 				goto out;
1798 		}
1799 		if ((rdev->mc_fw->size != mc_req_size) &&
1800 		    (rdev->mc_fw->size != mc2_req_size)) {
1801 			printk(KERN_ERR
1802 			       "si_mc: Bogus length %zu in firmware \"%s\"\n",
1803 			       rdev->mc_fw->size, fw_name);
1804 			err = -EINVAL;
1805 		}
1806 		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1807 	} else {
1808 		err = radeon_ucode_validate(rdev->mc_fw);
1809 		if (err) {
1810 			printk(KERN_ERR
1811 			       "si_cp: validation failed for firmware \"%s\"\n",
1812 			       fw_name);
1813 			goto out;
1814 		} else {
1815 			new_fw++;
1816 		}
1817 	}
1818 
1819 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
1820 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1821 	if (err) {
1822 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1823 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1824 		if (err) {
1825 			printk(KERN_ERR
1826 			       "smc: error loading firmware \"%s\"\n",
1827 			       fw_name);
1828 			release_firmware(rdev->smc_fw);
1829 			rdev->smc_fw = NULL;
1830 			err = 0;
1831 		} else if (rdev->smc_fw->size != smc_req_size) {
1832 			printk(KERN_ERR
1833 			       "si_smc: Bogus length %zu in firmware \"%s\"\n",
1834 			       rdev->smc_fw->size, fw_name);
1835 			err = -EINVAL;
1836 		}
1837 	} else {
1838 		err = radeon_ucode_validate(rdev->smc_fw);
1839 		if (err) {
1840 			printk(KERN_ERR
1841 			       "si_cp: validation failed for firmware \"%s\"\n",
1842 			       fw_name);
1843 			goto out;
1844 		} else {
1845 			new_fw++;
1846 		}
1847 	}
1848 
1849 	if (new_fw == 0) {
1850 		rdev->new_fw = false;
1851 	} else if (new_fw < 6) {
1852 		printk(KERN_ERR "si_fw: mixing new and old firmware!\n");
1853 		err = -EINVAL;
1854 	} else {
1855 		rdev->new_fw = true;
1856 	}
1857 out:
1858 	if (err) {
1859 		if (err != -EINVAL)
1860 			printk(KERN_ERR
1861 			       "si_cp: Failed to load firmware \"%s\"\n",
1862 			       fw_name);
1863 		release_firmware(rdev->pfp_fw);
1864 		rdev->pfp_fw = NULL;
1865 		release_firmware(rdev->me_fw);
1866 		rdev->me_fw = NULL;
1867 		release_firmware(rdev->ce_fw);
1868 		rdev->ce_fw = NULL;
1869 		release_firmware(rdev->rlc_fw);
1870 		rdev->rlc_fw = NULL;
1871 		release_firmware(rdev->mc_fw);
1872 		rdev->mc_fw = NULL;
1873 		release_firmware(rdev->smc_fw);
1874 		rdev->smc_fw = NULL;
1875 	}
1876 	return err;
1877 }
1878 
1879 /* watermark setup */
/**
 * dce6_line_buffer_adjust - set up the line buffer split for a crtc
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: crtc to configure
 * @mode: mode currently set on this crtc (NULL/disabled if none)
 * @other_mode: mode on the crtc sharing this line buffer (if any)
 *
 * Programs DC_LB_MEMORY_SPLIT and the DMIF buffer allocation for the
 * crtc, waits for the allocation to take effect, and returns the
 * resulting line buffer size for use in the caller's watermark
 * calculations (0 when the crtc is disabled).
 */
static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode,
				   struct drm_display_mode *other_mode)
{
	u32 tmp, buffer_alloc, i;
	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
	/*
	 * Line Buffer Setup
	 * There are 3 line buffers, each one shared by 2 display controllers.
	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
	 * the display controllers.  The partitioning is done via one of four
	 * preset allocations specified in bits 21:20:
	 *  0 - half lb
	 *  2 - whole lb, other crtc must be disabled
	 */
	/* this can get tricky if we have two large displays on a paired group
	 * of crtcs.  Ideally for multiple large displays we'd assign them to
	 * non-linked crtcs for maximum line buffer allocation.
	 */
	if (radeon_crtc->base.enabled && mode) {
		if (other_mode) {
			tmp = 0; /* 1/2 */
			buffer_alloc = 1;
		} else {
			tmp = 2; /* whole */
			buffer_alloc = 2;
		}
	} else {
		tmp = 0;
		buffer_alloc = 0;
	}

	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
	       DC_LB_MEMORY_CONFIG(tmp));

	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
	/* poll until the hw acknowledges the DMIF buffer allocation */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
			break;
		udelay(1);
	}

	if (radeon_crtc->base.enabled && mode) {
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 2:
			return 8192 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}
1938 
1939 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1940 {
1941 	u32 tmp = RREG32(MC_SHARED_CHMAP);
1942 
1943 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1944 	case 0:
1945 	default:
1946 		return 1;
1947 	case 1:
1948 		return 2;
1949 	case 2:
1950 		return 4;
1951 	case 3:
1952 		return 8;
1953 	case 4:
1954 		return 3;
1955 	case 5:
1956 		return 6;
1957 	case 6:
1958 		return 10;
1959 	case 7:
1960 		return 12;
1961 	case 8:
1962 		return 16;
1963 	}
1964 }
1965 
/* Inputs to the DCE6 display watermark calculations.  One instance is
 * filled out per watermark level (high/low clocks) per crtc by
 * dce6_program_watermarks().
 */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1981 
1982 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1983 {
1984 	/* Calculate raw DRAM Bandwidth */
1985 	fixed20_12 dram_efficiency; /* 0.7 */
1986 	fixed20_12 yclk, dram_channels, bandwidth;
1987 	fixed20_12 a;
1988 
1989 	a.full = dfixed_const(1000);
1990 	yclk.full = dfixed_const(wm->yclk);
1991 	yclk.full = dfixed_div(yclk, a);
1992 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1993 	a.full = dfixed_const(10);
1994 	dram_efficiency.full = dfixed_const(7);
1995 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
1996 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1997 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1998 
1999 	return dfixed_trunc(bandwidth);
2000 }
2001 
2002 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2003 {
2004 	/* Calculate DRAM Bandwidth and the part allocated to display. */
2005 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2006 	fixed20_12 yclk, dram_channels, bandwidth;
2007 	fixed20_12 a;
2008 
2009 	a.full = dfixed_const(1000);
2010 	yclk.full = dfixed_const(wm->yclk);
2011 	yclk.full = dfixed_div(yclk, a);
2012 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2013 	a.full = dfixed_const(10);
2014 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
2015 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2016 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2017 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2018 
2019 	return dfixed_trunc(bandwidth);
2020 }
2021 
2022 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2023 {
2024 	/* Calculate the display Data return Bandwidth */
2025 	fixed20_12 return_efficiency; /* 0.8 */
2026 	fixed20_12 sclk, bandwidth;
2027 	fixed20_12 a;
2028 
2029 	a.full = dfixed_const(1000);
2030 	sclk.full = dfixed_const(wm->sclk);
2031 	sclk.full = dfixed_div(sclk, a);
2032 	a.full = dfixed_const(10);
2033 	return_efficiency.full = dfixed_const(8);
2034 	return_efficiency.full = dfixed_div(return_efficiency, a);
2035 	a.full = dfixed_const(32);
2036 	bandwidth.full = dfixed_mul(a, sclk);
2037 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2038 
2039 	return dfixed_trunc(bandwidth);
2040 }
2041 
/* Size of a DMIF request in bytes; a fixed 32 for this hardware. */
static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
{
	return 32;
}
2046 
2047 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2048 {
2049 	/* Calculate the DMIF Request Bandwidth */
2050 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2051 	fixed20_12 disp_clk, sclk, bandwidth;
2052 	fixed20_12 a, b1, b2;
2053 	u32 min_bandwidth;
2054 
2055 	a.full = dfixed_const(1000);
2056 	disp_clk.full = dfixed_const(wm->disp_clk);
2057 	disp_clk.full = dfixed_div(disp_clk, a);
2058 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2059 	b1.full = dfixed_mul(a, disp_clk);
2060 
2061 	a.full = dfixed_const(1000);
2062 	sclk.full = dfixed_const(wm->sclk);
2063 	sclk.full = dfixed_div(sclk, a);
2064 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2065 	b2.full = dfixed_mul(a, sclk);
2066 
2067 	a.full = dfixed_const(10);
2068 	disp_clk_request_efficiency.full = dfixed_const(8);
2069 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2070 
2071 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2072 
2073 	a.full = dfixed_const(min_bandwidth);
2074 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2075 
2076 	return dfixed_trunc(bandwidth);
2077 }
2078 
2079 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2080 {
2081 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
2082 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2083 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2084 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2085 
2086 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2087 }
2088 
2089 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2090 {
2091 	/* Calculate the display mode Average Bandwidth
2092 	 * DisplayMode should contain the source and destination dimensions,
2093 	 * timing, etc.
2094 	 */
2095 	fixed20_12 bpp;
2096 	fixed20_12 line_time;
2097 	fixed20_12 src_width;
2098 	fixed20_12 bandwidth;
2099 	fixed20_12 a;
2100 
2101 	a.full = dfixed_const(1000);
2102 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2103 	line_time.full = dfixed_div(line_time, a);
2104 	bpp.full = dfixed_const(wm->bytes_per_pixel);
2105 	src_width.full = dfixed_const(wm->src_width);
2106 	bandwidth.full = dfixed_mul(src_width, bpp);
2107 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2108 	bandwidth.full = dfixed_div(bandwidth, line_time);
2109 
2110 	return dfixed_trunc(bandwidth);
2111 }
2112 
/* Compute the latency watermark for one head, in ns: the total fetch
 * latency, plus — if one source line span cannot be filled within the
 * active display time — the extra line fill time.  Returns 0 when no
 * heads are active.
 */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	/* return times for a worst-case 512-byte chunk and a cursor line pair */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* time consumed by data returns for the other active heads */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* heavy downscaling, many vertical taps, or interlace need more
	 * source lines per destination line */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / ((mc_latency + 512) / disp_clk): the rate at
	 * which the dmif could be drained over that interval */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* line buffer fill rate is also capped by disp_clk * bytes_per_pixel */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fetch one span of source lines at lb_fill_bw */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
2175 
2176 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2177 {
2178 	if (dce6_average_bandwidth(wm) <=
2179 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2180 		return true;
2181 	else
2182 		return false;
2183 };
2184 
2185 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2186 {
2187 	if (dce6_average_bandwidth(wm) <=
2188 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2189 		return true;
2190 	else
2191 		return false;
2192 };
2193 
2194 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2195 {
2196 	u32 lb_partitions = wm->lb_size / wm->src_width;
2197 	u32 line_time = wm->active_time + wm->blank_time;
2198 	u32 latency_tolerant_lines;
2199 	u32 latency_hiding;
2200 	fixed20_12 a;
2201 
2202 	a.full = dfixed_const(1);
2203 	if (wm->vsc.full > a.full)
2204 		latency_tolerant_lines = 1;
2205 	else {
2206 		if (lb_partitions <= (wm->vtaps + 1))
2207 			latency_tolerant_lines = 1;
2208 		else
2209 			latency_tolerant_lines = 2;
2210 	}
2211 
2212 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2213 
2214 	if (dce6_latency_watermark(wm) <= latency_hiding)
2215 		return true;
2216 	else
2217 		return false;
2218 }
2219 
/* Program the latency watermarks and priority marks for one crtc.
 * @rdev: radeon device
 * @radeon_crtc: crtc to program
 * @lb_size: line buffer bytes allocated to this crtc
 * @num_heads: number of active crtcs
 *
 * Two watermark sets are computed: A for high clocks and B for low
 * clocks.  If any bandwidth or latency-hiding check fails, display
 * priority is forced to always-on.  The results are also cached on the
 * crtc for DPM.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm_low, wm_high;
	u32 dram_channels;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* line_time is clamped to the 16-bit watermark field */
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		if (rdev->family == CHIP_ARUBA)
			dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			dram_channels = si_get_number_of_dram_channels(rdev);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = dram_channels;
		wm_high.num_heads = num_heads;

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = dram_channels;
		wm_low.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
		/* set for low clocks */
		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce6_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce6_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* convert latency watermark A into a priority mark:
		 * latency * pixel rate * hsc, in units of 16 pixels */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* same conversion for latency watermark B */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
2379 
2380 void dce6_bandwidth_update(struct radeon_device *rdev)
2381 {
2382 	struct drm_display_mode *mode0 = NULL;
2383 	struct drm_display_mode *mode1 = NULL;
2384 	u32 num_heads = 0, lb_size;
2385 	int i;
2386 
2387 	radeon_update_display_priority(rdev);
2388 
2389 	for (i = 0; i < rdev->num_crtc; i++) {
2390 		if (rdev->mode_info.crtcs[i]->base.enabled)
2391 			num_heads++;
2392 	}
2393 	for (i = 0; i < rdev->num_crtc; i += 2) {
2394 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2395 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2396 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2397 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2398 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2399 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2400 	}
2401 }
2402 
2403 /*
2404  * Core functions
2405  */
2406 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2407 {
2408 	const u32 num_tile_mode_states = 32;
2409 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2410 
2411 	switch (rdev->config.si.mem_row_size_in_kb) {
2412 	case 1:
2413 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2414 		break;
2415 	case 2:
2416 	default:
2417 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2418 		break;
2419 	case 4:
2420 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2421 		break;
2422 	}
2423 
2424 	if ((rdev->family == CHIP_TAHITI) ||
2425 	    (rdev->family == CHIP_PITCAIRN)) {
2426 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2427 			switch (reg_offset) {
2428 			case 0:  /* non-AA compressed depth or any compressed stencil */
2429 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2430 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2431 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2432 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2433 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2434 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2435 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2436 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2437 				break;
2438 			case 1:  /* 2xAA/4xAA compressed depth only */
2439 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2440 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2441 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2442 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2443 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2444 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2446 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2447 				break;
2448 			case 2:  /* 8xAA compressed depth only */
2449 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2450 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2451 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2452 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2453 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2454 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2456 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2457 				break;
2458 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2459 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2460 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2461 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2462 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2463 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2464 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2466 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2467 				break;
2468 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2469 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2470 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2471 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2472 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2473 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2474 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2476 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2477 				break;
2478 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2479 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2480 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2481 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2482 						 TILE_SPLIT(split_equal_to_row_size) |
2483 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2484 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2486 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2487 				break;
2488 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2489 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2490 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2491 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2492 						 TILE_SPLIT(split_equal_to_row_size) |
2493 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2494 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2495 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2496 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2497 				break;
2498 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2499 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2500 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2501 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2502 						 TILE_SPLIT(split_equal_to_row_size) |
2503 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2504 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2505 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2506 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2507 				break;
2508 			case 8:  /* 1D and 1D Array Surfaces */
2509 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2510 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2511 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2512 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2513 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2514 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2515 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2516 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2517 				break;
2518 			case 9:  /* Displayable maps. */
2519 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2520 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2521 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2522 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2523 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2524 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2525 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2526 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2527 				break;
2528 			case 10:  /* Display 8bpp. */
2529 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2530 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2531 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2532 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2533 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2534 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2535 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2536 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2537 				break;
2538 			case 11:  /* Display 16bpp. */
2539 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2540 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2541 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2542 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2543 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2544 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2545 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2546 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2547 				break;
2548 			case 12:  /* Display 32bpp. */
2549 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2551 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2552 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2553 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2554 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2555 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2556 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2557 				break;
2558 			case 13:  /* Thin. */
2559 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2560 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2561 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2562 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2563 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2564 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2565 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2566 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2567 				break;
2568 			case 14:  /* Thin 8 bpp. */
2569 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2570 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2571 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2572 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2573 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2574 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2575 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2576 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2577 				break;
2578 			case 15:  /* Thin 16 bpp. */
2579 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2581 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2582 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2583 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2584 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2585 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2586 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2587 				break;
2588 			case 16:  /* Thin 32 bpp. */
2589 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2590 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2591 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2592 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2593 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2594 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2595 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2596 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2597 				break;
2598 			case 17:  /* Thin 64 bpp. */
2599 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2600 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2601 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2602 						 TILE_SPLIT(split_equal_to_row_size) |
2603 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2604 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2605 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2606 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2607 				break;
2608 			case 21:  /* 8 bpp PRT. */
2609 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2610 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2611 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2612 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2613 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2614 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2615 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2616 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2617 				break;
2618 			case 22:  /* 16 bpp PRT */
2619 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2620 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2621 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2622 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2623 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2624 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2626 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2627 				break;
2628 			case 23:  /* 32 bpp PRT */
2629 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2630 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2631 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2632 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2633 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2634 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2635 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2636 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2637 				break;
2638 			case 24:  /* 64 bpp PRT */
2639 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2640 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2641 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2642 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2643 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2644 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2645 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2646 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2647 				break;
2648 			case 25:  /* 128 bpp PRT */
2649 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2650 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2651 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2652 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2653 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2654 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2655 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2656 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2657 				break;
2658 			default:
2659 				gb_tile_moden = 0;
2660 				break;
2661 			}
2662 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2663 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2664 		}
2665 	} else if ((rdev->family == CHIP_VERDE) ||
2666 		   (rdev->family == CHIP_OLAND) ||
2667 		   (rdev->family == CHIP_HAINAN)) {
2668 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2669 			switch (reg_offset) {
2670 			case 0:  /* non-AA compressed depth or any compressed stencil */
2671 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2672 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2673 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2674 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2675 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2676 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2677 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2678 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2679 				break;
2680 			case 1:  /* 2xAA/4xAA compressed depth only */
2681 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2682 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2683 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2684 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2685 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2686 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2687 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2688 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2689 				break;
2690 			case 2:  /* 8xAA compressed depth only */
2691 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2692 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2693 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2694 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2695 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2696 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2698 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2699 				break;
2700 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2701 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2702 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2703 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2704 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2705 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2706 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2707 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2708 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2709 				break;
2710 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2711 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2712 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2713 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2714 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2715 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2716 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2717 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2718 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2719 				break;
2720 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2721 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2722 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2723 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2724 						 TILE_SPLIT(split_equal_to_row_size) |
2725 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2726 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2728 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2729 				break;
2730 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2731 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2732 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2733 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2734 						 TILE_SPLIT(split_equal_to_row_size) |
2735 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2736 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2737 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2738 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2739 				break;
2740 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2741 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2742 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2743 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2744 						 TILE_SPLIT(split_equal_to_row_size) |
2745 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2746 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2747 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2748 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2749 				break;
2750 			case 8:  /* 1D and 1D Array Surfaces */
2751 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2752 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2753 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2754 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2755 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2756 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2758 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2759 				break;
2760 			case 9:  /* Displayable maps. */
2761 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2762 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2763 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2764 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2765 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2766 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2767 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2768 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2769 				break;
2770 			case 10:  /* Display 8bpp. */
2771 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2772 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2773 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2774 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2775 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2776 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2777 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2778 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2779 				break;
2780 			case 11:  /* Display 16bpp. */
2781 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2782 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2783 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2784 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2785 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2786 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2787 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2788 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2789 				break;
2790 			case 12:  /* Display 32bpp. */
2791 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2792 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2793 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2794 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2795 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2796 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2797 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2798 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2799 				break;
2800 			case 13:  /* Thin. */
2801 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2802 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2803 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2804 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2805 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2806 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2807 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2808 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2809 				break;
2810 			case 14:  /* Thin 8 bpp. */
2811 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2812 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2813 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2815 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2816 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2817 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2818 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2819 				break;
2820 			case 15:  /* Thin 16 bpp. */
2821 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2822 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2823 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2824 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2825 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2826 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2827 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2828 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2829 				break;
2830 			case 16:  /* Thin 32 bpp. */
2831 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2832 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2833 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2834 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2835 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2836 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2837 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2838 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2839 				break;
2840 			case 17:  /* Thin 64 bpp. */
2841 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2842 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2843 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2844 						 TILE_SPLIT(split_equal_to_row_size) |
2845 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2846 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2847 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2848 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2849 				break;
2850 			case 21:  /* 8 bpp PRT. */
2851 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2852 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2853 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2854 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2855 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2856 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2857 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2858 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2859 				break;
2860 			case 22:  /* 16 bpp PRT */
2861 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2862 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2863 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2864 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2865 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2866 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2867 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2868 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2869 				break;
2870 			case 23:  /* 32 bpp PRT */
2871 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2872 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2873 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2874 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2875 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2876 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2877 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2878 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2879 				break;
2880 			case 24:  /* 64 bpp PRT */
2881 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2882 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2883 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2884 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2885 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2886 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2887 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2888 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2889 				break;
2890 			case 25:  /* 128 bpp PRT */
2891 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2892 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2893 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2894 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2895 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2896 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2897 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2898 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2899 				break;
2900 			default:
2901 				gb_tile_moden = 0;
2902 				break;
2903 			}
2904 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2905 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2906 		}
2907 	} else
2908 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2909 }
2910 
2911 static void si_select_se_sh(struct radeon_device *rdev,
2912 			    u32 se_num, u32 sh_num)
2913 {
2914 	u32 data = INSTANCE_BROADCAST_WRITES;
2915 
2916 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2917 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2918 	else if (se_num == 0xffffffff)
2919 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2920 	else if (sh_num == 0xffffffff)
2921 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2922 	else
2923 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2924 	WREG32(GRBM_GFX_INDEX, data);
2925 }
2926 
2927 static u32 si_create_bitmask(u32 bit_width)
2928 {
2929 	u32 i, mask = 0;
2930 
2931 	for (i = 0; i < bit_width; i++) {
2932 		mask <<= 1;
2933 		mask |= 1;
2934 	}
2935 	return mask;
2936 }
2937 
/* si_get_cu_enabled - mask of usable compute units in the selected SH
 * @rdev: radeon device
 * @cu_per_sh: number of CUs per shader array; bounds the returned mask
 *
 * Combines the hardware inactive-CU bits (CC_GC_SHADER_ARRAY_CONFIG) with
 * the user/driver disable bits (GC_USER_SHADER_ARRAY_CONFIG) and inverts
 * the result, so a set bit means "CU enabled".  Caller must first select
 * the SE/SH of interest via si_select_se_sh().
 */
static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
{
	u32 data, mask;

	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	/* the fused inactive-CU bits are only honoured when bit 0 is set —
	 * presumably a "mask valid" flag; confirm against register docs */
	if (data & 1)
		data &= INACTIVE_CUS_MASK;
	else
		data = 0;
	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);

	data >>= INACTIVE_CUS_SHIFT;

	mask = si_create_bitmask(cu_per_sh);

	/* invert: inactive bits -> active bits, clipped to cu_per_sh CUs */
	return ~data & mask;
}
2955 
2956 static void si_setup_spi(struct radeon_device *rdev,
2957 			 u32 se_num, u32 sh_per_se,
2958 			 u32 cu_per_sh)
2959 {
2960 	int i, j, k;
2961 	u32 data, mask, active_cu;
2962 
2963 	for (i = 0; i < se_num; i++) {
2964 		for (j = 0; j < sh_per_se; j++) {
2965 			si_select_se_sh(rdev, i, j);
2966 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2967 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2968 
2969 			mask = 1;
2970 			for (k = 0; k < 16; k++) {
2971 				mask <<= k;
2972 				if (active_cu & mask) {
2973 					data &= ~mask;
2974 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2975 					break;
2976 				}
2977 			}
2978 		}
2979 	}
2980 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2981 }
2982 
/* si_get_rb_disabled - mask of disabled render backends in the selected SH
 * @rdev: radeon device
 * @max_rb_num_per_se: maximum render backends per shader engine
 * @sh_per_se: number of shader arrays per shader engine
 *
 * Merges the hardware backend-disable bits (CC_RB_BACKEND_DISABLE) with
 * the user/driver disable bits (GC_USER_RB_BACKEND_DISABLE).  A set bit
 * in the result means "RB disabled".  Caller must first select the SE/SH
 * of interest via si_select_se_sh().
 */
static u32 si_get_rb_disabled(struct radeon_device *rdev,
			      u32 max_rb_num_per_se,
			      u32 sh_per_se)
{
	u32 data, mask;

	data = RREG32(CC_RB_BACKEND_DISABLE);
	/* the fused disable bits are only honoured when bit 0 is set —
	 * presumably a "mask valid" flag; confirm against register docs */
	if (data & 1)
		data &= BACKEND_DISABLE_MASK;
	else
		data = 0;
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	data >>= BACKEND_DISABLE_SHIFT;

	/* clip to the RBs that actually belong to one shader array */
	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);

	return data & mask;
}
3002 
/* si_setup_rb - configure the render backends
 * @rdev: radeon device
 * @se_num: number of shader engines
 * @sh_per_se: number of shader arrays per shader engine
 * @max_rb_num_per_se: maximum render backends per shader engine
 *
 * Works out which RBs are usable (not harvested/disabled), records the
 * result in rdev->config.si.backend_enable_mask and programs a matching
 * PA_SC_RASTER_CONFIG RB mapping for every shader engine.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* pack each SH's disabled-RB mask into one global bitfield,
	 * TAHITI_RB_BITMAP_WIDTH_PER_SH bits per shader array */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert: a set bit now means "RB usable" */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* exported so userspace/other code can see the live RB set */
	rdev->config.si.backend_enable_mask = enabled_rbs;

	/* per SE: consume two enabled-RB bits per SH and pick the raster
	 * RB mapping that matches which RB(s) of the pair are alive */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	/* back to broadcast so later register writes hit all SEs/SHs */
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
3052 
/**
 * si_gpu_init - bring up the 3D engine and global GFX state
 * @rdev: radeon device
 *
 * Fills in rdev->config.si with the per-ASIC shader/backend topology,
 * programs the tiling/address configuration registers, initializes the
 * tiling mode table, render backends and SPI CU masks, and applies the
 * remaining 3D-engine hardware defaults.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-ASIC GFX topology and scan-converter FIFO sizing */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP: zero five registers in each of 32 groups,
	 * 0x18 bytes apart (magic offsets inherited from earlier ASICs) */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read here but never consumed
	 * below; only mc_arb_ramcfg is used. */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* derive DRAM row size (in KB) from the column count, capped at 4 */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* mirror the address config into every block that walks memory */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);

	/* count the CUs that survived harvesting across all SE/SH pairs */
	rdev->config.si.active_cus = 0;
	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
			rdev->config.si.active_cus +=
				hweight32(si_get_cu_active_bitmap(rdev, i, j));
		}
	}

	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* read-modify-write with no modification — write-back keeps the
	 * POR value; presumably needed to latch the register (TODO confirm) */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* disable all CB performance counters */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	/* make HDP flushes also invalidate its cache */
	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* let the register writes settle before the engine is used */
	udelay(50);
}
3316 
3317 /*
3318  * GPU scratch registers helpers function.
3319  */
3320 static void si_scratch_init(struct radeon_device *rdev)
3321 {
3322 	int i;
3323 
3324 	rdev->scratch.num_reg = 7;
3325 	rdev->scratch.reg_base = SCRATCH_REG0;
3326 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3327 		rdev->scratch.free[i] = true;
3328 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3329 	}
3330 }
3331 
/**
 * si_fence_ring_emit - emit a fence on the gfx ring
 * @rdev: radeon device
 * @fence: fence to emit
 *
 * Flushes the GPU caches over the GART, then emits an EVENT_WRITE_EOP
 * packet that writes the fence sequence number to the fence driver's GPU
 * address and raises an interrupt once the pipeline has drained.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart: clear CP_COHER_CNTL2 (vmid 0), then
	 * sync/invalidate TC, TCL1 and the SH instruction/constant caches */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);	/* full surface size */
	radeon_ring_write(ring, 0);		/* base address */
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, lower_32_bits(addr));
	/* DATA_SEL/INT_SEL select the fence write size and interrupt mode */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3358 
3359 /*
3360  * IB stuff
3361  */
/**
 * si_ring_ib_execute - schedule an indirect buffer on a CP ring
 * @rdev: radeon device
 * @ib: indirect buffer to execute
 *
 * Emits the packets that make the CP fetch and run @ib: an optional
 * next-rptr bookkeeping write, the INDIRECT_BUFFER packet itself and,
 * for non-const IBs, a GART cache flush tagged with the IB's VM id.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dw for this write + 4 dw IB packet + 8 dw flush:
			 * next_rptr points just past everything emitted here */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* same idea via a 5 dw WRITE_DATA to the wb buffer */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |	/* byte-swap control for BE hosts */
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);	/* full surface size */
		radeon_ring_write(ring, 0);		/* base address */
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3418 
3419 /*
3420  * CP.
3421  */
3422 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3423 {
3424 	if (enable)
3425 		WREG32(CP_ME_CNTL, 0);
3426 	else {
3427 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3428 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3429 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3430 		WREG32(SCRATCH_UMSK, 0);
3431 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3432 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3433 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3434 	}
3435 	udelay(50);
3436 }
3437 
3438 static int si_cp_load_microcode(struct radeon_device *rdev)
3439 {
3440 	int i;
3441 
3442 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3443 		return -EINVAL;
3444 
3445 	si_cp_enable(rdev, false);
3446 
3447 	if (rdev->new_fw) {
3448 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3449 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3450 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3451 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3452 		const struct gfx_firmware_header_v1_0 *me_hdr =
3453 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3454 		const __le32 *fw_data;
3455 		u32 fw_size;
3456 
3457 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3458 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3459 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3460 
3461 		/* PFP */
3462 		fw_data = (const __le32 *)
3463 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3464 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3465 		WREG32(CP_PFP_UCODE_ADDR, 0);
3466 		for (i = 0; i < fw_size; i++)
3467 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3468 		WREG32(CP_PFP_UCODE_ADDR, 0);
3469 
3470 		/* CE */
3471 		fw_data = (const __le32 *)
3472 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3473 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3474 		WREG32(CP_CE_UCODE_ADDR, 0);
3475 		for (i = 0; i < fw_size; i++)
3476 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3477 		WREG32(CP_CE_UCODE_ADDR, 0);
3478 
3479 		/* ME */
3480 		fw_data = (const __be32 *)
3481 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3482 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3483 		WREG32(CP_ME_RAM_WADDR, 0);
3484 		for (i = 0; i < fw_size; i++)
3485 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3486 		WREG32(CP_ME_RAM_WADDR, 0);
3487 	} else {
3488 		const __be32 *fw_data;
3489 
3490 		/* PFP */
3491 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3492 		WREG32(CP_PFP_UCODE_ADDR, 0);
3493 		for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3494 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3495 		WREG32(CP_PFP_UCODE_ADDR, 0);
3496 
3497 		/* CE */
3498 		fw_data = (const __be32 *)rdev->ce_fw->data;
3499 		WREG32(CP_CE_UCODE_ADDR, 0);
3500 		for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3501 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3502 		WREG32(CP_CE_UCODE_ADDR, 0);
3503 
3504 		/* ME */
3505 		fw_data = (const __be32 *)rdev->me_fw->data;
3506 		WREG32(CP_ME_RAM_WADDR, 0);
3507 		for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3508 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3509 		WREG32(CP_ME_RAM_WADDR, 0);
3510 	}
3511 
3512 	WREG32(CP_PFP_UCODE_ADDR, 0);
3513 	WREG32(CP_CE_UCODE_ADDR, 0);
3514 	WREG32(CP_ME_RAM_WADDR, 0);
3515 	WREG32(CP_ME_RAM_RADDR, 0);
3516 	return 0;
3517 }
3518 
3519 static int si_cp_start(struct radeon_device *rdev)
3520 {
3521 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3522 	int r, i;
3523 
3524 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3525 	if (r) {
3526 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3527 		return r;
3528 	}
3529 	/* init the CP */
3530 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3531 	radeon_ring_write(ring, 0x1);
3532 	radeon_ring_write(ring, 0x0);
3533 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3534 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3535 	radeon_ring_write(ring, 0);
3536 	radeon_ring_write(ring, 0);
3537 
3538 	/* init the CE partitions */
3539 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3540 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3541 	radeon_ring_write(ring, 0xc000);
3542 	radeon_ring_write(ring, 0xe000);
3543 	radeon_ring_unlock_commit(rdev, ring, false);
3544 
3545 	si_cp_enable(rdev, true);
3546 
3547 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3548 	if (r) {
3549 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3550 		return r;
3551 	}
3552 
3553 	/* setup clear context state */
3554 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3555 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3556 
3557 	for (i = 0; i < si_default_size; i++)
3558 		radeon_ring_write(ring, si_default_state[i]);
3559 
3560 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3561 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3562 
3563 	/* set clear context state */
3564 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3565 	radeon_ring_write(ring, 0);
3566 
3567 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3568 	radeon_ring_write(ring, 0x00000316);
3569 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3570 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3571 
3572 	radeon_ring_unlock_commit(rdev, ring, false);
3573 
3574 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3575 		ring = &rdev->ring[i];
3576 		r = radeon_ring_lock(rdev, ring, 2);
3577 
3578 		/* clear the compute context state */
3579 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3580 		radeon_ring_write(ring, 0);
3581 
3582 		radeon_ring_unlock_commit(rdev, ring, false);
3583 	}
3584 
3585 	return 0;
3586 }
3587 
3588 static void si_cp_fini(struct radeon_device *rdev)
3589 {
3590 	struct radeon_ring *ring;
3591 	si_cp_enable(rdev, false);
3592 
3593 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3594 	radeon_ring_fini(rdev, ring);
3595 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3596 
3597 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3598 	radeon_ring_fini(rdev, ring);
3599 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3600 
3601 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3602 	radeon_ring_fini(rdev, ring);
3603 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3604 }
3605 
/* si_cp_resume - program and start the three CP ring buffers
 *
 * Sets up the gfx ring (RB0) and the two compute rings (RB1/RB2):
 * ring size, rptr writeback addresses, wptr reset and ring base.
 * Then starts the CP via si_cp_start() and ring-tests all three
 * rings.  A gfx ring test failure is fatal (all rings marked not
 * ready, error returned); a compute ring failure only marks that
 * ring not ready.
 *
 * Returns 0 on success, negative error code on gfx ring test failure.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	si_enable_gui_idle_interrupt(rdev, false);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	/* rb_bufsz = log2 of the ring size in units of 8 bytes */
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		/* no writeback buffer: keep the CP from updating memory */
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	si_enable_gui_idle_interrupt(rdev, true);

	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
3731 
/* si_gpu_check_soft_reset - determine which GPU blocks appear hung
 *
 * Samples GRBM/SRBM/DMA/VM status registers and translates busy and
 * pending-request bits into a mask of RADEON_RESET_* flags that
 * si_gpu_soft_reset() consumes.  An MC busy indication is stripped
 * from the mask since the MC is most likely just busy, not hung.
 *
 * Returns the reset mask (0 means nothing looks hung).
 */
u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3812 
/* si_gpu_soft_reset - soft reset the GPU blocks named in reset_mask
 *
 * Disables PG/CG, stops the RLC, halts the CP and (if requested) the
 * DMA engines, stops the MC, then pulses the relevant bits in
 * GRBM_SOFT_RESET / SRBM_SOFT_RESET and resumes the MC.
 *
 * @reset_mask: combination of RADEON_RESET_* flags from
 *              si_gpu_check_soft_reset(); a zero mask is a no-op.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable PG/CG */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* stop the rlc */
	si_rlc_stop(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* pulse the GRBM reset bits: set, read back, wait, clear, read back */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* same pulse sequence for the SRBM reset bits */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3944 
/* si_set_clk_bypass_mode - switch sclk/mclk to bypass clocks
 *
 * Used before a PCI config reset: enables SPLL bypass, requests the
 * clock change and polls (up to rdev->usec_timeout) for
 * SPLL_CHG_STATUS, then clears the change request and deselects the
 * MPLL as the mclk source.
 */
static void si_set_clk_bypass_mode(struct radeon_device *rdev)
{
	u32 tmp, i;

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_BYPASS_EN;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
	tmp |= SPLL_CTLREQ_CHG;
	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);

	/* wait for the SPLL to acknowledge the change request */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
			break;
		udelay(1);
	}

	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);

	tmp = RREG32(MPLL_CNTL_MODE);
	tmp &= ~MPLL_MCLK_SEL;
	WREG32(MPLL_CNTL_MODE, tmp);
}
3971 
3972 static void si_spll_powerdown(struct radeon_device *rdev)
3973 {
3974 	u32 tmp;
3975 
3976 	tmp = RREG32(SPLL_CNTL_MODE);
3977 	tmp |= SPLL_SW_DIR_CONTROL;
3978 	WREG32(SPLL_CNTL_MODE, tmp);
3979 
3980 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3981 	tmp |= SPLL_RESET;
3982 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3983 
3984 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3985 	tmp |= SPLL_SLEEP;
3986 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3987 
3988 	tmp = RREG32(SPLL_CNTL_MODE);
3989 	tmp &= ~SPLL_SW_DIR_CONTROL;
3990 	WREG32(SPLL_CNTL_MODE, tmp);
3991 }
3992 
/* si_gpu_pci_config_reset - full GPU reset through PCI config space
 *
 * Heavier fallback used when a soft reset did not recover the GPU:
 * quiesces CP/DMA/RLC and the MC, switches clocks to bypass, powers
 * down the SPLL, disables bus mastering, then performs a PCI config
 * reset and polls CONFIG_MEMSIZE until the ASIC responds again.
 */
static void si_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
	/* dma0 */
	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	/* dma1 */
	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	si_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* set mclk/sclk to bypass */
	si_set_clk_bypass_mode(rdev);
	/* powerdown spll */
	si_spll_powerdown(rdev);
	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);
	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}
}
4044 
/* si_asic_reset - attempt to recover a hung GPU
 *
 * First tries a soft reset of whatever blocks look hung; if the GPU
 * still reports hung blocks afterwards and the radeon_hard_reset
 * module option is set, escalates to a PCI config reset.  The BIOS
 * scratch "engine hung" flag is set while recovery is in progress and
 * cleared once the GPU reads clean.
 *
 * Always returns 0.
 */
int si_asic_reset(struct radeon_device *rdev)
{
	u32 reset_mask;

	reset_mask = si_gpu_check_soft_reset(rdev);

	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	/* try soft reset */
	si_gpu_soft_reset(rdev, reset_mask);

	reset_mask = si_gpu_check_soft_reset(rdev);

	/* try pci config reset */
	if (reset_mask && radeon_hard_reset)
		si_gpu_pci_config_reset(rdev);

	reset_mask = si_gpu_check_soft_reset(rdev);

	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
4070 
4071 /**
4072  * si_gfx_is_lockup - Check if the GFX engine is locked up
4073  *
4074  * @rdev: radeon_device pointer
4075  * @ring: radeon_ring structure holding ring information
4076  *
4077  * Check if the GFX engine is locked up.
4078  * Returns true if the engine appears to be locked up, false if not.
4079  */
4080 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4081 {
4082 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
4083 
4084 	if (!(reset_mask & (RADEON_RESET_GFX |
4085 			    RADEON_RESET_COMPUTE |
4086 			    RADEON_RESET_CP))) {
4087 		radeon_ring_lockup_update(rdev, ring);
4088 		return false;
4089 	}
4090 	return radeon_ring_test_lockup(rdev, ring);
4091 }
4092 
4093 /* MC */
/* si_mc_program - program the memory controller's view of VRAM
 *
 * Clears the HDP registers, stops the MC, locks out VGA access, then
 * programs the system aperture, FB location and HDP nonsurface
 * registers from rdev->mc, and finally resumes the MC and disables
 * the VGA renderer so it cannot scribble over driver objects.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs base and top as 16-bit fields in 16MB units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
4144 
4145 void si_vram_gtt_location(struct radeon_device *rdev,
4146 			  struct radeon_mc *mc)
4147 {
4148 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
4149 		/* leave room for at least 1024M GTT */
4150 		dev_warn(rdev->dev, "limiting VRAM\n");
4151 		mc->real_vram_size = 0xFFC0000000ULL;
4152 		mc->mc_vram_size = 0xFFC0000000ULL;
4153 	}
4154 	radeon_vram_location(rdev, &rdev->mc, 0);
4155 	rdev->mc.gtt_base_align = 0;
4156 	radeon_gtt_location(rdev, mc);
4157 }
4158 
/* si_mc_init - probe VRAM configuration
 *
 * Derives the memory bus width from the channel size and channel
 * count registers, records the PCI aperture, reads the VRAM size (in
 * MB) from CONFIG_MEMSIZE (working around boards that report garbage
 * in the upper 16 bits), and lays out VRAM/GTT.
 *
 * Always returns 0.
 */
static int si_mc_init(struct radeon_device *rdev)
{
	u32 tmp;
	int chansize, numchan;

	/* Get VRAM informations */
	rdev->mc.vram_is_ddr = true;
	tmp = RREG32(MC_ARB_RAMCFG);
	if (tmp & CHANSIZE_OVERRIDE) {
		chansize = 16;
	} else if (tmp & CHANSIZE_MASK) {
		chansize = 64;
	} else {
		chansize = 32;
	}
	tmp = RREG32(MC_SHARED_CHMAP);
	/* decode the number of memory channels */
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		numchan = 1;
		break;
	case 1:
		numchan = 2;
		break;
	case 2:
		numchan = 4;
		break;
	case 3:
		numchan = 8;
		break;
	case 4:
		numchan = 3;
		break;
	case 5:
		numchan = 6;
		break;
	case 6:
		numchan = 10;
		break;
	case 7:
		numchan = 12;
		break;
	case 8:
		numchan = 16;
		break;
	}
	rdev->mc.vram_width = numchan * chansize;
	/* Could aper size report 0 ? */
	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
	/* size in MB on si */
	tmp = RREG32(CONFIG_MEMSIZE);
	/* some boards may have garbage in the upper 16 bits */
	if (tmp & 0xffff0000) {
		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
		if (tmp & 0xffff)
			tmp &= 0xffff;
	}
	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
	rdev->mc.visible_vram_size = rdev->mc.aper_size;
	si_vram_gtt_location(rdev, &rdev->mc);
	radeon_update_bandwidth_info(rdev);

	return 0;
}
4225 
4226 /*
4227  * GART
4228  */
/* si_pcie_gart_tlb_flush - flush the GART TLB after page table updates
 *
 * Flushes the HDP cache first so the new PTEs are visible to the GPU,
 * then invalidates VM context 0 (the GART context).
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
4237 
/* si_pcie_gart_enable - set up the GART and the 16 VM contexts
 *
 * Pins the GART page table in VRAM, programs the L1 TLB and L2 cache,
 * points VM context 0 at the GART table, restores the saved page
 * table base addresses for contexts 1-15 (used by userspace VMs), and
 * enables protection-fault reporting on those contexts.
 *
 * Returns 0 on success, negative error code if the GART table object
 * is missing or cannot be pinned.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		/* contexts 0-7 and 8-15 live in two separate register banks */
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
4325 
/* si_pcie_gart_disable - disable the GART and VM contexts
 *
 * Saves the page table base addresses of contexts 1-15 (restored by
 * si_pcie_gart_enable() on resume), disables all VM contexts, puts
 * the TLB/L2 into pass-through mode, and unpins the GART table.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		/* contexts 0-7 and 8-15 live in two separate register banks */
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
4355 
/* si_pcie_gart_fini - final GART teardown: disable, free table, fini */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4362 
4363 /* vm parser */
4364 static bool si_vm_reg_valid(u32 reg)
4365 {
4366 	/* context regs are fine */
4367 	if (reg >= 0x28000)
4368 		return true;
4369 
4370 	/* check config regs */
4371 	switch (reg) {
4372 	case GRBM_GFX_INDEX:
4373 	case CP_STRMOUT_CNTL:
4374 	case VGT_VTX_VECT_EJECT_REG:
4375 	case VGT_CACHE_INVALIDATION:
4376 	case VGT_ESGS_RING_SIZE:
4377 	case VGT_GSVS_RING_SIZE:
4378 	case VGT_GS_VERTEX_REUSE:
4379 	case VGT_PRIMITIVE_TYPE:
4380 	case VGT_INDEX_TYPE:
4381 	case VGT_NUM_INDICES:
4382 	case VGT_NUM_INSTANCES:
4383 	case VGT_TF_RING_SIZE:
4384 	case VGT_HS_OFFCHIP_PARAM:
4385 	case VGT_TF_MEMORY_BASE:
4386 	case PA_CL_ENHANCE:
4387 	case PA_SU_LINE_STIPPLE_VALUE:
4388 	case PA_SC_LINE_STIPPLE_STATE:
4389 	case PA_SC_ENHANCE:
4390 	case SQC_CACHES:
4391 	case SPI_STATIC_THREAD_MGMT_1:
4392 	case SPI_STATIC_THREAD_MGMT_2:
4393 	case SPI_STATIC_THREAD_MGMT_3:
4394 	case SPI_PS_MAX_WAVE_ID:
4395 	case SPI_CONFIG_CNTL:
4396 	case SPI_CONFIG_CNTL_1:
4397 	case TA_CNTL_AUX:
4398 		return true;
4399 	default:
4400 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4401 		return false;
4402 	}
4403 }
4404 
4405 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4406 				  u32 *ib, struct radeon_cs_packet *pkt)
4407 {
4408 	switch (pkt->opcode) {
4409 	case PACKET3_NOP:
4410 	case PACKET3_SET_BASE:
4411 	case PACKET3_SET_CE_DE_COUNTERS:
4412 	case PACKET3_LOAD_CONST_RAM:
4413 	case PACKET3_WRITE_CONST_RAM:
4414 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4415 	case PACKET3_DUMP_CONST_RAM:
4416 	case PACKET3_INCREMENT_CE_COUNTER:
4417 	case PACKET3_WAIT_ON_DE_COUNTER:
4418 	case PACKET3_CE_WRITE:
4419 		break;
4420 	default:
4421 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4422 		return -EINVAL;
4423 	}
4424 	return 0;
4425 }
4426 
4427 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4428 {
4429 	u32 start_reg, reg, i;
4430 	u32 command = ib[idx + 4];
4431 	u32 info = ib[idx + 1];
4432 	u32 idx_value = ib[idx];
4433 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4434 		/* src address space is register */
4435 		if (((info & 0x60000000) >> 29) == 0) {
4436 			start_reg = idx_value << 2;
4437 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4438 				reg = start_reg;
4439 				if (!si_vm_reg_valid(reg)) {
4440 					DRM_ERROR("CP DMA Bad SRC register\n");
4441 					return -EINVAL;
4442 				}
4443 			} else {
4444 				for (i = 0; i < (command & 0x1fffff); i++) {
4445 					reg = start_reg + (4 * i);
4446 					if (!si_vm_reg_valid(reg)) {
4447 						DRM_ERROR("CP DMA Bad SRC register\n");
4448 						return -EINVAL;
4449 					}
4450 				}
4451 			}
4452 		}
4453 	}
4454 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4455 		/* dst address space is register */
4456 		if (((info & 0x00300000) >> 20) == 0) {
4457 			start_reg = ib[idx + 2];
4458 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4459 				reg = start_reg;
4460 				if (!si_vm_reg_valid(reg)) {
4461 					DRM_ERROR("CP DMA Bad DST register\n");
4462 					return -EINVAL;
4463 				}
4464 			} else {
4465 				for (i = 0; i < (command & 0x1fffff); i++) {
4466 					reg = start_reg + (4 * i);
4467 				if (!si_vm_reg_valid(reg)) {
4468 						DRM_ERROR("CP DMA Bad DST register\n");
4469 						return -EINVAL;
4470 					}
4471 				}
4472 			}
4473 		}
4474 	}
4475 	return 0;
4476 }
4477 
/* si_vm_packet3_gfx_check - validate a packet3 on the gfx ring
 *
 * Allows harmless packets outright; for packets that can write
 * registers (COPY_DATA, WRITE_DATA, COND_WRITE, COPY_DW,
 * SET_CONFIG_REG, CP_DMA) every target register is checked with
 * si_vm_reg_valid().
 *
 * Returns 0 if the packet is acceptable, -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;

	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dest-select == register: validate the target */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dest-select == register: validate the target range */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* no auto-increment: a single register */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* write-space == register: validate the target */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* dst-select == register: validate the target */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4595 
/**
 * si_vm_packet3_compute_check - validate a PACKET3 from a VM IB on a compute ring
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer dwords
 * @pkt: decoded packet header (idx/opcode/count already filled in)
 *
 * Compute-ring counterpart of si_vm_packet3_gfx_check(): smaller opcode
 * whitelist (no draw packets), same register validation for opcodes that
 * can write registers.  Returns 0 if allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;	/* first dword after the packet header */
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	/* these opcodes are allowed unconditionally */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dst sel bits zero means a register destination; check it */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* single register written repeatedly */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* consecutive registers starting at start_reg */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* bit 8 selects a register (vs memory) write */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* bit 1 selects a register destination */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4683 
4684 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4685 {
4686 	int ret = 0;
4687 	u32 idx = 0, i;
4688 	struct radeon_cs_packet pkt;
4689 
4690 	do {
4691 		pkt.idx = idx;
4692 		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4693 		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4694 		pkt.one_reg_wr = 0;
4695 		switch (pkt.type) {
4696 		case RADEON_PACKET_TYPE0:
4697 			dev_err(rdev->dev, "Packet0 not allowed!\n");
4698 			for (i = 0; i < ib->length_dw; i++) {
4699 				if (i == idx)
4700 					printk("\t0x%08x <---\n", ib->ptr[i]);
4701 				else
4702 					printk("\t0x%08x\n", ib->ptr[i]);
4703 			}
4704 			ret = -EINVAL;
4705 			break;
4706 		case RADEON_PACKET_TYPE2:
4707 			idx += 1;
4708 			break;
4709 		case RADEON_PACKET_TYPE3:
4710 			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4711 			if (ib->is_const_ib)
4712 				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4713 			else {
4714 				switch (ib->ring) {
4715 				case RADEON_RING_TYPE_GFX_INDEX:
4716 					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4717 					break;
4718 				case CAYMAN_RING_TYPE_CP1_INDEX:
4719 				case CAYMAN_RING_TYPE_CP2_INDEX:
4720 					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4721 					break;
4722 				default:
4723 					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4724 					ret = -EINVAL;
4725 					break;
4726 				}
4727 			}
4728 			idx += pkt.count + 2;
4729 			break;
4730 		default:
4731 			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4732 			ret = -EINVAL;
4733 			break;
4734 		}
4735 		if (ret)
4736 			break;
4737 	} while (idx < ib->length_dw);
4738 
4739 	return ret;
4740 }
4741 
4742 /*
4743  * vm
4744  */
4745 int si_vm_init(struct radeon_device *rdev)
4746 {
4747 	/* number of VMs */
4748 	rdev->vm_manager.nvm = 16;
4749 	/* base offset of vram pages */
4750 	rdev->vm_manager.vram_base_offset = 0;
4751 
4752 	return 0;
4753 }
4754 
/* si_vm_fini - VM manager teardown; SI has nothing to release */
void si_vm_fini(struct radeon_device *rdev)
{
}
4758 
4759 /**
4760  * si_vm_decode_fault - print human readable fault info
4761  *
4762  * @rdev: radeon_device pointer
4763  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4764  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4765  *
4766  * Print human readable fault information (SI).
4767  */
static void si_vm_decode_fault(struct radeon_device *rdev,
			       u32 status, u32 addr)
{
	/* unpack the fault status fields */
	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	char *block;

	/* map the MC client id to a human readable block name; Tahiti uses
	 * a different id assignment than the other SI parts */
	if (rdev->family == CHIP_TAHITI) {
		switch (mc_id) {
		case 160:
		case 144:
		case 96:
		case 80:
		case 224:
		case 208:
		case 32:
		case 16:
			block = "CB";
			break;
		case 161:
		case 145:
		case 97:
		case 81:
		case 225:
		case 209:
		case 33:
		case 17:
			block = "CB_FMASK";
			break;
		case 162:
		case 146:
		case 98:
		case 82:
		case 226:
		case 210:
		case 34:
		case 18:
			block = "CB_CMASK";
			break;
		case 163:
		case 147:
		case 99:
		case 83:
		case 227:
		case 211:
		case 35:
		case 19:
			block = "CB_IMMED";
			break;
		case 164:
		case 148:
		case 100:
		case 84:
		case 228:
		case 212:
		case 36:
		case 20:
			block = "DB";
			break;
		case 165:
		case 149:
		case 101:
		case 85:
		case 229:
		case 213:
		case 37:
		case 21:
			block = "DB_HTILE";
			break;
		case 167:
		case 151:
		case 103:
		case 87:
		case 231:
		case 215:
		case 39:
		case 23:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 64:
		case 8:
		case 4:
		case 0:
		case 136:
		case 132:
		case 128:
		case 200:
		case 196:
		case 192:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
		case 190:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	} else {
		/* Pitcairn/Verde/Oland/Hainan client id mapping */
		switch (mc_id) {
		case 32:
		case 16:
		case 96:
		case 80:
		case 160:
		case 144:
		case 224:
		case 208:
			block = "CB";
			break;
		case 33:
		case 17:
		case 97:
		case 81:
		case 161:
		case 145:
		case 225:
		case 209:
			block = "CB_FMASK";
			break;
		case 34:
		case 18:
		case 98:
		case 82:
		case 162:
		case 146:
		case 226:
		case 210:
			block = "CB_CMASK";
			break;
		case 35:
		case 19:
		case 99:
		case 83:
		case 163:
		case 147:
		case 227:
		case 211:
			block = "CB_IMMED";
			break;
		case 36:
		case 20:
		case 100:
		case 84:
		case 164:
		case 148:
		case 228:
		case 212:
			block = "DB";
			break;
		case 37:
		case 21:
		case 101:
		case 85:
		case 165:
		case 149:
		case 229:
		case 213:
			block = "DB_HTILE";
			break;
		case 39:
		case 23:
		case 103:
		case 87:
		case 167:
		case 151:
		case 231:
		case 215:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 8:
		case 4:
		case 136:
		case 132:
		case 200:
		case 196:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	}

	/* NOTE(review): mc_id is u32 printed with %d — works with printk
	 * for these small values, but %u would match the type exactly */
	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_id);
}
5022 
/**
 * si_vm_flush - flush the TLB for a VM page table update via the ring
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit on
 * @vm: the VM to flush (no-op when NULL)
 *
 * Emits WRITE_DATA packets that update the VM's page directory base,
 * flush the HDP cache, and invalidate the VM context's TLB entries.
 * The packet order is significant; do not reorder the writes.
 */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));

	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
5065 
5066 /*
5067  *  Power and clock gating
5068  */
5069 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5070 {
5071 	int i;
5072 
5073 	for (i = 0; i < rdev->usec_timeout; i++) {
5074 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5075 			break;
5076 		udelay(1);
5077 	}
5078 
5079 	for (i = 0; i < rdev->usec_timeout; i++) {
5080 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5081 			break;
5082 		udelay(1);
5083 	}
5084 }
5085 
/**
 * si_enable_gui_idle_interrupt - toggle the GUI idle (context busy/empty)
 * interrupt on ring 0
 *
 * @rdev: radeon_device pointer
 * @enable: true to enable the interrupt sources, false to mask them
 *
 * When disabling, also waits for the RLC to report the GFX block clocked
 * and powered before returning.
 */
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable)
{
	u32 tmp = RREG32(CP_INT_CNTL_RING0);
	u32 mask;
	int i;

	if (enable)
		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	else
		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);

	if (!enable) {
		/* read a gfx register */
		/* dummy read to make sure the write above has landed before
		 * polling RLC_STAT — TODO confirm intent against hw docs */
		tmp = RREG32(DB_DEPTH_INFO);

		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
				break;
			udelay(1);
		}
	}
}
5111 
5112 static void si_set_uvd_dcm(struct radeon_device *rdev,
5113 			   bool sw_mode)
5114 {
5115 	u32 tmp, tmp2;
5116 
5117 	tmp = RREG32(UVD_CGC_CTRL);
5118 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5119 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
5120 
5121 	if (sw_mode) {
5122 		tmp &= ~0x7ffff800;
5123 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5124 	} else {
5125 		tmp |= 0x7ffff800;
5126 		tmp2 = 0;
5127 	}
5128 
5129 	WREG32(UVD_CGC_CTRL, tmp);
5130 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5131 }
5132 
/* si_init_uvd_internal_cg - set up UVD internal clock gating.
 * NOTE(review): hw_mode is constant true, so the else branch (disable
 * DCM) is dead code kept as a developer toggle — confirm before removing. */
void si_init_uvd_internal_cg(struct radeon_device *rdev)
{
	bool hw_mode = true;

	if (hw_mode) {
		si_set_uvd_dcm(rdev, false);
	} else {
		u32 tmp = RREG32(UVD_CGC_CTRL);
		tmp &= ~DCM;
		WREG32(UVD_CGC_CTRL, tmp);
	}
}
5145 
5146 static u32 si_halt_rlc(struct radeon_device *rdev)
5147 {
5148 	u32 data, orig;
5149 
5150 	orig = data = RREG32(RLC_CNTL);
5151 
5152 	if (data & RLC_ENABLE) {
5153 		data &= ~RLC_ENABLE;
5154 		WREG32(RLC_CNTL, data);
5155 
5156 		si_wait_for_rlc_serdes(rdev);
5157 	}
5158 
5159 	return orig;
5160 }
5161 
5162 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5163 {
5164 	u32 tmp;
5165 
5166 	tmp = RREG32(RLC_CNTL);
5167 	if (tmp != rlc)
5168 		WREG32(RLC_CNTL, rlc);
5169 }
5170 
5171 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5172 {
5173 	u32 data, orig;
5174 
5175 	orig = data = RREG32(DMA_PG);
5176 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5177 		data |= PG_CNTL_ENABLE;
5178 	else
5179 		data &= ~PG_CNTL_ENABLE;
5180 	if (orig != data)
5181 		WREG32(DMA_PG, data);
5182 }
5183 
/* si_init_dma_pg - program the DMA power gating state machine.
 * The 0x00002000 / 0x100010ff values are opaque PGFSM programming
 * constants — presumably from AMD register specs; no public reference. */
static void si_init_dma_pg(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(DMA_PGFSM_WRITE,  0x00002000);
	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);

	/* flush the PGFSM write queue with five zero writes */
	for (tmp = 0; tmp < 5; tmp++)
		WREG32(DMA_PGFSM_WRITE, 0);
}
5194 
/**
 * si_enable_gfx_cgpg - enable/disable GFX clock & power gating via the RLC
 *
 * @rdev: radeon_device pointer
 * @enable: true to turn on GFX powergating (if supported), false to stop it
 */
static void si_enable_gfx_cgpg(struct radeon_device *rdev,
			       bool enable)
{
	u32 tmp;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		/* per-domain power-up/down/trip/switch delays */
		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
		WREG32(RLC_TTOP_D, tmp);

		tmp = RREG32(RLC_PG_CNTL);
		tmp |= GFX_PG_ENABLE;
		WREG32(RLC_PG_CNTL, tmp);

		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp |= AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);
	} else {
		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp &= ~AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);

		/* dummy gfx register read — presumably forces the gfx block
		 * awake after auto-PG is turned off; TODO confirm */
		tmp = RREG32(DB_RENDER_CONTROL);
	}
}
5219 
/* si_init_gfx_cgpg - program the RLC save/restore and clear-state buffer
 * addresses and the auto powergating idle threshold. */
static void si_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);

	/* use the save/restore buffer as the PG source */
	tmp = RREG32(RLC_PG_CNTL);
	tmp |= GFX_PG_SRC;
	WREG32(RLC_PG_CNTL, tmp);

	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	tmp = RREG32(RLC_AUTO_PG_CTRL);

	/* GRBM sample-gap idle threshold before auto powergate kicks in */
	tmp &= ~GRBM_REG_SGIT_MASK;
	tmp |= GRBM_REG_SGIT(0x700);
	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
	WREG32(RLC_AUTO_PG_CTRL, tmp);
}
5239 
/**
 * si_get_cu_active_bitmap - read the active-CU bitmap for one SE/SH pair
 *
 * @rdev: radeon_device pointer
 * @se: shader engine index
 * @sh: shader array index within the SE
 *
 * Selects the SE/SH in GRBM, reads the hw and user shader-array configs,
 * restores broadcast select, and returns a bitmap with one bit set per
 * active compute unit (a set bit in the registers marks an INACTIVE CU,
 * hence the final inversion).
 */
static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
{
	u32 mask = 0, tmp, tmp1;
	int i;

	si_select_se_sh(rdev, se, sh);
	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
	/* restore broadcast to all SEs/SHs */
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* combine hw and user inactive-CU bits, held in the upper half */
	tmp &= 0xffff0000;

	tmp |= tmp1;
	tmp >>= 16;

	/* build a mask of max_cu_per_sh ones; the loop form stays defined
	 * even for 32 CUs, where (1 << 32) - 1 would be UB */
	for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
		mask <<= 1;
		mask |= 1;
	}

	return (~tmp) & mask;
}
5262 
5263 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5264 {
5265 	u32 i, j, k, active_cu_number = 0;
5266 	u32 mask, counter, cu_bitmap;
5267 	u32 tmp = 0;
5268 
5269 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5270 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5271 			mask = 1;
5272 			cu_bitmap = 0;
5273 			counter  = 0;
5274 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5275 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5276 					if (counter < 2)
5277 						cu_bitmap |= mask;
5278 					counter++;
5279 				}
5280 				mask <<= 1;
5281 			}
5282 
5283 			active_cu_number += counter;
5284 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5285 		}
5286 	}
5287 
5288 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5289 
5290 	tmp = RREG32(RLC_MAX_PG_CU);
5291 	tmp &= ~MAX_PU_CU_MASK;
5292 	tmp |= MAX_PU_CU(active_cu_number);
5293 	WREG32(RLC_MAX_PG_CU, tmp);
5294 }
5295 
/**
 * si_enable_cgcg - enable/disable coarse-grain clock gating for GFX
 *
 * @rdev: radeon_device pointer
 * @enable: true to enable CGCG/CGLS (if supported by cg_flags)
 *
 * The RLC must be halted around the serdes mask programming; the
 * sequence of writes below is order-sensitive.
 */
static void si_enable_cgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		si_enable_gui_idle_interrupt(rdev, true);

		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

		/* halt the RLC while reprogramming the serdes */
		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

		si_wait_for_rlc_serdes(rdev);

		/* restore the previous RLC state */
		si_update_rlc(rdev, tmp);

		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

		data |= CGCG_EN | CGLS_EN;
	} else {
		si_enable_gui_idle_interrupt(rdev, false);

		/* repeated dummy reads — presumably to flush/settle the CB
		 * clock gating before disabling; TODO confirm */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
5335 
/**
 * si_enable_mgcg - enable/disable medium-grain clock gating for GFX
 *
 * @rdev: radeon_device pointer
 * @enable: true to enable MGCG (if supported by cg_flags)
 *
 * Also toggles CP memory light sleep when RADEON_CG_SUPPORT_GFX_CP_LS is
 * set.  The RLC is halted around the serdes programming in both paths.
 */
static void si_enable_mgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		/* magic SM control value — presumably from AMD cg programming
		 * guide; TODO confirm */
		data = 0x96940200;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
			orig = data = RREG32(CP_MEM_SLP_CNTL);
			data |= CP_MEM_LS_EN;
			if (orig != data)
				WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* clear the low override bits to let MGCG engage */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xffffffc0;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

		si_update_rlc(rdev, tmp);
	} else {
		/* set the override bits to force clocks on */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= LS_OVERRIDE | OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

		si_update_rlc(rdev, tmp);
	}
}
5391 
/**
 * si_enable_uvd_mgcg - enable/disable UVD medium-grain clock gating
 *
 * @rdev: radeon_device pointer
 * @enable: true to enable UVD MGCG (if supported by cg_flags)
 *
 * Toggles the UVD memory gating bits, the DCM bit, and the SMC-side
 * CGTT local gating masks (all-off = 0, all-on override = 0xffffffff).
 */
static void si_enable_uvd_mgcg(struct radeon_device *rdev,
			       bool enable)
{
	u32 orig, data, tmp;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp |= 0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
	} else {
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp &= ~0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
	}
}
5423 
/* Memory-controller registers that carry the MC_LS_ENABLE / MC_CG_ENABLE
 * bits; iterated by si_enable_mc_ls() and si_enable_mc_mgcg(). */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5436 
5437 static void si_enable_mc_ls(struct radeon_device *rdev,
5438 			    bool enable)
5439 {
5440 	int i;
5441 	u32 orig, data;
5442 
5443 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5444 		orig = data = RREG32(mc_cg_registers[i]);
5445 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5446 			data |= MC_LS_ENABLE;
5447 		else
5448 			data &= ~MC_LS_ENABLE;
5449 		if (data != orig)
5450 			WREG32(mc_cg_registers[i], data);
5451 	}
5452 }
5453 
5454 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5455 			       bool enable)
5456 {
5457 	int i;
5458 	u32 orig, data;
5459 
5460 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5461 		orig = data = RREG32(mc_cg_registers[i]);
5462 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5463 			data |= MC_CG_ENABLE;
5464 		else
5465 			data &= ~MC_CG_ENABLE;
5466 		if (data != orig)
5467 			WREG32(mc_cg_registers[i], data);
5468 	}
5469 }
5470 
5471 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5472 			       bool enable)
5473 {
5474 	u32 orig, data, offset;
5475 	int i;
5476 
5477 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5478 		for (i = 0; i < 2; i++) {
5479 			if (i == 0)
5480 				offset = DMA0_REGISTER_OFFSET;
5481 			else
5482 				offset = DMA1_REGISTER_OFFSET;
5483 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5484 			data &= ~MEM_POWER_OVERRIDE;
5485 			if (data != orig)
5486 				WREG32(DMA_POWER_CNTL + offset, data);
5487 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5488 		}
5489 	} else {
5490 		for (i = 0; i < 2; i++) {
5491 			if (i == 0)
5492 				offset = DMA0_REGISTER_OFFSET;
5493 			else
5494 				offset = DMA1_REGISTER_OFFSET;
5495 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5496 			data |= MEM_POWER_OVERRIDE;
5497 			if (data != orig)
5498 				WREG32(DMA_POWER_CNTL + offset, data);
5499 
5500 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5501 			data = 0xff000000;
5502 			if (data != orig)
5503 				WREG32(DMA_CLK_CTRL + offset, data);
5504 		}
5505 	}
5506 }
5507 
5508 static void si_enable_bif_mgls(struct radeon_device *rdev,
5509 			       bool enable)
5510 {
5511 	u32 orig, data;
5512 
5513 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5514 
5515 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5516 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5517 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5518 	else
5519 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5520 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5521 
5522 	if (orig != data)
5523 		WREG32_PCIE(PCIE_CNTL2, data);
5524 }
5525 
5526 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5527 			       bool enable)
5528 {
5529 	u32 orig, data;
5530 
5531 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5532 
5533 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5534 		data &= ~CLOCK_GATING_DIS;
5535 	else
5536 		data |= CLOCK_GATING_DIS;
5537 
5538 	if (orig != data)
5539 		WREG32(HDP_HOST_PATH_CNTL, data);
5540 }
5541 
5542 static void si_enable_hdp_ls(struct radeon_device *rdev,
5543 			     bool enable)
5544 {
5545 	u32 orig, data;
5546 
5547 	orig = data = RREG32(HDP_MEM_POWER_LS);
5548 
5549 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5550 		data |= HDP_LS_ENABLE;
5551 	else
5552 		data &= ~HDP_LS_ENABLE;
5553 
5554 	if (orig != data)
5555 		WREG32(HDP_MEM_POWER_LS, data);
5556 }
5557 
/**
 * si_update_cg - enable/disable clock gating for a set of IP blocks
 *
 * @rdev: radeon_device pointer
 * @block: bitmask of RADEON_CG_BLOCK_* blocks to update
 * @enable: true to enable gating, false to disable
 */
static void si_update_cg(struct radeon_device *rdev,
			 u32 block, bool enable)
{
	if (block & RADEON_CG_BLOCK_GFX) {
		/* mask the gui-idle irq while toggling GFX gating */
		si_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			/* MGCG before CGCG on enable ... */
			si_enable_mgcg(rdev, true);
			si_enable_cgcg(rdev, true);
		} else {
			/* ... and the reverse on disable */
			si_enable_cgcg(rdev, false);
			si_enable_mgcg(rdev, false);
		}
		si_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		si_enable_mc_mgcg(rdev, enable);
		si_enable_mc_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		si_enable_dma_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		si_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd) {
			si_enable_uvd_mgcg(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		si_enable_hdp_mgcg(rdev, enable);
		si_enable_hdp_ls(rdev, enable);
	}
}
5598 
/* si_init_cg - enable clock gating on all supported blocks at init time */
static void si_init_cg(struct radeon_device *rdev)
{
	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
			    RADEON_CG_BLOCK_MC |
			    RADEON_CG_BLOCK_SDMA |
			    RADEON_CG_BLOCK_BIF |
			    RADEON_CG_BLOCK_HDP), true);
	if (rdev->has_uvd) {
		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
		si_init_uvd_internal_cg(rdev);
	}
}
5611 
/* si_fini_cg - disable clock gating on teardown; UVD first, mirroring
 * the reverse of si_init_cg() */
static void si_fini_cg(struct radeon_device *rdev)
{
	if (rdev->has_uvd) {
		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
	}
	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
			    RADEON_CG_BLOCK_MC |
			    RADEON_CG_BLOCK_SDMA |
			    RADEON_CG_BLOCK_BIF |
			    RADEON_CG_BLOCK_HDP), false);
}
5623 
5624 u32 si_get_csb_size(struct radeon_device *rdev)
5625 {
5626 	u32 count = 0;
5627 	const struct cs_section_def *sect = NULL;
5628 	const struct cs_extent_def *ext = NULL;
5629 
5630 	if (rdev->rlc.cs_data == NULL)
5631 		return 0;
5632 
5633 	/* begin clear state */
5634 	count += 2;
5635 	/* context control state */
5636 	count += 3;
5637 
5638 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5639 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5640 			if (sect->id == SECT_CONTEXT)
5641 				count += 2 + ext->reg_count;
5642 			else
5643 				return 0;
5644 		}
5645 	}
5646 	/* pa_sc_raster_config */
5647 	count += 3;
5648 	/* end clear state */
5649 	count += 2;
5650 	/* clear state */
5651 	count += 2;
5652 
5653 	return count;
5654 }
5655 
/**
 * si_get_csb_buffer - fill a buffer with the clear-state packet stream
 *
 * @rdev: radeon_device pointer
 * @buffer: destination buffer (must be at least si_get_csb_size() dwords)
 *
 * Emits, in order: preamble begin, context control, the context register
 * sections from rlc.cs_data, a per-chip PA_SC_RASTER_CONFIG value,
 * preamble end, and a CLEAR_STATE packet.  The layout must match the
 * dword count computed by si_get_csb_size().
 */
void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				/* context register space starts at 0xa000 */
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* per-chip raster config — values depend on the RB/SE layout */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
		buffer[count++] = cpu_to_le32(0x2a00126a);
		break;
	case CHIP_VERDE:
		buffer[count++] = cpu_to_le32(0x0000124a);
		break;
	case CHIP_OLAND:
		buffer[count++] = cpu_to_le32(0x00000082);
		break;
	case CHIP_HAINAN:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
5715 
/* si_init_pg - initialize powergating.  When no PG features are enabled,
 * the RLC buffer base registers must still be programmed. */
static void si_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
			si_init_dma_pg(rdev);
		}
		si_init_ao_cu_mask(rdev);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			si_init_gfx_cgpg(rdev);
		} else {
			/* no GFX PG: just program the RLC buffer addresses */
			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
		}
		si_enable_dma_pg(rdev, true);
		si_enable_gfx_cgpg(rdev, true);
	} else {
		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
	}
}
5736 
5737 static void si_fini_pg(struct radeon_device *rdev)
5738 {
5739 	if (rdev->pg_flags) {
5740 		si_enable_dma_pg(rdev, false);
5741 		si_enable_gfx_cgpg(rdev, false);
5742 	}
5743 }
5744 
5745 /*
5746  * RLC
5747  */
/* si_rlc_reset - pulse the RLC soft reset bit with settle delays */
void si_rlc_reset(struct radeon_device *rdev)
{
	u32 tmp = RREG32(GRBM_SOFT_RESET);

	tmp |= SOFT_RESET_RLC;
	WREG32(GRBM_SOFT_RESET, tmp);
	udelay(50);
	tmp &= ~SOFT_RESET_RLC;
	WREG32(GRBM_SOFT_RESET, tmp);
	udelay(50);
}
5759 
/* Halt the RLC, mask the GUI idle interrupt, and wait for the
 * RLC serdes traffic to drain before returning.
 */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	si_enable_gui_idle_interrupt(rdev, false);

	si_wait_for_rlc_serdes(rdev);
}
5768 
/* Enable the RLC and the GUI idle interrupt, then give the RLC
 * 50us to come up.
 */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	si_enable_gui_idle_interrupt(rdev, true);

	udelay(50);
}
5777 
5778 static bool si_lbpw_supported(struct radeon_device *rdev)
5779 {
5780 	u32 tmp;
5781 
5782 	/* Enable LBPW only for DDR3 */
5783 	tmp = RREG32(MC_SEQ_MISC0);
5784 	if ((tmp & 0xF0000000) == 0xB0000000)
5785 		return true;
5786 	return false;
5787 }
5788 
5789 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5790 {
5791 	u32 tmp;
5792 
5793 	tmp = RREG32(RLC_LB_CNTL);
5794 	if (enable)
5795 		tmp |= LOAD_BALANCE_ENABLE;
5796 	else
5797 		tmp &= ~LOAD_BALANCE_ENABLE;
5798 	WREG32(RLC_LB_CNTL, tmp);
5799 
5800 	if (!enable) {
5801 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5802 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5803 	}
5804 }
5805 
/* Load the RLC microcode and (re)start the RLC.
 * Returns 0 on success, -EINVAL if no RLC firmware has been loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	si_rlc_reset(rdev);

	si_init_pg(rdev);

	si_init_cg(rdev);

	/* clear the RLC ring and load-balancing state */
	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* new-style firmware: header describes the ucode size and
		 * offset; payload words are little-endian */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		const __le32 *fw_data = (const __le32 *)
			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		for (i = 0; i < fw_size; i++) {
			WREG32(RLC_UCODE_ADDR, i);
			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
		}
	} else {
		/* legacy firmware: raw big-endian words of fixed size */
		const __be32 *fw_data =
			(const __be32 *)rdev->rlc_fw->data;
		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
			WREG32(RLC_UCODE_ADDR, i);
			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
		}
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
5860 
5861 static void si_enable_interrupts(struct radeon_device *rdev)
5862 {
5863 	u32 ih_cntl = RREG32(IH_CNTL);
5864 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5865 
5866 	ih_cntl |= ENABLE_INTR;
5867 	ih_rb_cntl |= IH_RB_ENABLE;
5868 	WREG32(IH_CNTL, ih_cntl);
5869 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5870 	rdev->ih.enabled = true;
5871 }
5872 
5873 static void si_disable_interrupts(struct radeon_device *rdev)
5874 {
5875 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5876 	u32 ih_cntl = RREG32(IH_CNTL);
5877 
5878 	ih_rb_cntl &= ~IH_RB_ENABLE;
5879 	ih_cntl &= ~ENABLE_INTR;
5880 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5881 	WREG32(IH_CNTL, ih_cntl);
5882 	/* set rptr, wptr to 0 */
5883 	WREG32(IH_RB_RPTR, 0);
5884 	WREG32(IH_RB_WPTR, 0);
5885 	rdev->ih.enabled = false;
5886 	rdev->ih.rptr = 0;
5887 }
5888 
/* Mask every interrupt source at the hardware level: CP rings, DMA
 * engines, GRBM, per-crtc vblank/pageflip sources, and (on ASICs
 * with display hardware) hotplug detect, while preserving the bits
 * that must survive (CP context bits, HPD polarity).
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* keep only the context busy/empty enables on ring 0 */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	/* mask vblank/vline interrupts on every populated crtc pair */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* mask pageflip interrupts on every populated crtc pair */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

		/* clear the HPD int enables but keep the polarity bits */
		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
5946 
/* Allocate and program the IH (interrupt handler) ring, bring up the
 * RLC, mask all sources, and finally enable interrupt delivery.
 * Returns 0 on success or a negative error code.
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
6017 
/**
 * si_irq_set - program the interrupt enables from driver state
 *
 * @rdev: radeon_device pointer
 *
 * Builds the enable masks for the CP rings, DMA engines, vblank,
 * pageflip, hotplug and thermal interrupt sources from the flags in
 * rdev->irq, then writes them all to the hardware.  If the IH is not
 * enabled, everything is masked instead.
 * Returns 0 on success, -EINVAL if no IRQ handler is installed.
 */
int si_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;
	u32 thermal_int = 0;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* start from the current register values with the bits we manage cleared */
	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);

	if (!ASIC_IS_NODCE(rdev)) {
		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
	}

	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	thermal_int = RREG32(CG_THERMAL_INT) &
		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}
	/* vblank interrupts are enabled for either a vblank wait or a pending pflip */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("si_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("si_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("si_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("si_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("si_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("si_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("si_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("si_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("si_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("si_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("si_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("si_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	/* write everything back out */
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	/* NOTE(review): grbm_int_cntl is always written as 0 here; the GUI
	 * idle interrupt is presumably managed by si_enable_gui_idle_interrupt()
	 * instead — confirm before changing. */
	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	if (rdev->irq.dpm_thermal) {
		DRM_DEBUG("dpm thermal\n");
		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
	}

	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	/* pageflip interrupts are always left unmasked on populated crtcs */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DC_HPD1_INT_CONTROL, hpd1);
		WREG32(DC_HPD2_INT_CONTROL, hpd2);
		WREG32(DC_HPD3_INT_CONTROL, hpd3);
		WREG32(DC_HPD4_INT_CONTROL, hpd4);
		WREG32(DC_HPD5_INT_CONTROL, hpd5);
		WREG32(DC_HPD6_INT_CONTROL, hpd6);
	}

	WREG32(CG_THERMAL_INT, thermal_int);

	return 0;
}
6194 
6195 static inline void si_irq_ack(struct radeon_device *rdev)
6196 {
6197 	u32 tmp;
6198 
6199 	if (ASIC_IS_NODCE(rdev))
6200 		return;
6201 
6202 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6203 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6204 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6205 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6206 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6207 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6208 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
6209 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
6210 	if (rdev->num_crtc >= 4) {
6211 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
6212 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
6213 	}
6214 	if (rdev->num_crtc >= 6) {
6215 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
6216 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
6217 	}
6218 
6219 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
6220 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6221 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
6222 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6223 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
6224 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6225 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
6226 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6227 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6228 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6229 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6230 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6231 
6232 	if (rdev->num_crtc >= 4) {
6233 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
6234 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6235 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
6236 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6237 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6238 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6239 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6240 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6241 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6242 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6243 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6244 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6245 	}
6246 
6247 	if (rdev->num_crtc >= 6) {
6248 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6249 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6250 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6251 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6252 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6253 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6254 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6255 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6256 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6257 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6258 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6259 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6260 	}
6261 
6262 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6263 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6264 		tmp |= DC_HPDx_INT_ACK;
6265 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6266 	}
6267 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6268 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6269 		tmp |= DC_HPDx_INT_ACK;
6270 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6271 	}
6272 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6273 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6274 		tmp |= DC_HPDx_INT_ACK;
6275 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6276 	}
6277 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6278 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6279 		tmp |= DC_HPDx_INT_ACK;
6280 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6281 	}
6282 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6283 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6284 		tmp |= DC_HPDx_INT_ACK;
6285 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6286 	}
6287 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6288 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6289 		tmp |= DC_HPDx_INT_ACK;
6290 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6291 	}
6292 }
6293 
/* Disable interrupt delivery, then ack any interrupts that were
 * already in flight and mask all sources.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
6302 
/* Quiesce interrupts and stop the RLC for suspend. */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
6308 
/* Tear down the interrupt path and free the IH ring. */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6314 
6315 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6316 {
6317 	u32 wptr, tmp;
6318 
6319 	if (rdev->wb.enabled)
6320 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6321 	else
6322 		wptr = RREG32(IH_RB_WPTR);
6323 
6324 	if (wptr & RB_OVERFLOW) {
6325 		wptr &= ~RB_OVERFLOW;
6326 		/* When a ring buffer overflow happen start parsing interrupt
6327 		 * from the last not overwritten vector (wptr + 16). Hopefully
6328 		 * this should allow us to catchup.
6329 		 */
6330 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6331 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6332 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6333 		tmp = RREG32(IH_RB_CNTL);
6334 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6335 		WREG32(IH_RB_CNTL, tmp);
6336 	}
6337 	return (wptr & rdev->ih.ptr_mask);
6338 }
6339 
6340 /*        SI IV Ring
6341  * Each IV ring entry is 128 bits:
6342  * [7:0]    - interrupt source id
6343  * [31:8]   - reserved
6344  * [59:32]  - interrupt source data
6345  * [63:60]  - reserved
6346  * [71:64]  - RINGID
6347  * [79:72]  - VMID
6348  * [127:80] - reserved
6349  */
/**
 * si_irq_process - interrupt handler
 *
 * @rdev: radeon_device pointer
 *
 * Walks the IH ring from rptr to wptr, decoding each 128-bit vector
 * (source id / source data / ring id) and dispatching it: vblank and
 * vline per crtc, page flips, hotplug detect, UVD, VM protection
 * faults, CP/DMA fence completion and thermal events.  Hotplug and
 * thermal work is deferred to workqueues.  Returns IRQ_HANDLED when
 * processing occurred, IRQ_NONE when the IH is disabled or another
 * caller already holds the processing lock.
 */
int si_irq_process(struct radeon_device *rdev)
{
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_thermal = false;
	u32 status, addr;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = si_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	si_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[0]) {
						drm_handle_vblank(rdev->ddev, 0);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[0]))
						radeon_crtc_handle_vblank(rdev, 0);
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D1 vblank\n");
				}
				break;
			case 1: /* D1 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D1 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[1]) {
						drm_handle_vblank(rdev->ddev, 1);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[1]))
						radeon_crtc_handle_vblank(rdev, 1);
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D2 vblank\n");
				}
				break;
			case 1: /* D2 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D2 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[2]) {
						drm_handle_vblank(rdev->ddev, 2);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[2]))
						radeon_crtc_handle_vblank(rdev, 2);
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D3 vblank\n");
				}
				break;
			case 1: /* D3 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D3 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[3]) {
						drm_handle_vblank(rdev->ddev, 3);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[3]))
						radeon_crtc_handle_vblank(rdev, 3);
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D4 vblank\n");
				}
				break;
			case 1: /* D4 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D4 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[4]) {
						drm_handle_vblank(rdev->ddev, 4);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[4]))
						radeon_crtc_handle_vblank(rdev, 4);
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D5 vblank\n");
				}
				break;
			case 1: /* D5 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D5 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[5]) {
						drm_handle_vblank(rdev->ddev, 5);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[5]))
						radeon_crtc_handle_vblank(rdev, 5);
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D6 vblank\n");
				}
				break;
			case 1: /* D6 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D6 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 8: /* D1 page flip */
		case 10: /* D2 page flip */
		case 12: /* D3 page flip */
		case 14: /* D4 page flip */
		case 16: /* D5 page flip */
		case 18: /* D6 page flip */
			/* flip source ids are 8,10,...,18 -> crtc 0..5 */
			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
			if (radeon_use_pflipirq > 0)
				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
			break;
		case 42: /* HPD hotplug */
			switch (src_data) {
			case 0:
				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD1\n");
				}
				break;
			case 1:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD2\n");
				}
				break;
			case 2:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD3\n");
				}
				break;
			case 3:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD4\n");
				}
				break;
			case 4:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD5\n");
				}
				break;
			case 5:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD6\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 124: /* UVD */
			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
			break;
		case 146:
		case 147:
			/* VM protection fault: capture and decode it */
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			if (addr == 0x0 && status == 0x0)
				break;
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				addr);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				status);
			si_vm_decode_fault(rdev, status, addr);
			break;
		case 176: /* RINGID0 CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 177: /* RINGID1 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			break;
		case 178: /* RINGID2 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			switch (ring_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				break;
			case 2:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 224: /* DMA trap event */
			DRM_DEBUG("IH: DMA trap\n");
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
			break;
		case 230: /* thermal low to high */
			DRM_DEBUG("IH: thermal low to high\n");
			rdev->pm.dpm.thermal.high_to_low = false;
			queue_thermal = true;
			break;
		case 231: /* thermal high to low */
			DRM_DEBUG("IH: thermal high to low\n");
			rdev->pm.dpm.thermal.high_to_low = true;
			queue_thermal = true;
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 244: /* DMA trap event */
			DRM_DEBUG("IH: DMA1 trap\n");
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
		WREG32(IH_RB_RPTR, rptr);
	}
	if (queue_hotplug)
		schedule_work(&rdev->hotplug_work);
	if (queue_thermal && rdev->pm.dpm_enabled)
		schedule_work(&rdev->pm.dpm.thermal.work);
	rdev->ih.rptr = rptr;
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = si_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
6689 
6690 /*
6691  * startup/shutdown callbacks
6692  */
/**
 * si_startup - program the asic and start up the requested rings
 * @rdev: radeon_device pointer
 *
 * Programs the MC, GART, RLC, write-back and interrupt blocks, loads
 * the CP microcode and brings up the GFX, the two compute (CP1/CP2)
 * and the two DMA rings, plus UVD when present.  Called from both
 * si_init() and si_resume(), so everything here must be safe to run
 * again after a suspend cycle.
 * Returns 0 on success, negative error code on failure.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);

	/* NOTE(review): when dpm is enabled the MC ucode is presumably
	 * already loaded by the DPM path before this runs — confirm.
	 */
	if (!rdev->pm.dpm_enabled) {
		r = si_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->family == CHIP_VERDE) {
		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size =
			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
	}
	rdev->rlc.cs_data = si_cs_data;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD failures are not fatal: zeroing ring_size disables the
	 * ring below without failing the whole startup.
	 */
	if (rdev->has_uvd) {
		r = uvd_v2_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
		if (r)
			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	/* DMA rings use a DMA NOP as their idle/filler packet instead of
	 * the CP PACKET2 used by the CP rings above.
	 */
	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	if (rdev->has_uvd) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		if (ring->ring_size) {
			r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
					     RADEON_CP_PACKET2);
			if (!r)
				r = uvd_v1_0_init(rdev);
			if (r)
				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
		}
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
6869 
6870 int si_resume(struct radeon_device *rdev)
6871 {
6872 	int r;
6873 
6874 	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6875 	 * posting will perform necessary task to bring back GPU into good
6876 	 * shape.
6877 	 */
6878 	/* post card */
6879 	atom_asic_init(rdev->mode_info.atom_context);
6880 
6881 	/* init golden registers */
6882 	si_init_golden_registers(rdev);
6883 
6884 	if (rdev->pm.pm_method == PM_METHOD_DPM)
6885 		radeon_pm_resume(rdev);
6886 
6887 	rdev->accel_working = true;
6888 	r = si_startup(rdev);
6889 	if (r) {
6890 		DRM_ERROR("si startup failed on resume\n");
6891 		rdev->accel_working = false;
6892 		return r;
6893 	}
6894 
6895 	return r;
6896 
6897 }
6898 
/**
 * si_suspend - quiesce the asic for suspend
 * @rdev: radeon_device pointer
 *
 * Suspends power management and audio, tears down the VM manager,
 * stops the CP and DMA engines, suspends UVD when present, disables
 * power/clock gating, suspends interrupts, disables write-back and
 * finally disables the GART.  The order matters: engines are stopped
 * before the memory/interrupt plumbing they depend on is torn down.
 * Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
6917 
/* Plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does pretty much
 * nothing more than call the asic specific functions. This
 * should also allow us to remove a bunch of callback functions
 * like vram_info.
 */
6924 int si_init(struct radeon_device *rdev)
6925 {
6926 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6927 	int r;
6928 
6929 	/* Read BIOS */
6930 	if (!radeon_get_bios(rdev)) {
6931 		if (ASIC_IS_AVIVO(rdev))
6932 			return -EINVAL;
6933 	}
6934 	/* Must be an ATOMBIOS */
6935 	if (!rdev->is_atom_bios) {
6936 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6937 		return -EINVAL;
6938 	}
6939 	r = radeon_atombios_init(rdev);
6940 	if (r)
6941 		return r;
6942 
6943 	/* Post card if necessary */
6944 	if (!radeon_card_posted(rdev)) {
6945 		if (!rdev->bios) {
6946 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6947 			return -EINVAL;
6948 		}
6949 		DRM_INFO("GPU not posted. posting now...\n");
6950 		atom_asic_init(rdev->mode_info.atom_context);
6951 	}
6952 	/* init golden registers */
6953 	si_init_golden_registers(rdev);
6954 	/* Initialize scratch registers */
6955 	si_scratch_init(rdev);
6956 	/* Initialize surface registers */
6957 	radeon_surface_init(rdev);
6958 	/* Initialize clocks */
6959 	radeon_get_clock_info(rdev->ddev);
6960 
6961 	/* Fence driver */
6962 	r = radeon_fence_driver_init(rdev);
6963 	if (r)
6964 		return r;
6965 
6966 	/* initialize memory controller */
6967 	r = si_mc_init(rdev);
6968 	if (r)
6969 		return r;
6970 	/* Memory manager */
6971 	r = radeon_bo_init(rdev);
6972 	if (r)
6973 		return r;
6974 
6975 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6976 	    !rdev->rlc_fw || !rdev->mc_fw) {
6977 		r = si_init_microcode(rdev);
6978 		if (r) {
6979 			DRM_ERROR("Failed to load firmware!\n");
6980 			return r;
6981 		}
6982 	}
6983 
6984 	/* Initialize power management */
6985 	radeon_pm_init(rdev);
6986 
6987 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6988 	ring->ring_obj = NULL;
6989 	r600_ring_init(rdev, ring, 1024 * 1024);
6990 
6991 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6992 	ring->ring_obj = NULL;
6993 	r600_ring_init(rdev, ring, 1024 * 1024);
6994 
6995 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6996 	ring->ring_obj = NULL;
6997 	r600_ring_init(rdev, ring, 1024 * 1024);
6998 
6999 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7000 	ring->ring_obj = NULL;
7001 	r600_ring_init(rdev, ring, 64 * 1024);
7002 
7003 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7004 	ring->ring_obj = NULL;
7005 	r600_ring_init(rdev, ring, 64 * 1024);
7006 
7007 	if (rdev->has_uvd) {
7008 		r = radeon_uvd_init(rdev);
7009 		if (!r) {
7010 			ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7011 			ring->ring_obj = NULL;
7012 			r600_ring_init(rdev, ring, 4096);
7013 		}
7014 	}
7015 
7016 	rdev->ih.ring_obj = NULL;
7017 	r600_ih_ring_init(rdev, 64 * 1024);
7018 
7019 	r = r600_pcie_gart_init(rdev);
7020 	if (r)
7021 		return r;
7022 
7023 	rdev->accel_working = true;
7024 	r = si_startup(rdev);
7025 	if (r) {
7026 		dev_err(rdev->dev, "disabling GPU acceleration\n");
7027 		si_cp_fini(rdev);
7028 		cayman_dma_fini(rdev);
7029 		si_irq_fini(rdev);
7030 		sumo_rlc_fini(rdev);
7031 		radeon_wb_fini(rdev);
7032 		radeon_ib_pool_fini(rdev);
7033 		radeon_vm_manager_fini(rdev);
7034 		radeon_irq_kms_fini(rdev);
7035 		si_pcie_gart_fini(rdev);
7036 		rdev->accel_working = false;
7037 	}
7038 
7039 	/* Don't start up if the MC ucode is missing.
7040 	 * The default clocks and voltages before the MC ucode
7041 	 * is loaded are not suffient for advanced operations.
7042 	 */
7043 	if (!rdev->mc_fw) {
7044 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
7045 		return -EINVAL;
7046 	}
7047 
7048 	return 0;
7049 }
7050 
/**
 * si_fini - asic specific driver and hw teardown
 * @rdev: radeon_device pointer
 *
 * Tears down everything set up by si_init()/si_startup(): power
 * management, the CP and DMA engines, power/clock gating, interrupts,
 * RLC, write-back, VM manager, IB pool, KMS IRQs, UVD (when present),
 * GART, VRAM scratch, GEM, fence driver, buffer manager and the
 * atombios state, then frees the cached BIOS image.
 */
void si_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_fini(rdev);
	}
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* NULL the pointer so a stale BIOS image can't be reused */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
7077 
7078 /**
7079  * si_get_gpu_clock_counter - return GPU clock counter snapshot
7080  *
7081  * @rdev: radeon_device pointer
7082  *
7083  * Fetches a GPU clock counter snapshot (SI).
7084  * Returns the 64 bit clock counter snapshot.
7085  */
7086 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
7087 {
7088 	uint64_t clock;
7089 
7090 	mutex_lock(&rdev->gpu_clock_mutex);
7091 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
7092 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7093 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
7094 	mutex_unlock(&rdev->gpu_clock_mutex);
7095 	return clock;
7096 }
7097 
/**
 * si_set_uvd_clocks - program the UVD PLL (UPLL) for the requested clocks
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock (0 to power the PLL down)
 * @dclk: requested UVD decode clock (0 to power the PLL down)
 *
 * Switches VCLK/DCLK to the bypass clock, puts the UPLL in bypass,
 * and, unless either requested clock is 0 (in which case the PLL is
 * left asleep in bypass), computes and programs the feedback and post
 * dividers, brings the PLL back up and switches VCLK/DCLK onto it.
 * The register write/delay ordering below is required by the hw
 * programming sequence — do not reorder.
 * Returns 0 on success, negative error code on failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* NOTE(review): limits look like 125000/250000 VCO bounds and a
	 * 0x03FFFFFF fb_div mask — confirm against the divider helper.
	 */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
7188 
/**
 * si_pcie_gen3_enable - try to raise the PCIE link to gen2/gen3 speed
 * @rdev: radeon_device pointer
 *
 * No-op for root-bus devices, IGPs, non-PCIE parts, links already at
 * the target speed, and when the radeon.pcie_gen2 module parameter is
 * 0.  For a gen3 capable link that is not yet at gen3, an equalization
 * retry sequence is run against both the upstream bridge and the GPU
 * before the target link speed is requested and a retrain initiated.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	/* current_data_rate: 2 = gen3, 1 = gen2 (per the checks below) */
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the HAWD (hw autonomous width disable) state
			 * of both ends so it can be restored after each
			 * equalization attempt below.
			 */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate to the full detected link width first */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl: restore the saved HAWD bits */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2: restore bits 4 and 11:9 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* LNKCTL2 bits 3:0 = target link speed */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hw to clear the speed-change request */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
7348 
/**
 * si_program_aspm - configure PCIE ASPM (active state power management)
 * @rdev: radeon_device pointer
 *
 * Sets up L0s/L1 inactivity timers, PLL power-down while in L1 and
 * CLKREQ#-based clocking in the PCIE port/PIF PHY registers.  Skipped
 * entirely when the radeon.aspm module parameter is 0 or the asic is
 * not PCIE.  The disable_* locals are hard-coded false here, i.e. all
 * features are currently enabled on SI; the dead else-branches are
 * kept for symmetry with other asics.
 */
static void si_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* each register below is only rewritten when the value actually
	 * changed (orig != data) to avoid redundant PCIE port writes.
	 */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE(PCIE_P_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PHY PLLs to power down in the OFF and
			 * TXS2 power states on both PIF instances.
			 */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

			/* clear the PLL ramp-up times on everything but
			 * Oland/Hainan.
			 */
			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
			}
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* Oland/Hainan use a non-zero LS2 exit time */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);

			/* only use CLKREQ# based clocking if the upstream
			 * bridge advertises clock power management support.
			 */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32(THM_CLK_CNTL, data);

				orig = data = RREG32(MISC_CLK_CNTL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32(MISC_CLK_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32(MPLL_BYPASSCLK_SEL, data);

				orig = data = RREG32(SPLL_CNTL_MODE);
				data &= ~SPLL_REFCLK_SEL_MASK;
				if (orig != data)
					WREG32(SPLL_CNTL_MODE, data);
			}
		}
	} else {
		/* currently unreachable: disable_l1 is always false */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE(PCIE_CNTL2, data);

	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				/* reversed lanes: turn the L0s timer back off */
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
7553