xref: /openbmc/linux/drivers/gpu/drm/radeon/si.c (revision 8c0b9ee8)
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include <drm/radeon_drm.h>
32 #include "sid.h"
33 #include "atom.h"
34 #include "si_blit_shaders.h"
35 #include "clearstate_si.h"
36 #include "radeon_ucode.h"
37 
38 
39 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
40 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
41 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
42 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
43 MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
44 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
45 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
46 
47 MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
48 MODULE_FIRMWARE("radeon/tahiti_me.bin");
49 MODULE_FIRMWARE("radeon/tahiti_ce.bin");
50 MODULE_FIRMWARE("radeon/tahiti_mc.bin");
51 MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
52 MODULE_FIRMWARE("radeon/tahiti_smc.bin");
53 
54 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
55 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
56 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
57 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
58 MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
59 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
60 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
61 
62 MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
63 MODULE_FIRMWARE("radeon/pitcairn_me.bin");
64 MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
65 MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
66 MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
67 MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
68 
69 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
70 MODULE_FIRMWARE("radeon/VERDE_me.bin");
71 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
72 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
73 MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
74 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
75 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
76 
77 MODULE_FIRMWARE("radeon/verde_pfp.bin");
78 MODULE_FIRMWARE("radeon/verde_me.bin");
79 MODULE_FIRMWARE("radeon/verde_ce.bin");
80 MODULE_FIRMWARE("radeon/verde_mc.bin");
81 MODULE_FIRMWARE("radeon/verde_rlc.bin");
82 MODULE_FIRMWARE("radeon/verde_smc.bin");
83 
84 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
85 MODULE_FIRMWARE("radeon/OLAND_me.bin");
86 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
87 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
88 MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
89 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
90 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
91 
92 MODULE_FIRMWARE("radeon/oland_pfp.bin");
93 MODULE_FIRMWARE("radeon/oland_me.bin");
94 MODULE_FIRMWARE("radeon/oland_ce.bin");
95 MODULE_FIRMWARE("radeon/oland_mc.bin");
96 MODULE_FIRMWARE("radeon/oland_rlc.bin");
97 MODULE_FIRMWARE("radeon/oland_smc.bin");
98 
99 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
100 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
101 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
102 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
103 MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
104 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
105 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
106 
107 MODULE_FIRMWARE("radeon/hainan_pfp.bin");
108 MODULE_FIRMWARE("radeon/hainan_me.bin");
109 MODULE_FIRMWARE("radeon/hainan_ce.bin");
110 MODULE_FIRMWARE("radeon/hainan_mc.bin");
111 MODULE_FIRMWARE("radeon/hainan_rlc.bin");
112 MODULE_FIRMWARE("radeon/hainan_smc.bin");
113 
114 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
115 static void si_pcie_gen3_enable(struct radeon_device *rdev);
116 static void si_program_aspm(struct radeon_device *rdev);
117 extern void sumo_rlc_fini(struct radeon_device *rdev);
118 extern int sumo_rlc_init(struct radeon_device *rdev);
119 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
120 extern void r600_ih_ring_fini(struct radeon_device *rdev);
121 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
122 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
123 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
124 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
125 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
126 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
127 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
128 					 bool enable);
129 static void si_init_pg(struct radeon_device *rdev);
130 static void si_init_cg(struct radeon_device *rdev);
131 static void si_fini_pg(struct radeon_device *rdev);
132 static void si_fini_cg(struct radeon_device *rdev);
133 static void si_rlc_stop(struct radeon_device *rdev);
134 
135 static const u32 verde_rlc_save_restore_register_list[] =
136 {
137 	(0x8000 << 16) | (0x98f4 >> 2),
138 	0x00000000,
139 	(0x8040 << 16) | (0x98f4 >> 2),
140 	0x00000000,
141 	(0x8000 << 16) | (0xe80 >> 2),
142 	0x00000000,
143 	(0x8040 << 16) | (0xe80 >> 2),
144 	0x00000000,
145 	(0x8000 << 16) | (0x89bc >> 2),
146 	0x00000000,
147 	(0x8040 << 16) | (0x89bc >> 2),
148 	0x00000000,
149 	(0x8000 << 16) | (0x8c1c >> 2),
150 	0x00000000,
151 	(0x8040 << 16) | (0x8c1c >> 2),
152 	0x00000000,
153 	(0x9c00 << 16) | (0x98f0 >> 2),
154 	0x00000000,
155 	(0x9c00 << 16) | (0xe7c >> 2),
156 	0x00000000,
157 	(0x8000 << 16) | (0x9148 >> 2),
158 	0x00000000,
159 	(0x8040 << 16) | (0x9148 >> 2),
160 	0x00000000,
161 	(0x9c00 << 16) | (0x9150 >> 2),
162 	0x00000000,
163 	(0x9c00 << 16) | (0x897c >> 2),
164 	0x00000000,
165 	(0x9c00 << 16) | (0x8d8c >> 2),
166 	0x00000000,
167 	(0x9c00 << 16) | (0xac54 >> 2),
168 	0X00000000,
169 	0x3,
170 	(0x9c00 << 16) | (0x98f8 >> 2),
171 	0x00000000,
172 	(0x9c00 << 16) | (0x9910 >> 2),
173 	0x00000000,
174 	(0x9c00 << 16) | (0x9914 >> 2),
175 	0x00000000,
176 	(0x9c00 << 16) | (0x9918 >> 2),
177 	0x00000000,
178 	(0x9c00 << 16) | (0x991c >> 2),
179 	0x00000000,
180 	(0x9c00 << 16) | (0x9920 >> 2),
181 	0x00000000,
182 	(0x9c00 << 16) | (0x9924 >> 2),
183 	0x00000000,
184 	(0x9c00 << 16) | (0x9928 >> 2),
185 	0x00000000,
186 	(0x9c00 << 16) | (0x992c >> 2),
187 	0x00000000,
188 	(0x9c00 << 16) | (0x9930 >> 2),
189 	0x00000000,
190 	(0x9c00 << 16) | (0x9934 >> 2),
191 	0x00000000,
192 	(0x9c00 << 16) | (0x9938 >> 2),
193 	0x00000000,
194 	(0x9c00 << 16) | (0x993c >> 2),
195 	0x00000000,
196 	(0x9c00 << 16) | (0x9940 >> 2),
197 	0x00000000,
198 	(0x9c00 << 16) | (0x9944 >> 2),
199 	0x00000000,
200 	(0x9c00 << 16) | (0x9948 >> 2),
201 	0x00000000,
202 	(0x9c00 << 16) | (0x994c >> 2),
203 	0x00000000,
204 	(0x9c00 << 16) | (0x9950 >> 2),
205 	0x00000000,
206 	(0x9c00 << 16) | (0x9954 >> 2),
207 	0x00000000,
208 	(0x9c00 << 16) | (0x9958 >> 2),
209 	0x00000000,
210 	(0x9c00 << 16) | (0x995c >> 2),
211 	0x00000000,
212 	(0x9c00 << 16) | (0x9960 >> 2),
213 	0x00000000,
214 	(0x9c00 << 16) | (0x9964 >> 2),
215 	0x00000000,
216 	(0x9c00 << 16) | (0x9968 >> 2),
217 	0x00000000,
218 	(0x9c00 << 16) | (0x996c >> 2),
219 	0x00000000,
220 	(0x9c00 << 16) | (0x9970 >> 2),
221 	0x00000000,
222 	(0x9c00 << 16) | (0x9974 >> 2),
223 	0x00000000,
224 	(0x9c00 << 16) | (0x9978 >> 2),
225 	0x00000000,
226 	(0x9c00 << 16) | (0x997c >> 2),
227 	0x00000000,
228 	(0x9c00 << 16) | (0x9980 >> 2),
229 	0x00000000,
230 	(0x9c00 << 16) | (0x9984 >> 2),
231 	0x00000000,
232 	(0x9c00 << 16) | (0x9988 >> 2),
233 	0x00000000,
234 	(0x9c00 << 16) | (0x998c >> 2),
235 	0x00000000,
236 	(0x9c00 << 16) | (0x8c00 >> 2),
237 	0x00000000,
238 	(0x9c00 << 16) | (0x8c14 >> 2),
239 	0x00000000,
240 	(0x9c00 << 16) | (0x8c04 >> 2),
241 	0x00000000,
242 	(0x9c00 << 16) | (0x8c08 >> 2),
243 	0x00000000,
244 	(0x8000 << 16) | (0x9b7c >> 2),
245 	0x00000000,
246 	(0x8040 << 16) | (0x9b7c >> 2),
247 	0x00000000,
248 	(0x8000 << 16) | (0xe84 >> 2),
249 	0x00000000,
250 	(0x8040 << 16) | (0xe84 >> 2),
251 	0x00000000,
252 	(0x8000 << 16) | (0x89c0 >> 2),
253 	0x00000000,
254 	(0x8040 << 16) | (0x89c0 >> 2),
255 	0x00000000,
256 	(0x8000 << 16) | (0x914c >> 2),
257 	0x00000000,
258 	(0x8040 << 16) | (0x914c >> 2),
259 	0x00000000,
260 	(0x8000 << 16) | (0x8c20 >> 2),
261 	0x00000000,
262 	(0x8040 << 16) | (0x8c20 >> 2),
263 	0x00000000,
264 	(0x8000 << 16) | (0x9354 >> 2),
265 	0x00000000,
266 	(0x8040 << 16) | (0x9354 >> 2),
267 	0x00000000,
268 	(0x9c00 << 16) | (0x9060 >> 2),
269 	0x00000000,
270 	(0x9c00 << 16) | (0x9364 >> 2),
271 	0x00000000,
272 	(0x9c00 << 16) | (0x9100 >> 2),
273 	0x00000000,
274 	(0x9c00 << 16) | (0x913c >> 2),
275 	0x00000000,
276 	(0x8000 << 16) | (0x90e0 >> 2),
277 	0x00000000,
278 	(0x8000 << 16) | (0x90e4 >> 2),
279 	0x00000000,
280 	(0x8000 << 16) | (0x90e8 >> 2),
281 	0x00000000,
282 	(0x8040 << 16) | (0x90e0 >> 2),
283 	0x00000000,
284 	(0x8040 << 16) | (0x90e4 >> 2),
285 	0x00000000,
286 	(0x8040 << 16) | (0x90e8 >> 2),
287 	0x00000000,
288 	(0x9c00 << 16) | (0x8bcc >> 2),
289 	0x00000000,
290 	(0x9c00 << 16) | (0x8b24 >> 2),
291 	0x00000000,
292 	(0x9c00 << 16) | (0x88c4 >> 2),
293 	0x00000000,
294 	(0x9c00 << 16) | (0x8e50 >> 2),
295 	0x00000000,
296 	(0x9c00 << 16) | (0x8c0c >> 2),
297 	0x00000000,
298 	(0x9c00 << 16) | (0x8e58 >> 2),
299 	0x00000000,
300 	(0x9c00 << 16) | (0x8e5c >> 2),
301 	0x00000000,
302 	(0x9c00 << 16) | (0x9508 >> 2),
303 	0x00000000,
304 	(0x9c00 << 16) | (0x950c >> 2),
305 	0x00000000,
306 	(0x9c00 << 16) | (0x9494 >> 2),
307 	0x00000000,
308 	(0x9c00 << 16) | (0xac0c >> 2),
309 	0x00000000,
310 	(0x9c00 << 16) | (0xac10 >> 2),
311 	0x00000000,
312 	(0x9c00 << 16) | (0xac14 >> 2),
313 	0x00000000,
314 	(0x9c00 << 16) | (0xae00 >> 2),
315 	0x00000000,
316 	(0x9c00 << 16) | (0xac08 >> 2),
317 	0x00000000,
318 	(0x9c00 << 16) | (0x88d4 >> 2),
319 	0x00000000,
320 	(0x9c00 << 16) | (0x88c8 >> 2),
321 	0x00000000,
322 	(0x9c00 << 16) | (0x88cc >> 2),
323 	0x00000000,
324 	(0x9c00 << 16) | (0x89b0 >> 2),
325 	0x00000000,
326 	(0x9c00 << 16) | (0x8b10 >> 2),
327 	0x00000000,
328 	(0x9c00 << 16) | (0x8a14 >> 2),
329 	0x00000000,
330 	(0x9c00 << 16) | (0x9830 >> 2),
331 	0x00000000,
332 	(0x9c00 << 16) | (0x9834 >> 2),
333 	0x00000000,
334 	(0x9c00 << 16) | (0x9838 >> 2),
335 	0x00000000,
336 	(0x9c00 << 16) | (0x9a10 >> 2),
337 	0x00000000,
338 	(0x8000 << 16) | (0x9870 >> 2),
339 	0x00000000,
340 	(0x8000 << 16) | (0x9874 >> 2),
341 	0x00000000,
342 	(0x8001 << 16) | (0x9870 >> 2),
343 	0x00000000,
344 	(0x8001 << 16) | (0x9874 >> 2),
345 	0x00000000,
346 	(0x8040 << 16) | (0x9870 >> 2),
347 	0x00000000,
348 	(0x8040 << 16) | (0x9874 >> 2),
349 	0x00000000,
350 	(0x8041 << 16) | (0x9870 >> 2),
351 	0x00000000,
352 	(0x8041 << 16) | (0x9874 >> 2),
353 	0x00000000,
354 	0x00000000
355 };
356 
357 static const u32 tahiti_golden_rlc_registers[] =
358 {
359 	0xc424, 0xffffffff, 0x00601005,
360 	0xc47c, 0xffffffff, 0x10104040,
361 	0xc488, 0xffffffff, 0x0100000a,
362 	0xc314, 0xffffffff, 0x00000800,
363 	0xc30c, 0xffffffff, 0x800000f4,
364 	0xf4a8, 0xffffffff, 0x00000000
365 };
366 
367 static const u32 tahiti_golden_registers[] =
368 {
369 	0x9a10, 0x00010000, 0x00018208,
370 	0x9830, 0xffffffff, 0x00000000,
371 	0x9834, 0xf00fffff, 0x00000400,
372 	0x9838, 0x0002021c, 0x00020200,
373 	0xc78, 0x00000080, 0x00000000,
374 	0xd030, 0x000300c0, 0x00800040,
375 	0xd830, 0x000300c0, 0x00800040,
376 	0x5bb0, 0x000000f0, 0x00000070,
377 	0x5bc0, 0x00200000, 0x50100000,
378 	0x7030, 0x31000311, 0x00000011,
379 	0x277c, 0x00000003, 0x000007ff,
380 	0x240c, 0x000007ff, 0x00000000,
381 	0x8a14, 0xf000001f, 0x00000007,
382 	0x8b24, 0xffffffff, 0x00ffffff,
383 	0x8b10, 0x0000ff0f, 0x00000000,
384 	0x28a4c, 0x07ffffff, 0x4e000000,
385 	0x28350, 0x3f3f3fff, 0x2a00126a,
386 	0x30, 0x000000ff, 0x0040,
387 	0x34, 0x00000040, 0x00004040,
388 	0x9100, 0x07ffffff, 0x03000000,
389 	0x8e88, 0x01ff1f3f, 0x00000000,
390 	0x8e84, 0x01ff1f3f, 0x00000000,
391 	0x9060, 0x0000007f, 0x00000020,
392 	0x9508, 0x00010000, 0x00010000,
393 	0xac14, 0x00000200, 0x000002fb,
394 	0xac10, 0xffffffff, 0x0000543b,
395 	0xac0c, 0xffffffff, 0xa9210876,
396 	0x88d0, 0xffffffff, 0x000fff40,
397 	0x88d4, 0x0000001f, 0x00000010,
398 	0x1410, 0x20000000, 0x20fffed8,
399 	0x15c0, 0x000c0fc0, 0x000c0400
400 };
401 
402 static const u32 tahiti_golden_registers2[] =
403 {
404 	0xc64, 0x00000001, 0x00000001
405 };
406 
407 static const u32 pitcairn_golden_rlc_registers[] =
408 {
409 	0xc424, 0xffffffff, 0x00601004,
410 	0xc47c, 0xffffffff, 0x10102020,
411 	0xc488, 0xffffffff, 0x01000020,
412 	0xc314, 0xffffffff, 0x00000800,
413 	0xc30c, 0xffffffff, 0x800000a4
414 };
415 
416 static const u32 pitcairn_golden_registers[] =
417 {
418 	0x9a10, 0x00010000, 0x00018208,
419 	0x9830, 0xffffffff, 0x00000000,
420 	0x9834, 0xf00fffff, 0x00000400,
421 	0x9838, 0x0002021c, 0x00020200,
422 	0xc78, 0x00000080, 0x00000000,
423 	0xd030, 0x000300c0, 0x00800040,
424 	0xd830, 0x000300c0, 0x00800040,
425 	0x5bb0, 0x000000f0, 0x00000070,
426 	0x5bc0, 0x00200000, 0x50100000,
427 	0x7030, 0x31000311, 0x00000011,
428 	0x2ae4, 0x00073ffe, 0x000022a2,
429 	0x240c, 0x000007ff, 0x00000000,
430 	0x8a14, 0xf000001f, 0x00000007,
431 	0x8b24, 0xffffffff, 0x00ffffff,
432 	0x8b10, 0x0000ff0f, 0x00000000,
433 	0x28a4c, 0x07ffffff, 0x4e000000,
434 	0x28350, 0x3f3f3fff, 0x2a00126a,
435 	0x30, 0x000000ff, 0x0040,
436 	0x34, 0x00000040, 0x00004040,
437 	0x9100, 0x07ffffff, 0x03000000,
438 	0x9060, 0x0000007f, 0x00000020,
439 	0x9508, 0x00010000, 0x00010000,
440 	0xac14, 0x000003ff, 0x000000f7,
441 	0xac10, 0xffffffff, 0x00000000,
442 	0xac0c, 0xffffffff, 0x32761054,
443 	0x88d4, 0x0000001f, 0x00000010,
444 	0x15c0, 0x000c0fc0, 0x000c0400
445 };
446 
447 static const u32 verde_golden_rlc_registers[] =
448 {
449 	0xc424, 0xffffffff, 0x033f1005,
450 	0xc47c, 0xffffffff, 0x10808020,
451 	0xc488, 0xffffffff, 0x00800008,
452 	0xc314, 0xffffffff, 0x00001000,
453 	0xc30c, 0xffffffff, 0x80010014
454 };
455 
456 static const u32 verde_golden_registers[] =
457 {
458 	0x9a10, 0x00010000, 0x00018208,
459 	0x9830, 0xffffffff, 0x00000000,
460 	0x9834, 0xf00fffff, 0x00000400,
461 	0x9838, 0x0002021c, 0x00020200,
462 	0xc78, 0x00000080, 0x00000000,
463 	0xd030, 0x000300c0, 0x00800040,
464 	0xd030, 0x000300c0, 0x00800040,
465 	0xd830, 0x000300c0, 0x00800040,
466 	0xd830, 0x000300c0, 0x00800040,
467 	0x5bb0, 0x000000f0, 0x00000070,
468 	0x5bc0, 0x00200000, 0x50100000,
469 	0x7030, 0x31000311, 0x00000011,
470 	0x2ae4, 0x00073ffe, 0x000022a2,
471 	0x2ae4, 0x00073ffe, 0x000022a2,
472 	0x2ae4, 0x00073ffe, 0x000022a2,
473 	0x240c, 0x000007ff, 0x00000000,
474 	0x240c, 0x000007ff, 0x00000000,
475 	0x240c, 0x000007ff, 0x00000000,
476 	0x8a14, 0xf000001f, 0x00000007,
477 	0x8a14, 0xf000001f, 0x00000007,
478 	0x8a14, 0xf000001f, 0x00000007,
479 	0x8b24, 0xffffffff, 0x00ffffff,
480 	0x8b10, 0x0000ff0f, 0x00000000,
481 	0x28a4c, 0x07ffffff, 0x4e000000,
482 	0x28350, 0x3f3f3fff, 0x0000124a,
483 	0x28350, 0x3f3f3fff, 0x0000124a,
484 	0x28350, 0x3f3f3fff, 0x0000124a,
485 	0x30, 0x000000ff, 0x0040,
486 	0x34, 0x00000040, 0x00004040,
487 	0x9100, 0x07ffffff, 0x03000000,
488 	0x9100, 0x07ffffff, 0x03000000,
489 	0x8e88, 0x01ff1f3f, 0x00000000,
490 	0x8e88, 0x01ff1f3f, 0x00000000,
491 	0x8e88, 0x01ff1f3f, 0x00000000,
492 	0x8e84, 0x01ff1f3f, 0x00000000,
493 	0x8e84, 0x01ff1f3f, 0x00000000,
494 	0x8e84, 0x01ff1f3f, 0x00000000,
495 	0x9060, 0x0000007f, 0x00000020,
496 	0x9508, 0x00010000, 0x00010000,
497 	0xac14, 0x000003ff, 0x00000003,
498 	0xac14, 0x000003ff, 0x00000003,
499 	0xac14, 0x000003ff, 0x00000003,
500 	0xac10, 0xffffffff, 0x00000000,
501 	0xac10, 0xffffffff, 0x00000000,
502 	0xac10, 0xffffffff, 0x00000000,
503 	0xac0c, 0xffffffff, 0x00001032,
504 	0xac0c, 0xffffffff, 0x00001032,
505 	0xac0c, 0xffffffff, 0x00001032,
506 	0x88d4, 0x0000001f, 0x00000010,
507 	0x88d4, 0x0000001f, 0x00000010,
508 	0x88d4, 0x0000001f, 0x00000010,
509 	0x15c0, 0x000c0fc0, 0x000c0400
510 };
511 
512 static const u32 oland_golden_rlc_registers[] =
513 {
514 	0xc424, 0xffffffff, 0x00601005,
515 	0xc47c, 0xffffffff, 0x10104040,
516 	0xc488, 0xffffffff, 0x0100000a,
517 	0xc314, 0xffffffff, 0x00000800,
518 	0xc30c, 0xffffffff, 0x800000f4
519 };
520 
521 static const u32 oland_golden_registers[] =
522 {
523 	0x9a10, 0x00010000, 0x00018208,
524 	0x9830, 0xffffffff, 0x00000000,
525 	0x9834, 0xf00fffff, 0x00000400,
526 	0x9838, 0x0002021c, 0x00020200,
527 	0xc78, 0x00000080, 0x00000000,
528 	0xd030, 0x000300c0, 0x00800040,
529 	0xd830, 0x000300c0, 0x00800040,
530 	0x5bb0, 0x000000f0, 0x00000070,
531 	0x5bc0, 0x00200000, 0x50100000,
532 	0x7030, 0x31000311, 0x00000011,
533 	0x2ae4, 0x00073ffe, 0x000022a2,
534 	0x240c, 0x000007ff, 0x00000000,
535 	0x8a14, 0xf000001f, 0x00000007,
536 	0x8b24, 0xffffffff, 0x00ffffff,
537 	0x8b10, 0x0000ff0f, 0x00000000,
538 	0x28a4c, 0x07ffffff, 0x4e000000,
539 	0x28350, 0x3f3f3fff, 0x00000082,
540 	0x30, 0x000000ff, 0x0040,
541 	0x34, 0x00000040, 0x00004040,
542 	0x9100, 0x07ffffff, 0x03000000,
543 	0x9060, 0x0000007f, 0x00000020,
544 	0x9508, 0x00010000, 0x00010000,
545 	0xac14, 0x000003ff, 0x000000f3,
546 	0xac10, 0xffffffff, 0x00000000,
547 	0xac0c, 0xffffffff, 0x00003210,
548 	0x88d4, 0x0000001f, 0x00000010,
549 	0x15c0, 0x000c0fc0, 0x000c0400
550 };
551 
552 static const u32 hainan_golden_registers[] =
553 {
554 	0x9a10, 0x00010000, 0x00018208,
555 	0x9830, 0xffffffff, 0x00000000,
556 	0x9834, 0xf00fffff, 0x00000400,
557 	0x9838, 0x0002021c, 0x00020200,
558 	0xd0c0, 0xff000fff, 0x00000100,
559 	0xd030, 0x000300c0, 0x00800040,
560 	0xd8c0, 0xff000fff, 0x00000100,
561 	0xd830, 0x000300c0, 0x00800040,
562 	0x2ae4, 0x00073ffe, 0x000022a2,
563 	0x240c, 0x000007ff, 0x00000000,
564 	0x8a14, 0xf000001f, 0x00000007,
565 	0x8b24, 0xffffffff, 0x00ffffff,
566 	0x8b10, 0x0000ff0f, 0x00000000,
567 	0x28a4c, 0x07ffffff, 0x4e000000,
568 	0x28350, 0x3f3f3fff, 0x00000000,
569 	0x30, 0x000000ff, 0x0040,
570 	0x34, 0x00000040, 0x00004040,
571 	0x9100, 0x03e00000, 0x03600000,
572 	0x9060, 0x0000007f, 0x00000020,
573 	0x9508, 0x00010000, 0x00010000,
574 	0xac14, 0x000003ff, 0x000000f1,
575 	0xac10, 0xffffffff, 0x00000000,
576 	0xac0c, 0xffffffff, 0x00003210,
577 	0x88d4, 0x0000001f, 0x00000010,
578 	0x15c0, 0x000c0fc0, 0x000c0400
579 };
580 
581 static const u32 hainan_golden_registers2[] =
582 {
583 	0x98f8, 0xffffffff, 0x02010001
584 };
585 
586 static const u32 tahiti_mgcg_cgcg_init[] =
587 {
588 	0xc400, 0xffffffff, 0xfffffffc,
589 	0x802c, 0xffffffff, 0xe0000000,
590 	0x9a60, 0xffffffff, 0x00000100,
591 	0x92a4, 0xffffffff, 0x00000100,
592 	0xc164, 0xffffffff, 0x00000100,
593 	0x9774, 0xffffffff, 0x00000100,
594 	0x8984, 0xffffffff, 0x06000100,
595 	0x8a18, 0xffffffff, 0x00000100,
596 	0x92a0, 0xffffffff, 0x00000100,
597 	0xc380, 0xffffffff, 0x00000100,
598 	0x8b28, 0xffffffff, 0x00000100,
599 	0x9144, 0xffffffff, 0x00000100,
600 	0x8d88, 0xffffffff, 0x00000100,
601 	0x8d8c, 0xffffffff, 0x00000100,
602 	0x9030, 0xffffffff, 0x00000100,
603 	0x9034, 0xffffffff, 0x00000100,
604 	0x9038, 0xffffffff, 0x00000100,
605 	0x903c, 0xffffffff, 0x00000100,
606 	0xad80, 0xffffffff, 0x00000100,
607 	0xac54, 0xffffffff, 0x00000100,
608 	0x897c, 0xffffffff, 0x06000100,
609 	0x9868, 0xffffffff, 0x00000100,
610 	0x9510, 0xffffffff, 0x00000100,
611 	0xaf04, 0xffffffff, 0x00000100,
612 	0xae04, 0xffffffff, 0x00000100,
613 	0x949c, 0xffffffff, 0x00000100,
614 	0x802c, 0xffffffff, 0xe0000000,
615 	0x9160, 0xffffffff, 0x00010000,
616 	0x9164, 0xffffffff, 0x00030002,
617 	0x9168, 0xffffffff, 0x00040007,
618 	0x916c, 0xffffffff, 0x00060005,
619 	0x9170, 0xffffffff, 0x00090008,
620 	0x9174, 0xffffffff, 0x00020001,
621 	0x9178, 0xffffffff, 0x00040003,
622 	0x917c, 0xffffffff, 0x00000007,
623 	0x9180, 0xffffffff, 0x00060005,
624 	0x9184, 0xffffffff, 0x00090008,
625 	0x9188, 0xffffffff, 0x00030002,
626 	0x918c, 0xffffffff, 0x00050004,
627 	0x9190, 0xffffffff, 0x00000008,
628 	0x9194, 0xffffffff, 0x00070006,
629 	0x9198, 0xffffffff, 0x000a0009,
630 	0x919c, 0xffffffff, 0x00040003,
631 	0x91a0, 0xffffffff, 0x00060005,
632 	0x91a4, 0xffffffff, 0x00000009,
633 	0x91a8, 0xffffffff, 0x00080007,
634 	0x91ac, 0xffffffff, 0x000b000a,
635 	0x91b0, 0xffffffff, 0x00050004,
636 	0x91b4, 0xffffffff, 0x00070006,
637 	0x91b8, 0xffffffff, 0x0008000b,
638 	0x91bc, 0xffffffff, 0x000a0009,
639 	0x91c0, 0xffffffff, 0x000d000c,
640 	0x91c4, 0xffffffff, 0x00060005,
641 	0x91c8, 0xffffffff, 0x00080007,
642 	0x91cc, 0xffffffff, 0x0000000b,
643 	0x91d0, 0xffffffff, 0x000a0009,
644 	0x91d4, 0xffffffff, 0x000d000c,
645 	0x91d8, 0xffffffff, 0x00070006,
646 	0x91dc, 0xffffffff, 0x00090008,
647 	0x91e0, 0xffffffff, 0x0000000c,
648 	0x91e4, 0xffffffff, 0x000b000a,
649 	0x91e8, 0xffffffff, 0x000e000d,
650 	0x91ec, 0xffffffff, 0x00080007,
651 	0x91f0, 0xffffffff, 0x000a0009,
652 	0x91f4, 0xffffffff, 0x0000000d,
653 	0x91f8, 0xffffffff, 0x000c000b,
654 	0x91fc, 0xffffffff, 0x000f000e,
655 	0x9200, 0xffffffff, 0x00090008,
656 	0x9204, 0xffffffff, 0x000b000a,
657 	0x9208, 0xffffffff, 0x000c000f,
658 	0x920c, 0xffffffff, 0x000e000d,
659 	0x9210, 0xffffffff, 0x00110010,
660 	0x9214, 0xffffffff, 0x000a0009,
661 	0x9218, 0xffffffff, 0x000c000b,
662 	0x921c, 0xffffffff, 0x0000000f,
663 	0x9220, 0xffffffff, 0x000e000d,
664 	0x9224, 0xffffffff, 0x00110010,
665 	0x9228, 0xffffffff, 0x000b000a,
666 	0x922c, 0xffffffff, 0x000d000c,
667 	0x9230, 0xffffffff, 0x00000010,
668 	0x9234, 0xffffffff, 0x000f000e,
669 	0x9238, 0xffffffff, 0x00120011,
670 	0x923c, 0xffffffff, 0x000c000b,
671 	0x9240, 0xffffffff, 0x000e000d,
672 	0x9244, 0xffffffff, 0x00000011,
673 	0x9248, 0xffffffff, 0x0010000f,
674 	0x924c, 0xffffffff, 0x00130012,
675 	0x9250, 0xffffffff, 0x000d000c,
676 	0x9254, 0xffffffff, 0x000f000e,
677 	0x9258, 0xffffffff, 0x00100013,
678 	0x925c, 0xffffffff, 0x00120011,
679 	0x9260, 0xffffffff, 0x00150014,
680 	0x9264, 0xffffffff, 0x000e000d,
681 	0x9268, 0xffffffff, 0x0010000f,
682 	0x926c, 0xffffffff, 0x00000013,
683 	0x9270, 0xffffffff, 0x00120011,
684 	0x9274, 0xffffffff, 0x00150014,
685 	0x9278, 0xffffffff, 0x000f000e,
686 	0x927c, 0xffffffff, 0x00110010,
687 	0x9280, 0xffffffff, 0x00000014,
688 	0x9284, 0xffffffff, 0x00130012,
689 	0x9288, 0xffffffff, 0x00160015,
690 	0x928c, 0xffffffff, 0x0010000f,
691 	0x9290, 0xffffffff, 0x00120011,
692 	0x9294, 0xffffffff, 0x00000015,
693 	0x9298, 0xffffffff, 0x00140013,
694 	0x929c, 0xffffffff, 0x00170016,
695 	0x9150, 0xffffffff, 0x96940200,
696 	0x8708, 0xffffffff, 0x00900100,
697 	0xc478, 0xffffffff, 0x00000080,
698 	0xc404, 0xffffffff, 0x0020003f,
699 	0x30, 0xffffffff, 0x0000001c,
700 	0x34, 0x000f0000, 0x000f0000,
701 	0x160c, 0xffffffff, 0x00000100,
702 	0x1024, 0xffffffff, 0x00000100,
703 	0x102c, 0x00000101, 0x00000000,
704 	0x20a8, 0xffffffff, 0x00000104,
705 	0x264c, 0x000c0000, 0x000c0000,
706 	0x2648, 0x000c0000, 0x000c0000,
707 	0x55e4, 0xff000fff, 0x00000100,
708 	0x55e8, 0x00000001, 0x00000001,
709 	0x2f50, 0x00000001, 0x00000001,
710 	0x30cc, 0xc0000fff, 0x00000104,
711 	0xc1e4, 0x00000001, 0x00000001,
712 	0xd0c0, 0xfffffff0, 0x00000100,
713 	0xd8c0, 0xfffffff0, 0x00000100
714 };
715 
716 static const u32 pitcairn_mgcg_cgcg_init[] =
717 {
718 	0xc400, 0xffffffff, 0xfffffffc,
719 	0x802c, 0xffffffff, 0xe0000000,
720 	0x9a60, 0xffffffff, 0x00000100,
721 	0x92a4, 0xffffffff, 0x00000100,
722 	0xc164, 0xffffffff, 0x00000100,
723 	0x9774, 0xffffffff, 0x00000100,
724 	0x8984, 0xffffffff, 0x06000100,
725 	0x8a18, 0xffffffff, 0x00000100,
726 	0x92a0, 0xffffffff, 0x00000100,
727 	0xc380, 0xffffffff, 0x00000100,
728 	0x8b28, 0xffffffff, 0x00000100,
729 	0x9144, 0xffffffff, 0x00000100,
730 	0x8d88, 0xffffffff, 0x00000100,
731 	0x8d8c, 0xffffffff, 0x00000100,
732 	0x9030, 0xffffffff, 0x00000100,
733 	0x9034, 0xffffffff, 0x00000100,
734 	0x9038, 0xffffffff, 0x00000100,
735 	0x903c, 0xffffffff, 0x00000100,
736 	0xad80, 0xffffffff, 0x00000100,
737 	0xac54, 0xffffffff, 0x00000100,
738 	0x897c, 0xffffffff, 0x06000100,
739 	0x9868, 0xffffffff, 0x00000100,
740 	0x9510, 0xffffffff, 0x00000100,
741 	0xaf04, 0xffffffff, 0x00000100,
742 	0xae04, 0xffffffff, 0x00000100,
743 	0x949c, 0xffffffff, 0x00000100,
744 	0x802c, 0xffffffff, 0xe0000000,
745 	0x9160, 0xffffffff, 0x00010000,
746 	0x9164, 0xffffffff, 0x00030002,
747 	0x9168, 0xffffffff, 0x00040007,
748 	0x916c, 0xffffffff, 0x00060005,
749 	0x9170, 0xffffffff, 0x00090008,
750 	0x9174, 0xffffffff, 0x00020001,
751 	0x9178, 0xffffffff, 0x00040003,
752 	0x917c, 0xffffffff, 0x00000007,
753 	0x9180, 0xffffffff, 0x00060005,
754 	0x9184, 0xffffffff, 0x00090008,
755 	0x9188, 0xffffffff, 0x00030002,
756 	0x918c, 0xffffffff, 0x00050004,
757 	0x9190, 0xffffffff, 0x00000008,
758 	0x9194, 0xffffffff, 0x00070006,
759 	0x9198, 0xffffffff, 0x000a0009,
760 	0x919c, 0xffffffff, 0x00040003,
761 	0x91a0, 0xffffffff, 0x00060005,
762 	0x91a4, 0xffffffff, 0x00000009,
763 	0x91a8, 0xffffffff, 0x00080007,
764 	0x91ac, 0xffffffff, 0x000b000a,
765 	0x91b0, 0xffffffff, 0x00050004,
766 	0x91b4, 0xffffffff, 0x00070006,
767 	0x91b8, 0xffffffff, 0x0008000b,
768 	0x91bc, 0xffffffff, 0x000a0009,
769 	0x91c0, 0xffffffff, 0x000d000c,
770 	0x9200, 0xffffffff, 0x00090008,
771 	0x9204, 0xffffffff, 0x000b000a,
772 	0x9208, 0xffffffff, 0x000c000f,
773 	0x920c, 0xffffffff, 0x000e000d,
774 	0x9210, 0xffffffff, 0x00110010,
775 	0x9214, 0xffffffff, 0x000a0009,
776 	0x9218, 0xffffffff, 0x000c000b,
777 	0x921c, 0xffffffff, 0x0000000f,
778 	0x9220, 0xffffffff, 0x000e000d,
779 	0x9224, 0xffffffff, 0x00110010,
780 	0x9228, 0xffffffff, 0x000b000a,
781 	0x922c, 0xffffffff, 0x000d000c,
782 	0x9230, 0xffffffff, 0x00000010,
783 	0x9234, 0xffffffff, 0x000f000e,
784 	0x9238, 0xffffffff, 0x00120011,
785 	0x923c, 0xffffffff, 0x000c000b,
786 	0x9240, 0xffffffff, 0x000e000d,
787 	0x9244, 0xffffffff, 0x00000011,
788 	0x9248, 0xffffffff, 0x0010000f,
789 	0x924c, 0xffffffff, 0x00130012,
790 	0x9250, 0xffffffff, 0x000d000c,
791 	0x9254, 0xffffffff, 0x000f000e,
792 	0x9258, 0xffffffff, 0x00100013,
793 	0x925c, 0xffffffff, 0x00120011,
794 	0x9260, 0xffffffff, 0x00150014,
795 	0x9150, 0xffffffff, 0x96940200,
796 	0x8708, 0xffffffff, 0x00900100,
797 	0xc478, 0xffffffff, 0x00000080,
798 	0xc404, 0xffffffff, 0x0020003f,
799 	0x30, 0xffffffff, 0x0000001c,
800 	0x34, 0x000f0000, 0x000f0000,
801 	0x160c, 0xffffffff, 0x00000100,
802 	0x1024, 0xffffffff, 0x00000100,
803 	0x102c, 0x00000101, 0x00000000,
804 	0x20a8, 0xffffffff, 0x00000104,
805 	0x55e4, 0xff000fff, 0x00000100,
806 	0x55e8, 0x00000001, 0x00000001,
807 	0x2f50, 0x00000001, 0x00000001,
808 	0x30cc, 0xc0000fff, 0x00000104,
809 	0xc1e4, 0x00000001, 0x00000001,
810 	0xd0c0, 0xfffffff0, 0x00000100,
811 	0xd8c0, 0xfffffff0, 0x00000100
812 };
813 
814 static const u32 verde_mgcg_cgcg_init[] =
815 {
816 	0xc400, 0xffffffff, 0xfffffffc,
817 	0x802c, 0xffffffff, 0xe0000000,
818 	0x9a60, 0xffffffff, 0x00000100,
819 	0x92a4, 0xffffffff, 0x00000100,
820 	0xc164, 0xffffffff, 0x00000100,
821 	0x9774, 0xffffffff, 0x00000100,
822 	0x8984, 0xffffffff, 0x06000100,
823 	0x8a18, 0xffffffff, 0x00000100,
824 	0x92a0, 0xffffffff, 0x00000100,
825 	0xc380, 0xffffffff, 0x00000100,
826 	0x8b28, 0xffffffff, 0x00000100,
827 	0x9144, 0xffffffff, 0x00000100,
828 	0x8d88, 0xffffffff, 0x00000100,
829 	0x8d8c, 0xffffffff, 0x00000100,
830 	0x9030, 0xffffffff, 0x00000100,
831 	0x9034, 0xffffffff, 0x00000100,
832 	0x9038, 0xffffffff, 0x00000100,
833 	0x903c, 0xffffffff, 0x00000100,
834 	0xad80, 0xffffffff, 0x00000100,
835 	0xac54, 0xffffffff, 0x00000100,
836 	0x897c, 0xffffffff, 0x06000100,
837 	0x9868, 0xffffffff, 0x00000100,
838 	0x9510, 0xffffffff, 0x00000100,
839 	0xaf04, 0xffffffff, 0x00000100,
840 	0xae04, 0xffffffff, 0x00000100,
841 	0x949c, 0xffffffff, 0x00000100,
842 	0x802c, 0xffffffff, 0xe0000000,
843 	0x9160, 0xffffffff, 0x00010000,
844 	0x9164, 0xffffffff, 0x00030002,
845 	0x9168, 0xffffffff, 0x00040007,
846 	0x916c, 0xffffffff, 0x00060005,
847 	0x9170, 0xffffffff, 0x00090008,
848 	0x9174, 0xffffffff, 0x00020001,
849 	0x9178, 0xffffffff, 0x00040003,
850 	0x917c, 0xffffffff, 0x00000007,
851 	0x9180, 0xffffffff, 0x00060005,
852 	0x9184, 0xffffffff, 0x00090008,
853 	0x9188, 0xffffffff, 0x00030002,
854 	0x918c, 0xffffffff, 0x00050004,
855 	0x9190, 0xffffffff, 0x00000008,
856 	0x9194, 0xffffffff, 0x00070006,
857 	0x9198, 0xffffffff, 0x000a0009,
858 	0x919c, 0xffffffff, 0x00040003,
859 	0x91a0, 0xffffffff, 0x00060005,
860 	0x91a4, 0xffffffff, 0x00000009,
861 	0x91a8, 0xffffffff, 0x00080007,
862 	0x91ac, 0xffffffff, 0x000b000a,
863 	0x91b0, 0xffffffff, 0x00050004,
864 	0x91b4, 0xffffffff, 0x00070006,
865 	0x91b8, 0xffffffff, 0x0008000b,
866 	0x91bc, 0xffffffff, 0x000a0009,
867 	0x91c0, 0xffffffff, 0x000d000c,
868 	0x9200, 0xffffffff, 0x00090008,
869 	0x9204, 0xffffffff, 0x000b000a,
870 	0x9208, 0xffffffff, 0x000c000f,
871 	0x920c, 0xffffffff, 0x000e000d,
872 	0x9210, 0xffffffff, 0x00110010,
873 	0x9214, 0xffffffff, 0x000a0009,
874 	0x9218, 0xffffffff, 0x000c000b,
875 	0x921c, 0xffffffff, 0x0000000f,
876 	0x9220, 0xffffffff, 0x000e000d,
877 	0x9224, 0xffffffff, 0x00110010,
878 	0x9228, 0xffffffff, 0x000b000a,
879 	0x922c, 0xffffffff, 0x000d000c,
880 	0x9230, 0xffffffff, 0x00000010,
881 	0x9234, 0xffffffff, 0x000f000e,
882 	0x9238, 0xffffffff, 0x00120011,
883 	0x923c, 0xffffffff, 0x000c000b,
884 	0x9240, 0xffffffff, 0x000e000d,
885 	0x9244, 0xffffffff, 0x00000011,
886 	0x9248, 0xffffffff, 0x0010000f,
887 	0x924c, 0xffffffff, 0x00130012,
888 	0x9250, 0xffffffff, 0x000d000c,
889 	0x9254, 0xffffffff, 0x000f000e,
890 	0x9258, 0xffffffff, 0x00100013,
891 	0x925c, 0xffffffff, 0x00120011,
892 	0x9260, 0xffffffff, 0x00150014,
893 	0x9150, 0xffffffff, 0x96940200,
894 	0x8708, 0xffffffff, 0x00900100,
895 	0xc478, 0xffffffff, 0x00000080,
896 	0xc404, 0xffffffff, 0x0020003f,
897 	0x30, 0xffffffff, 0x0000001c,
898 	0x34, 0x000f0000, 0x000f0000,
899 	0x160c, 0xffffffff, 0x00000100,
900 	0x1024, 0xffffffff, 0x00000100,
901 	0x102c, 0x00000101, 0x00000000,
902 	0x20a8, 0xffffffff, 0x00000104,
903 	0x264c, 0x000c0000, 0x000c0000,
904 	0x2648, 0x000c0000, 0x000c0000,
905 	0x55e4, 0xff000fff, 0x00000100,
906 	0x55e8, 0x00000001, 0x00000001,
907 	0x2f50, 0x00000001, 0x00000001,
908 	0x30cc, 0xc0000fff, 0x00000104,
909 	0xc1e4, 0x00000001, 0x00000001,
910 	0xd0c0, 0xfffffff0, 0x00000100,
911 	0xd8c0, 0xfffffff0, 0x00000100
912 };
913 
/*
 * MGCG/CGCG (clock gating) golden register init sequence for Oland,
 * applied by si_init_golden_registers() via
 * radeon_program_register_sequence().  Laid out as consecutive u32
 * triplets, presumably {register offset, and-mask, or-value} -- confirm
 * against radeon_program_register_sequence().
 */
static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
993 
/*
 * MGCG/CGCG (clock gating) golden register init sequence for Hainan,
 * applied by si_init_golden_registers() via
 * radeon_program_register_sequence().  Same u32-triplet layout as the
 * other *_mgcg_cgcg_init tables; a few entries present on Oland
 * (0x102c, 0x55e4, 0x55e8) are absent here.
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1070 
1071 static u32 verde_pg_init[] =
1072 {
1073 	0x353c, 0xffffffff, 0x40000,
1074 	0x3538, 0xffffffff, 0x200010ff,
1075 	0x353c, 0xffffffff, 0x0,
1076 	0x353c, 0xffffffff, 0x0,
1077 	0x353c, 0xffffffff, 0x0,
1078 	0x353c, 0xffffffff, 0x0,
1079 	0x353c, 0xffffffff, 0x0,
1080 	0x353c, 0xffffffff, 0x7007,
1081 	0x3538, 0xffffffff, 0x300010ff,
1082 	0x353c, 0xffffffff, 0x0,
1083 	0x353c, 0xffffffff, 0x0,
1084 	0x353c, 0xffffffff, 0x0,
1085 	0x353c, 0xffffffff, 0x0,
1086 	0x353c, 0xffffffff, 0x0,
1087 	0x353c, 0xffffffff, 0x400000,
1088 	0x3538, 0xffffffff, 0x100010ff,
1089 	0x353c, 0xffffffff, 0x0,
1090 	0x353c, 0xffffffff, 0x0,
1091 	0x353c, 0xffffffff, 0x0,
1092 	0x353c, 0xffffffff, 0x0,
1093 	0x353c, 0xffffffff, 0x0,
1094 	0x353c, 0xffffffff, 0x120200,
1095 	0x3538, 0xffffffff, 0x500010ff,
1096 	0x353c, 0xffffffff, 0x0,
1097 	0x353c, 0xffffffff, 0x0,
1098 	0x353c, 0xffffffff, 0x0,
1099 	0x353c, 0xffffffff, 0x0,
1100 	0x353c, 0xffffffff, 0x0,
1101 	0x353c, 0xffffffff, 0x1e1e16,
1102 	0x3538, 0xffffffff, 0x600010ff,
1103 	0x353c, 0xffffffff, 0x0,
1104 	0x353c, 0xffffffff, 0x0,
1105 	0x353c, 0xffffffff, 0x0,
1106 	0x353c, 0xffffffff, 0x0,
1107 	0x353c, 0xffffffff, 0x0,
1108 	0x353c, 0xffffffff, 0x171f1e,
1109 	0x3538, 0xffffffff, 0x700010ff,
1110 	0x353c, 0xffffffff, 0x0,
1111 	0x353c, 0xffffffff, 0x0,
1112 	0x353c, 0xffffffff, 0x0,
1113 	0x353c, 0xffffffff, 0x0,
1114 	0x353c, 0xffffffff, 0x0,
1115 	0x353c, 0xffffffff, 0x0,
1116 	0x3538, 0xffffffff, 0x9ff,
1117 	0x3500, 0xffffffff, 0x0,
1118 	0x3504, 0xffffffff, 0x10000800,
1119 	0x3504, 0xffffffff, 0xf,
1120 	0x3504, 0xffffffff, 0xf,
1121 	0x3500, 0xffffffff, 0x4,
1122 	0x3504, 0xffffffff, 0x1000051e,
1123 	0x3504, 0xffffffff, 0xffff,
1124 	0x3504, 0xffffffff, 0xffff,
1125 	0x3500, 0xffffffff, 0x8,
1126 	0x3504, 0xffffffff, 0x80500,
1127 	0x3500, 0xffffffff, 0x12,
1128 	0x3504, 0xffffffff, 0x9050c,
1129 	0x3500, 0xffffffff, 0x1d,
1130 	0x3504, 0xffffffff, 0xb052c,
1131 	0x3500, 0xffffffff, 0x2a,
1132 	0x3504, 0xffffffff, 0x1053e,
1133 	0x3500, 0xffffffff, 0x2d,
1134 	0x3504, 0xffffffff, 0x10546,
1135 	0x3500, 0xffffffff, 0x30,
1136 	0x3504, 0xffffffff, 0xa054e,
1137 	0x3500, 0xffffffff, 0x3c,
1138 	0x3504, 0xffffffff, 0x1055f,
1139 	0x3500, 0xffffffff, 0x3f,
1140 	0x3504, 0xffffffff, 0x10567,
1141 	0x3500, 0xffffffff, 0x42,
1142 	0x3504, 0xffffffff, 0x1056f,
1143 	0x3500, 0xffffffff, 0x45,
1144 	0x3504, 0xffffffff, 0x10572,
1145 	0x3500, 0xffffffff, 0x48,
1146 	0x3504, 0xffffffff, 0x20575,
1147 	0x3500, 0xffffffff, 0x4c,
1148 	0x3504, 0xffffffff, 0x190801,
1149 	0x3500, 0xffffffff, 0x67,
1150 	0x3504, 0xffffffff, 0x1082a,
1151 	0x3500, 0xffffffff, 0x6a,
1152 	0x3504, 0xffffffff, 0x1b082d,
1153 	0x3500, 0xffffffff, 0x87,
1154 	0x3504, 0xffffffff, 0x310851,
1155 	0x3500, 0xffffffff, 0xba,
1156 	0x3504, 0xffffffff, 0x891,
1157 	0x3500, 0xffffffff, 0xbc,
1158 	0x3504, 0xffffffff, 0x893,
1159 	0x3500, 0xffffffff, 0xbe,
1160 	0x3504, 0xffffffff, 0x20895,
1161 	0x3500, 0xffffffff, 0xc2,
1162 	0x3504, 0xffffffff, 0x20899,
1163 	0x3500, 0xffffffff, 0xc6,
1164 	0x3504, 0xffffffff, 0x2089d,
1165 	0x3500, 0xffffffff, 0xca,
1166 	0x3504, 0xffffffff, 0x8a1,
1167 	0x3500, 0xffffffff, 0xcc,
1168 	0x3504, 0xffffffff, 0x8a3,
1169 	0x3500, 0xffffffff, 0xce,
1170 	0x3504, 0xffffffff, 0x308a5,
1171 	0x3500, 0xffffffff, 0xd3,
1172 	0x3504, 0xffffffff, 0x6d08cd,
1173 	0x3500, 0xffffffff, 0x142,
1174 	0x3504, 0xffffffff, 0x2000095a,
1175 	0x3504, 0xffffffff, 0x1,
1176 	0x3500, 0xffffffff, 0x144,
1177 	0x3504, 0xffffffff, 0x301f095b,
1178 	0x3500, 0xffffffff, 0x165,
1179 	0x3504, 0xffffffff, 0xc094d,
1180 	0x3500, 0xffffffff, 0x173,
1181 	0x3504, 0xffffffff, 0xf096d,
1182 	0x3500, 0xffffffff, 0x184,
1183 	0x3504, 0xffffffff, 0x15097f,
1184 	0x3500, 0xffffffff, 0x19b,
1185 	0x3504, 0xffffffff, 0xc0998,
1186 	0x3500, 0xffffffff, 0x1a9,
1187 	0x3504, 0xffffffff, 0x409a7,
1188 	0x3500, 0xffffffff, 0x1af,
1189 	0x3504, 0xffffffff, 0xcdc,
1190 	0x3500, 0xffffffff, 0x1b1,
1191 	0x3504, 0xffffffff, 0x800,
1192 	0x3508, 0xffffffff, 0x6c9b2000,
1193 	0x3510, 0xfc00, 0x2000,
1194 	0x3544, 0xffffffff, 0xfc0,
1195 	0x28d4, 0x00000100, 0x100
1196 };
1197 
1198 static void si_init_golden_registers(struct radeon_device *rdev)
1199 {
1200 	switch (rdev->family) {
1201 	case CHIP_TAHITI:
1202 		radeon_program_register_sequence(rdev,
1203 						 tahiti_golden_registers,
1204 						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
1205 		radeon_program_register_sequence(rdev,
1206 						 tahiti_golden_rlc_registers,
1207 						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1208 		radeon_program_register_sequence(rdev,
1209 						 tahiti_mgcg_cgcg_init,
1210 						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1211 		radeon_program_register_sequence(rdev,
1212 						 tahiti_golden_registers2,
1213 						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1214 		break;
1215 	case CHIP_PITCAIRN:
1216 		radeon_program_register_sequence(rdev,
1217 						 pitcairn_golden_registers,
1218 						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1219 		radeon_program_register_sequence(rdev,
1220 						 pitcairn_golden_rlc_registers,
1221 						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1222 		radeon_program_register_sequence(rdev,
1223 						 pitcairn_mgcg_cgcg_init,
1224 						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1225 		break;
1226 	case CHIP_VERDE:
1227 		radeon_program_register_sequence(rdev,
1228 						 verde_golden_registers,
1229 						 (const u32)ARRAY_SIZE(verde_golden_registers));
1230 		radeon_program_register_sequence(rdev,
1231 						 verde_golden_rlc_registers,
1232 						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1233 		radeon_program_register_sequence(rdev,
1234 						 verde_mgcg_cgcg_init,
1235 						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1236 		radeon_program_register_sequence(rdev,
1237 						 verde_pg_init,
1238 						 (const u32)ARRAY_SIZE(verde_pg_init));
1239 		break;
1240 	case CHIP_OLAND:
1241 		radeon_program_register_sequence(rdev,
1242 						 oland_golden_registers,
1243 						 (const u32)ARRAY_SIZE(oland_golden_registers));
1244 		radeon_program_register_sequence(rdev,
1245 						 oland_golden_rlc_registers,
1246 						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1247 		radeon_program_register_sequence(rdev,
1248 						 oland_mgcg_cgcg_init,
1249 						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1250 		break;
1251 	case CHIP_HAINAN:
1252 		radeon_program_register_sequence(rdev,
1253 						 hainan_golden_registers,
1254 						 (const u32)ARRAY_SIZE(hainan_golden_registers));
1255 		radeon_program_register_sequence(rdev,
1256 						 hainan_golden_registers2,
1257 						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
1258 		radeon_program_register_sequence(rdev,
1259 						 hainan_mgcg_cgcg_init,
1260 						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1261 		break;
1262 	default:
1263 		break;
1264 	}
1265 }
1266 
1267 #define PCIE_BUS_CLK                10000
1268 #define TCLK                        (PCIE_BUS_CLK / 10)
1269 
1270 /**
1271  * si_get_xclk - get the xclk
1272  *
1273  * @rdev: radeon_device pointer
1274  *
1275  * Returns the reference clock used by the gfx engine
1276  * (SI).
1277  */
1278 u32 si_get_xclk(struct radeon_device *rdev)
1279 {
1280         u32 reference_clock = rdev->clock.spll.reference_freq;
1281 	u32 tmp;
1282 
1283 	tmp = RREG32(CG_CLKPIN_CNTL_2);
1284 	if (tmp & MUX_TCLK_TO_XCLK)
1285 		return TCLK;
1286 
1287 	tmp = RREG32(CG_CLKPIN_CNTL);
1288 	if (tmp & XTALIN_DIVIDE)
1289 		return reference_clock / 4;
1290 
1291 	return reference_clock;
1292 }
1293 
1294 /* get temperature in millidegrees */
1295 int si_get_temp(struct radeon_device *rdev)
1296 {
1297 	u32 temp;
1298 	int actual_temp = 0;
1299 
1300 	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1301 		CTF_TEMP_SHIFT;
1302 
1303 	if (temp & 0x200)
1304 		actual_temp = 255;
1305 	else
1306 		actual_temp = temp & 0x1ff;
1307 
1308 	actual_temp = (actual_temp * 1000);
1309 
1310 	return actual_temp;
1311 }
1312 
1313 #define TAHITI_IO_MC_REGS_SIZE 36
1314 
/*
 * Default MC (memory controller) io debug {index, data} register pairs
 * for Tahiti; written through MC_SEQ_IO_DEBUG_INDEX/DATA by
 * si_mc_load_microcode() when legacy-format MC firmware is in use.
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1353 
/*
 * MC io debug {index, data} pairs for Pitcairn; identical to the Tahiti
 * table except for the final 0x0000009f entry.  Used by
 * si_mc_load_microcode() with legacy-format MC firmware.
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1392 
/*
 * MC io debug {index, data} pairs for Verde; identical to the Tahiti
 * table except for the final 0x0000009f entry.  Used by
 * si_mc_load_microcode() with legacy-format MC firmware.
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1431 
/*
 * MC io debug {index, data} pairs for Oland; identical to the Tahiti
 * table except for the final 0x0000009f entry.  Used by
 * si_mc_load_microcode() with legacy-format MC firmware.
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1470 
/*
 * MC io debug {index, data} pairs for Hainan; identical to the Tahiti
 * table except for the final 0x0000009f entry.  Used by
 * si_mc_load_microcode() with legacy-format MC firmware.
 */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1509 
1510 /* ucode loading */
1511 int si_mc_load_microcode(struct radeon_device *rdev)
1512 {
1513 	const __be32 *fw_data = NULL;
1514 	const __le32 *new_fw_data = NULL;
1515 	u32 running, blackout = 0;
1516 	u32 *io_mc_regs = NULL;
1517 	const __le32 *new_io_mc_regs = NULL;
1518 	int i, regs_size, ucode_size;
1519 
1520 	if (!rdev->mc_fw)
1521 		return -EINVAL;
1522 
1523 	if (rdev->new_fw) {
1524 		const struct mc_firmware_header_v1_0 *hdr =
1525 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1526 
1527 		radeon_ucode_print_mc_hdr(&hdr->header);
1528 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1529 		new_io_mc_regs = (const __le32 *)
1530 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1531 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1532 		new_fw_data = (const __le32 *)
1533 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1534 	} else {
1535 		ucode_size = rdev->mc_fw->size / 4;
1536 
1537 		switch (rdev->family) {
1538 		case CHIP_TAHITI:
1539 			io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1540 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1541 			break;
1542 		case CHIP_PITCAIRN:
1543 			io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1544 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1545 			break;
1546 		case CHIP_VERDE:
1547 		default:
1548 			io_mc_regs = (u32 *)&verde_io_mc_regs;
1549 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1550 			break;
1551 		case CHIP_OLAND:
1552 			io_mc_regs = (u32 *)&oland_io_mc_regs;
1553 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1554 			break;
1555 		case CHIP_HAINAN:
1556 			io_mc_regs = (u32 *)&hainan_io_mc_regs;
1557 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1558 			break;
1559 		}
1560 		fw_data = (const __be32 *)rdev->mc_fw->data;
1561 	}
1562 
1563 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1564 
1565 	if (running == 0) {
1566 		if (running) {
1567 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1568 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1569 		}
1570 
1571 		/* reset the engine and set to writable */
1572 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1573 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1574 
1575 		/* load mc io regs */
1576 		for (i = 0; i < regs_size; i++) {
1577 			if (rdev->new_fw) {
1578 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1579 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1580 			} else {
1581 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1582 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1583 			}
1584 		}
1585 		/* load the MC ucode */
1586 		for (i = 0; i < ucode_size; i++) {
1587 			if (rdev->new_fw)
1588 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1589 			else
1590 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1591 		}
1592 
1593 		/* put the engine back into the active state */
1594 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1595 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1596 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1597 
1598 		/* wait for training to complete */
1599 		for (i = 0; i < rdev->usec_timeout; i++) {
1600 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1601 				break;
1602 			udelay(1);
1603 		}
1604 		for (i = 0; i < rdev->usec_timeout; i++) {
1605 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1606 				break;
1607 			udelay(1);
1608 		}
1609 
1610 		if (running)
1611 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1612 	}
1613 
1614 	return 0;
1615 }
1616 
1617 static int si_init_microcode(struct radeon_device *rdev)
1618 {
1619 	const char *chip_name;
1620 	const char *new_chip_name;
1621 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1622 	size_t smc_req_size, mc2_req_size;
1623 	char fw_name[30];
1624 	int err;
1625 	int new_fw = 0;
1626 
1627 	DRM_DEBUG("\n");
1628 
1629 	switch (rdev->family) {
1630 	case CHIP_TAHITI:
1631 		chip_name = "TAHITI";
1632 		new_chip_name = "tahiti";
1633 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1634 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1635 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1636 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1637 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1638 		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1639 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1640 		break;
1641 	case CHIP_PITCAIRN:
1642 		chip_name = "PITCAIRN";
1643 		new_chip_name = "pitcairn";
1644 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1645 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1646 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1647 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1648 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1649 		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1650 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1651 		break;
1652 	case CHIP_VERDE:
1653 		chip_name = "VERDE";
1654 		new_chip_name = "verde";
1655 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1656 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1657 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1658 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1659 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1660 		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1661 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1662 		break;
1663 	case CHIP_OLAND:
1664 		chip_name = "OLAND";
1665 		new_chip_name = "oland";
1666 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1667 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1668 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1669 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1670 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1671 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1672 		break;
1673 	case CHIP_HAINAN:
1674 		chip_name = "HAINAN";
1675 		new_chip_name = "hainan";
1676 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1677 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1678 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1679 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1680 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1681 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1682 		break;
1683 	default: BUG();
1684 	}
1685 
1686 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
1687 
1688 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1689 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1690 	if (err) {
1691 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1692 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1693 		if (err)
1694 			goto out;
1695 		if (rdev->pfp_fw->size != pfp_req_size) {
1696 			printk(KERN_ERR
1697 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1698 			       rdev->pfp_fw->size, fw_name);
1699 			err = -EINVAL;
1700 			goto out;
1701 		}
1702 	} else {
1703 		err = radeon_ucode_validate(rdev->pfp_fw);
1704 		if (err) {
1705 			printk(KERN_ERR
1706 			       "si_cp: validation failed for firmware \"%s\"\n",
1707 			       fw_name);
1708 			goto out;
1709 		} else {
1710 			new_fw++;
1711 		}
1712 	}
1713 
1714 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
1715 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1716 	if (err) {
1717 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1718 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1719 		if (err)
1720 			goto out;
1721 		if (rdev->me_fw->size != me_req_size) {
1722 			printk(KERN_ERR
1723 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1724 			       rdev->me_fw->size, fw_name);
1725 			err = -EINVAL;
1726 		}
1727 	} else {
1728 		err = radeon_ucode_validate(rdev->me_fw);
1729 		if (err) {
1730 			printk(KERN_ERR
1731 			       "si_cp: validation failed for firmware \"%s\"\n",
1732 			       fw_name);
1733 			goto out;
1734 		} else {
1735 			new_fw++;
1736 		}
1737 	}
1738 
1739 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
1740 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1741 	if (err) {
1742 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1743 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1744 		if (err)
1745 			goto out;
1746 		if (rdev->ce_fw->size != ce_req_size) {
1747 			printk(KERN_ERR
1748 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1749 			       rdev->ce_fw->size, fw_name);
1750 			err = -EINVAL;
1751 		}
1752 	} else {
1753 		err = radeon_ucode_validate(rdev->ce_fw);
1754 		if (err) {
1755 			printk(KERN_ERR
1756 			       "si_cp: validation failed for firmware \"%s\"\n",
1757 			       fw_name);
1758 			goto out;
1759 		} else {
1760 			new_fw++;
1761 		}
1762 	}
1763 
1764 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
1765 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1766 	if (err) {
1767 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1768 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1769 		if (err)
1770 			goto out;
1771 		if (rdev->rlc_fw->size != rlc_req_size) {
1772 			printk(KERN_ERR
1773 			       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1774 			       rdev->rlc_fw->size, fw_name);
1775 			err = -EINVAL;
1776 		}
1777 	} else {
1778 		err = radeon_ucode_validate(rdev->rlc_fw);
1779 		if (err) {
1780 			printk(KERN_ERR
1781 			       "si_cp: validation failed for firmware \"%s\"\n",
1782 			       fw_name);
1783 			goto out;
1784 		} else {
1785 			new_fw++;
1786 		}
1787 	}
1788 
1789 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
1790 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1791 	if (err) {
1792 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1793 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1794 		if (err) {
1795 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1796 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1797 			if (err)
1798 				goto out;
1799 		}
1800 		if ((rdev->mc_fw->size != mc_req_size) &&
1801 		    (rdev->mc_fw->size != mc2_req_size)) {
1802 			printk(KERN_ERR
1803 			       "si_mc: Bogus length %zu in firmware \"%s\"\n",
1804 			       rdev->mc_fw->size, fw_name);
1805 			err = -EINVAL;
1806 		}
1807 		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1808 	} else {
1809 		err = radeon_ucode_validate(rdev->mc_fw);
1810 		if (err) {
1811 			printk(KERN_ERR
1812 			       "si_cp: validation failed for firmware \"%s\"\n",
1813 			       fw_name);
1814 			goto out;
1815 		} else {
1816 			new_fw++;
1817 		}
1818 	}
1819 
1820 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
1821 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1822 	if (err) {
1823 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1824 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1825 		if (err) {
1826 			printk(KERN_ERR
1827 			       "smc: error loading firmware \"%s\"\n",
1828 			       fw_name);
1829 			release_firmware(rdev->smc_fw);
1830 			rdev->smc_fw = NULL;
1831 			err = 0;
1832 		} else if (rdev->smc_fw->size != smc_req_size) {
1833 			printk(KERN_ERR
1834 			       "si_smc: Bogus length %zu in firmware \"%s\"\n",
1835 			       rdev->smc_fw->size, fw_name);
1836 			err = -EINVAL;
1837 		}
1838 	} else {
1839 		err = radeon_ucode_validate(rdev->smc_fw);
1840 		if (err) {
1841 			printk(KERN_ERR
1842 			       "si_cp: validation failed for firmware \"%s\"\n",
1843 			       fw_name);
1844 			goto out;
1845 		} else {
1846 			new_fw++;
1847 		}
1848 	}
1849 
1850 	if (new_fw == 0) {
1851 		rdev->new_fw = false;
1852 	} else if (new_fw < 6) {
1853 		printk(KERN_ERR "si_fw: mixing new and old firmware!\n");
1854 		err = -EINVAL;
1855 	} else {
1856 		rdev->new_fw = true;
1857 	}
1858 out:
1859 	if (err) {
1860 		if (err != -EINVAL)
1861 			printk(KERN_ERR
1862 			       "si_cp: Failed to load firmware \"%s\"\n",
1863 			       fw_name);
1864 		release_firmware(rdev->pfp_fw);
1865 		rdev->pfp_fw = NULL;
1866 		release_firmware(rdev->me_fw);
1867 		rdev->me_fw = NULL;
1868 		release_firmware(rdev->ce_fw);
1869 		rdev->ce_fw = NULL;
1870 		release_firmware(rdev->rlc_fw);
1871 		rdev->rlc_fw = NULL;
1872 		release_firmware(rdev->mc_fw);
1873 		rdev->mc_fw = NULL;
1874 		release_firmware(rdev->smc_fw);
1875 		rdev->smc_fw = NULL;
1876 	}
1877 	return err;
1878 }
1879 
1880 /* watermark setup */
/**
 * dce6_line_buffer_adjust - set up the line buffer for a display controller
 * @rdev: radeon_device pointer
 * @radeon_crtc: the display controller to configure
 * @mode: the current display mode on @radeon_crtc (NULL if none)
 * @other_mode: the display mode on the paired display controller
 *
 * Partitions the line buffer shared by a crtc pair and waits for the
 * DMIF buffer allocation to complete.
 *
 * Returns the line buffer allocation granted to this crtc (0 when the
 * controller is disabled).
 */
static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode,
				   struct drm_display_mode *other_mode)
{
	u32 tmp, buffer_alloc, i;
	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
	/*
	 * Line Buffer Setup
	 * There are 3 line buffers, each one shared by 2 display controllers.
	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
	 * the display controllers.  The partitioning is done via one of four
	 * preset allocations specified in bits 21:20:
	 *  0 - half lb
	 *  2 - whole lb, other crtc must be disabled
	 */
	/* this can get tricky if we have two large displays on a paired group
	 * of crtcs.  Ideally for multiple large displays we'd assign them to
	 * non-linked crtcs for maximum line buffer allocation.
	 */
	if (radeon_crtc->base.enabled && mode) {
		if (other_mode) {
			tmp = 0; /* 1/2 */
			buffer_alloc = 1;
		} else {
			tmp = 2; /* whole */
			buffer_alloc = 2;
		}
	} else {
		tmp = 0;
		buffer_alloc = 0;
	}

	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
	       DC_LB_MEMORY_CONFIG(tmp));

	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
	/* poll until the hardware acknowledges the new DMIF buffer allocation */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
			break;
		udelay(1);
	}

	if (radeon_crtc->base.enabled && mode) {
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 2:
			return 8192 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}
1939 
1940 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1941 {
1942 	u32 tmp = RREG32(MC_SHARED_CHMAP);
1943 
1944 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1945 	case 0:
1946 	default:
1947 		return 1;
1948 	case 1:
1949 		return 2;
1950 	case 2:
1951 		return 4;
1952 	case 3:
1953 		return 8;
1954 	case 4:
1955 		return 3;
1956 	case 5:
1957 		return 6;
1958 	case 6:
1959 		return 10;
1960 	case 7:
1961 		return 12;
1962 	case 8:
1963 		return 16;
1964 	}
1965 }
1966 
/* Input parameters for the DCE6 display watermark calculations below. */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1982 
1983 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1984 {
1985 	/* Calculate raw DRAM Bandwidth */
1986 	fixed20_12 dram_efficiency; /* 0.7 */
1987 	fixed20_12 yclk, dram_channels, bandwidth;
1988 	fixed20_12 a;
1989 
1990 	a.full = dfixed_const(1000);
1991 	yclk.full = dfixed_const(wm->yclk);
1992 	yclk.full = dfixed_div(yclk, a);
1993 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1994 	a.full = dfixed_const(10);
1995 	dram_efficiency.full = dfixed_const(7);
1996 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
1997 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1998 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1999 
2000 	return dfixed_trunc(bandwidth);
2001 }
2002 
2003 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2004 {
2005 	/* Calculate DRAM Bandwidth and the part allocated to display. */
2006 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2007 	fixed20_12 yclk, dram_channels, bandwidth;
2008 	fixed20_12 a;
2009 
2010 	a.full = dfixed_const(1000);
2011 	yclk.full = dfixed_const(wm->yclk);
2012 	yclk.full = dfixed_div(yclk, a);
2013 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2014 	a.full = dfixed_const(10);
2015 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
2016 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2017 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2018 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2019 
2020 	return dfixed_trunc(bandwidth);
2021 }
2022 
2023 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2024 {
2025 	/* Calculate the display Data return Bandwidth */
2026 	fixed20_12 return_efficiency; /* 0.8 */
2027 	fixed20_12 sclk, bandwidth;
2028 	fixed20_12 a;
2029 
2030 	a.full = dfixed_const(1000);
2031 	sclk.full = dfixed_const(wm->sclk);
2032 	sclk.full = dfixed_div(sclk, a);
2033 	a.full = dfixed_const(10);
2034 	return_efficiency.full = dfixed_const(8);
2035 	return_efficiency.full = dfixed_div(return_efficiency, a);
2036 	a.full = dfixed_const(32);
2037 	bandwidth.full = dfixed_mul(a, sclk);
2038 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2039 
2040 	return dfixed_trunc(bandwidth);
2041 }
2042 
2043 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2044 {
2045 	return 32;
2046 }
2047 
2048 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2049 {
2050 	/* Calculate the DMIF Request Bandwidth */
2051 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2052 	fixed20_12 disp_clk, sclk, bandwidth;
2053 	fixed20_12 a, b1, b2;
2054 	u32 min_bandwidth;
2055 
2056 	a.full = dfixed_const(1000);
2057 	disp_clk.full = dfixed_const(wm->disp_clk);
2058 	disp_clk.full = dfixed_div(disp_clk, a);
2059 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2060 	b1.full = dfixed_mul(a, disp_clk);
2061 
2062 	a.full = dfixed_const(1000);
2063 	sclk.full = dfixed_const(wm->sclk);
2064 	sclk.full = dfixed_div(sclk, a);
2065 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2066 	b2.full = dfixed_mul(a, sclk);
2067 
2068 	a.full = dfixed_const(10);
2069 	disp_clk_request_efficiency.full = dfixed_const(8);
2070 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2071 
2072 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2073 
2074 	a.full = dfixed_const(min_bandwidth);
2075 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2076 
2077 	return dfixed_trunc(bandwidth);
2078 }
2079 
2080 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2081 {
2082 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
2083 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2084 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2085 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2086 
2087 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2088 }
2089 
2090 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2091 {
2092 	/* Calculate the display mode Average Bandwidth
2093 	 * DisplayMode should contain the source and destination dimensions,
2094 	 * timing, etc.
2095 	 */
2096 	fixed20_12 bpp;
2097 	fixed20_12 line_time;
2098 	fixed20_12 src_width;
2099 	fixed20_12 bandwidth;
2100 	fixed20_12 a;
2101 
2102 	a.full = dfixed_const(1000);
2103 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2104 	line_time.full = dfixed_div(line_time, a);
2105 	bpp.full = dfixed_const(wm->bytes_per_pixel);
2106 	src_width.full = dfixed_const(wm->src_width);
2107 	bandwidth.full = dfixed_mul(src_width, bpp);
2108 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2109 	bandwidth.full = dfixed_div(bandwidth, line_time);
2110 
2111 	return dfixed_trunc(bandwidth);
2112 }
2113 
/**
 * dce6_latency_watermark - calculate the display latency watermark
 * @wm: watermark parameters
 *
 * Estimates the worst-case data-return latency in ns (memory controller
 * latency plus the wait for other heads plus dc pipe latency), then extends
 * it by any extra time needed to fill a line of the line buffer when the
 * fill cannot complete within the active display period.
 *
 * Returns 0 when no heads are active.
 */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* heavy downscaling, many vtaps, or interlace needs more source lines
	 * per destination line */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* dmif-size-limited fill rate */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* display-clock-limited fill rate */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill the worst-case number of source lines */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
2176 
2177 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2178 {
2179 	if (dce6_average_bandwidth(wm) <=
2180 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2181 		return true;
2182 	else
2183 		return false;
2184 };
2185 
2186 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2187 {
2188 	if (dce6_average_bandwidth(wm) <=
2189 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2190 		return true;
2191 	else
2192 		return false;
2193 };
2194 
2195 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2196 {
2197 	u32 lb_partitions = wm->lb_size / wm->src_width;
2198 	u32 line_time = wm->active_time + wm->blank_time;
2199 	u32 latency_tolerant_lines;
2200 	u32 latency_hiding;
2201 	fixed20_12 a;
2202 
2203 	a.full = dfixed_const(1);
2204 	if (wm->vsc.full > a.full)
2205 		latency_tolerant_lines = 1;
2206 	else {
2207 		if (lb_partitions <= (wm->vtaps + 1))
2208 			latency_tolerant_lines = 1;
2209 		else
2210 			latency_tolerant_lines = 2;
2211 	}
2212 
2213 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2214 
2215 	if (dce6_latency_watermark(wm) <= latency_hiding)
2216 		return true;
2217 	else
2218 		return false;
2219 }
2220 
/**
 * dce6_program_watermarks - program display watermarks for a crtc
 * @rdev: radeon_device pointer
 * @radeon_crtc: the display controller to program
 * @lb_size: line buffer allocation for this crtc (from dce6_line_buffer_adjust)
 * @num_heads: number of active display heads
 *
 * Builds watermark parameter sets for the high and low clock levels,
 * computes the latency watermarks and priority marks, forces display
 * priority high when bandwidth is marginal, and writes the results to the
 * crtc's arbitration, latency, and priority registers. Also caches the
 * line time and watermarks on the crtc for DPM.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm_low, wm_high;
	u32 dram_channels;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* pixel period in ns; line time in ns, capped at 16 bits */
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		if (rdev->family == CHIP_ARUBA)
			dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			dram_channels = si_get_number_of_dram_channels(rdev);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = dram_channels;
		wm_high.num_heads = num_heads;

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = dram_channels;
		wm_low.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
		/* set for low clocks */
		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce6_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce6_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A: watermark_a * pixel rate * hsc / 16, see
		 * PRIORITY_MARK_MASK for the field width */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same formula with watermark_b */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
2380 
2381 void dce6_bandwidth_update(struct radeon_device *rdev)
2382 {
2383 	struct drm_display_mode *mode0 = NULL;
2384 	struct drm_display_mode *mode1 = NULL;
2385 	u32 num_heads = 0, lb_size;
2386 	int i;
2387 
2388 	if (!rdev->mode_info.mode_config_initialized)
2389 		return;
2390 
2391 	radeon_update_display_priority(rdev);
2392 
2393 	for (i = 0; i < rdev->num_crtc; i++) {
2394 		if (rdev->mode_info.crtcs[i]->base.enabled)
2395 			num_heads++;
2396 	}
2397 	for (i = 0; i < rdev->num_crtc; i += 2) {
2398 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2399 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2400 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2401 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2402 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2403 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2404 	}
2405 }
2406 
2407 /*
2408  * Core functions
2409  */
2410 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2411 {
2412 	const u32 num_tile_mode_states = 32;
2413 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2414 
2415 	switch (rdev->config.si.mem_row_size_in_kb) {
2416 	case 1:
2417 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2418 		break;
2419 	case 2:
2420 	default:
2421 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2422 		break;
2423 	case 4:
2424 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2425 		break;
2426 	}
2427 
2428 	if ((rdev->family == CHIP_TAHITI) ||
2429 	    (rdev->family == CHIP_PITCAIRN)) {
2430 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2431 			switch (reg_offset) {
2432 			case 0:  /* non-AA compressed depth or any compressed stencil */
2433 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2434 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2435 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2436 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2437 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2438 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2439 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2440 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2441 				break;
2442 			case 1:  /* 2xAA/4xAA compressed depth only */
2443 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2444 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2445 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2446 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2447 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2448 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2450 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2451 				break;
2452 			case 2:  /* 8xAA compressed depth only */
2453 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2454 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2455 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2456 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2457 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2458 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2460 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2461 				break;
2462 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2463 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2464 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2465 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2466 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2467 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2468 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2470 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2471 				break;
2472 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2473 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2474 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2475 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2476 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2477 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2478 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2480 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2481 				break;
2482 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2483 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2484 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2485 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2486 						 TILE_SPLIT(split_equal_to_row_size) |
2487 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2488 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2490 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2491 				break;
2492 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2493 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2494 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2495 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2496 						 TILE_SPLIT(split_equal_to_row_size) |
2497 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2498 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2499 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2500 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2501 				break;
2502 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2503 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2504 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2505 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2506 						 TILE_SPLIT(split_equal_to_row_size) |
2507 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2508 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2510 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2511 				break;
2512 			case 8:  /* 1D and 1D Array Surfaces */
2513 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2514 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2515 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2516 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2517 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2518 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2519 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2520 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2521 				break;
2522 			case 9:  /* Displayable maps. */
2523 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2524 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2525 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2526 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2527 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2528 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2529 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2530 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2531 				break;
2532 			case 10:  /* Display 8bpp. */
2533 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2534 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2535 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2536 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2537 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2538 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2540 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2541 				break;
2542 			case 11:  /* Display 16bpp. */
2543 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2544 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2545 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2546 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2547 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2548 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2549 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2550 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2551 				break;
2552 			case 12:  /* Display 32bpp. */
2553 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2554 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2555 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2556 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2557 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2558 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2559 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2560 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2561 				break;
2562 			case 13:  /* Thin. */
2563 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2564 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2565 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2566 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2567 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2568 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2569 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2570 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2571 				break;
2572 			case 14:  /* Thin 8 bpp. */
2573 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2574 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2575 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2576 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2577 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2578 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2579 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2580 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2581 				break;
2582 			case 15:  /* Thin 16 bpp. */
2583 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2584 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2585 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2586 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2587 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2588 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2589 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2590 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2591 				break;
2592 			case 16:  /* Thin 32 bpp. */
2593 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2594 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2595 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2596 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2597 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2598 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2599 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2600 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2601 				break;
2602 			case 17:  /* Thin 64 bpp. */
2603 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2605 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2606 						 TILE_SPLIT(split_equal_to_row_size) |
2607 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2608 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2610 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2611 				break;
2612 			case 21:  /* 8 bpp PRT. */
2613 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2614 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2615 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2616 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2617 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2618 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2619 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2620 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2621 				break;
2622 			case 22:  /* 16 bpp PRT */
2623 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2624 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2625 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2626 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2627 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2628 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2630 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2631 				break;
2632 			case 23:  /* 32 bpp PRT */
2633 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2634 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2635 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2636 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2637 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2638 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2640 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2641 				break;
2642 			case 24:  /* 64 bpp PRT */
2643 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2644 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2645 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2646 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2647 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2648 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2649 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2650 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2651 				break;
2652 			case 25:  /* 128 bpp PRT */
2653 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2655 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2656 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2657 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2658 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2659 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2660 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2661 				break;
2662 			default:
2663 				gb_tile_moden = 0;
2664 				break;
2665 			}
2666 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2667 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2668 		}
2669 	} else if ((rdev->family == CHIP_VERDE) ||
2670 		   (rdev->family == CHIP_OLAND) ||
2671 		   (rdev->family == CHIP_HAINAN)) {
2672 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2673 			switch (reg_offset) {
2674 			case 0:  /* non-AA compressed depth or any compressed stencil */
2675 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2676 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2677 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2678 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2679 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2680 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2681 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2682 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2683 				break;
2684 			case 1:  /* 2xAA/4xAA compressed depth only */
2685 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2687 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2688 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2689 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2690 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2692 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2693 				break;
2694 			case 2:  /* 8xAA compressed depth only */
2695 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2696 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2697 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2698 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2699 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2700 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2701 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2702 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2703 				break;
2704 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2705 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2706 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2707 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2708 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2709 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2710 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2711 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2712 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2713 				break;
2714 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2715 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2716 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2717 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2718 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2719 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2720 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2721 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2722 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2723 				break;
2724 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2725 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2726 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2727 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2728 						 TILE_SPLIT(split_equal_to_row_size) |
2729 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2730 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2731 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2732 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2733 				break;
2734 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2735 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2736 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2737 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2738 						 TILE_SPLIT(split_equal_to_row_size) |
2739 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2740 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2741 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2742 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2743 				break;
2744 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2745 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2746 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2747 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2748 						 TILE_SPLIT(split_equal_to_row_size) |
2749 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2750 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2752 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2753 				break;
2754 			case 8:  /* 1D and 1D Array Surfaces */
2755 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2756 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2757 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2758 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2759 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2760 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2761 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2762 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2763 				break;
2764 			case 9:  /* Displayable maps. */
2765 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2766 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2767 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2768 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2769 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2770 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2771 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2772 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2773 				break;
2774 			case 10:  /* Display 8bpp. */
2775 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2776 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2777 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2779 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2780 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2782 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2783 				break;
2784 			case 11:  /* Display 16bpp. */
2785 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2786 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2787 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2788 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2789 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2790 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2791 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2792 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2793 				break;
2794 			case 12:  /* Display 32bpp. */
2795 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2796 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2797 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2798 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2799 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2800 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2801 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2802 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2803 				break;
2804 			case 13:  /* Thin. */
2805 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2806 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2807 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2808 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2809 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2810 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2811 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2812 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2813 				break;
2814 			case 14:  /* Thin 8 bpp. */
2815 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2816 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2817 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2818 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2819 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2820 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2821 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2822 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2823 				break;
2824 			case 15:  /* Thin 16 bpp. */
2825 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2826 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2827 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2828 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2829 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2830 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2831 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2832 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2833 				break;
2834 			case 16:  /* Thin 32 bpp. */
2835 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2836 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2837 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2838 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2839 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2840 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2842 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2843 				break;
2844 			case 17:  /* Thin 64 bpp. */
2845 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2846 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2847 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2848 						 TILE_SPLIT(split_equal_to_row_size) |
2849 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2850 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2851 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2852 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2853 				break;
2854 			case 21:  /* 8 bpp PRT. */
2855 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2856 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2857 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2858 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2859 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2860 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2861 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2862 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2863 				break;
2864 			case 22:  /* 16 bpp PRT */
2865 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2866 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2867 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2868 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2869 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2870 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2871 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2872 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2873 				break;
2874 			case 23:  /* 32 bpp PRT */
2875 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2876 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2877 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2878 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2879 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2880 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2882 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2883 				break;
2884 			case 24:  /* 64 bpp PRT */
2885 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2887 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2888 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2889 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2890 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2891 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2892 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2893 				break;
2894 			case 25:  /* 128 bpp PRT */
2895 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2896 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2897 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2898 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2899 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2900 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2901 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2902 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2903 				break;
2904 			default:
2905 				gb_tile_moden = 0;
2906 				break;
2907 			}
2908 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2909 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2910 		}
2911 	} else
2912 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2913 }
2914 
2915 static void si_select_se_sh(struct radeon_device *rdev,
2916 			    u32 se_num, u32 sh_num)
2917 {
2918 	u32 data = INSTANCE_BROADCAST_WRITES;
2919 
2920 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2921 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2922 	else if (se_num == 0xffffffff)
2923 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2924 	else if (sh_num == 0xffffffff)
2925 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2926 	else
2927 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2928 	WREG32(GRBM_GFX_INDEX, data);
2929 }
2930 
2931 static u32 si_create_bitmask(u32 bit_width)
2932 {
2933 	u32 i, mask = 0;
2934 
2935 	for (i = 0; i < bit_width; i++) {
2936 		mask <<= 1;
2937 		mask |= 1;
2938 	}
2939 	return mask;
2940 }
2941 
2942 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2943 {
2944 	u32 data, mask;
2945 
2946 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2947 	if (data & 1)
2948 		data &= INACTIVE_CUS_MASK;
2949 	else
2950 		data = 0;
2951 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2952 
2953 	data >>= INACTIVE_CUS_SHIFT;
2954 
2955 	mask = si_create_bitmask(cu_per_sh);
2956 
2957 	return ~data & mask;
2958 }
2959 
2960 static void si_setup_spi(struct radeon_device *rdev,
2961 			 u32 se_num, u32 sh_per_se,
2962 			 u32 cu_per_sh)
2963 {
2964 	int i, j, k;
2965 	u32 data, mask, active_cu;
2966 
2967 	for (i = 0; i < se_num; i++) {
2968 		for (j = 0; j < sh_per_se; j++) {
2969 			si_select_se_sh(rdev, i, j);
2970 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2971 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2972 
2973 			mask = 1;
2974 			for (k = 0; k < 16; k++) {
2975 				mask <<= k;
2976 				if (active_cu & mask) {
2977 					data &= ~mask;
2978 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2979 					break;
2980 				}
2981 			}
2982 		}
2983 	}
2984 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2985 }
2986 
2987 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2988 			      u32 max_rb_num_per_se,
2989 			      u32 sh_per_se)
2990 {
2991 	u32 data, mask;
2992 
2993 	data = RREG32(CC_RB_BACKEND_DISABLE);
2994 	if (data & 1)
2995 		data &= BACKEND_DISABLE_MASK;
2996 	else
2997 		data = 0;
2998 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2999 
3000 	data >>= BACKEND_DISABLE_SHIFT;
3001 
3002 	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3003 
3004 	return data & mask;
3005 }
3006 
/* Program the render backend (RB) configuration.  Gathers the per-SE/SH
 * disabled-RB fuses, derives the global enabled-RB mask (saved in
 * rdev->config.si.backend_enable_mask for userspace/CS use) and programs
 * PA_SC_RASTER_CONFIG for every shader engine accordingly.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Collect the disabled-RB bits of every SE/SH pair into one word,
	 * TAHITI_RB_BITMAP_WIDTH_PER_SH bits per shader array. */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* Back to broadcast so later writes reach all SEs/SHs. */
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* An RB is enabled iff its disable bit is clear. */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	rdev->config.si.backend_enable_mask = enabled_rbs;

	/* Pick a raster config RB map per shader array, consuming
	 * enabled_rbs two bits at a time.
	 * NOTE(review): case 2 uses RB_MAP_3 and case 3 uses RB_MAP_2 —
	 * looks intentional (map choice depends on which RB of the pair
	 * survives) but verify against the SI register documentation. */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
3056 
/* One-time GFX engine initialization: sets the per-ASIC configuration
 * limits, clears the HDP tiling registers, programs the address/tiling
 * configuration, configures the render backends and SPI, and applies
 * the HW defaults for the 3D engine.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* Per-family shader engine / CU / backend limits and the "golden"
	 * GB_ADDR_CONFIG value for each ASIC. */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
	WREG32(SRBM_INT_CNTL, 1);
	WREG32(SRBM_INT_ACK, 1);

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read here but never used
	 * below — candidate for removal if the read itself has no side
	 * effect. */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* Derive the DRAM row size (in KB) from the column count fuses,
	 * capped at 4KB. */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* Mirror the address config into every block that does its own
	 * address translation (display, HDP, DMA engines, UVD). */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);

	/* Count the total number of active CUs across all SEs/SHs. */
	rdev->config.si.active_cus = 0;
	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
			rdev->config.si.active_cus +=
				hweight32(si_get_cu_active_bitmap(rdev, i, j));
		}
	}

	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	udelay(50);
}
3322 
3323 /*
3324  * GPU scratch registers helpers function.
3325  */
3326 static void si_scratch_init(struct radeon_device *rdev)
3327 {
3328 	int i;
3329 
3330 	rdev->scratch.num_reg = 7;
3331 	rdev->scratch.reg_base = SCRATCH_REG0;
3332 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3333 		rdev->scratch.free[i] = true;
3334 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3335 	}
3336 }
3337 
/**
 * si_fence_ring_emit - emit a fence on the gfx ring
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a SURFACE_SYNC to flush the read caches over the GART, then an
 * EVENT_WRITE_EOP packet that flushes/invalidates caches, writes the
 * fence sequence number to the fence driver's GPU address and raises an
 * interrupt.  Packet order and dword counts are fixed by the CP packet
 * formats; do not reorder.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, lower_32_bits(addr));
	/* high address bits plus the data/interrupt select fields */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3364 
3365 /*
3366  * IB stuff
3367  */
/**
 * si_ring_ib_execute - emit an indirect buffer on the ring
 * @rdev: radeon_device pointer
 * @ib: IB to schedule
 *
 * For const IBs a SWITCH_BUFFER is emitted before the
 * INDIRECT_BUFFER_CONST packet.  For normal IBs the next ring read
 * pointer is first saved — either to the ring's rptr scratch register
 * or to the writeback page — and the GART read caches are flushed for
 * the IB's VM id after the INDIRECT_BUFFER packet.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* +3 for this SET_CONFIG_REG, +4 for the IB packet,
			 * +8 for the cache flush below */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* save next_rptr via WRITE_DATA to the writeback page */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, vm_id);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3424 
3425 /*
3426  * CP.
3427  */
3428 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3429 {
3430 	if (enable)
3431 		WREG32(CP_ME_CNTL, 0);
3432 	else {
3433 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3434 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3435 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3436 		WREG32(SCRATCH_UMSK, 0);
3437 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3438 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3439 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3440 	}
3441 	udelay(50);
3442 }
3443 
3444 static int si_cp_load_microcode(struct radeon_device *rdev)
3445 {
3446 	int i;
3447 
3448 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3449 		return -EINVAL;
3450 
3451 	si_cp_enable(rdev, false);
3452 
3453 	if (rdev->new_fw) {
3454 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3455 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3456 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3457 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3458 		const struct gfx_firmware_header_v1_0 *me_hdr =
3459 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3460 		const __le32 *fw_data;
3461 		u32 fw_size;
3462 
3463 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3464 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3465 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3466 
3467 		/* PFP */
3468 		fw_data = (const __le32 *)
3469 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3470 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3471 		WREG32(CP_PFP_UCODE_ADDR, 0);
3472 		for (i = 0; i < fw_size; i++)
3473 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3474 		WREG32(CP_PFP_UCODE_ADDR, 0);
3475 
3476 		/* CE */
3477 		fw_data = (const __le32 *)
3478 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3479 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3480 		WREG32(CP_CE_UCODE_ADDR, 0);
3481 		for (i = 0; i < fw_size; i++)
3482 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3483 		WREG32(CP_CE_UCODE_ADDR, 0);
3484 
3485 		/* ME */
3486 		fw_data = (const __be32 *)
3487 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3488 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3489 		WREG32(CP_ME_RAM_WADDR, 0);
3490 		for (i = 0; i < fw_size; i++)
3491 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3492 		WREG32(CP_ME_RAM_WADDR, 0);
3493 	} else {
3494 		const __be32 *fw_data;
3495 
3496 		/* PFP */
3497 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3498 		WREG32(CP_PFP_UCODE_ADDR, 0);
3499 		for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3500 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3501 		WREG32(CP_PFP_UCODE_ADDR, 0);
3502 
3503 		/* CE */
3504 		fw_data = (const __be32 *)rdev->ce_fw->data;
3505 		WREG32(CP_CE_UCODE_ADDR, 0);
3506 		for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3507 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3508 		WREG32(CP_CE_UCODE_ADDR, 0);
3509 
3510 		/* ME */
3511 		fw_data = (const __be32 *)rdev->me_fw->data;
3512 		WREG32(CP_ME_RAM_WADDR, 0);
3513 		for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3514 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3515 		WREG32(CP_ME_RAM_WADDR, 0);
3516 	}
3517 
3518 	WREG32(CP_PFP_UCODE_ADDR, 0);
3519 	WREG32(CP_CE_UCODE_ADDR, 0);
3520 	WREG32(CP_ME_RAM_WADDR, 0);
3521 	WREG32(CP_ME_RAM_RADDR, 0);
3522 	return 0;
3523 }
3524 
3525 static int si_cp_start(struct radeon_device *rdev)
3526 {
3527 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3528 	int r, i;
3529 
3530 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3531 	if (r) {
3532 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3533 		return r;
3534 	}
3535 	/* init the CP */
3536 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3537 	radeon_ring_write(ring, 0x1);
3538 	radeon_ring_write(ring, 0x0);
3539 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3540 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3541 	radeon_ring_write(ring, 0);
3542 	radeon_ring_write(ring, 0);
3543 
3544 	/* init the CE partitions */
3545 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3546 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3547 	radeon_ring_write(ring, 0xc000);
3548 	radeon_ring_write(ring, 0xe000);
3549 	radeon_ring_unlock_commit(rdev, ring, false);
3550 
3551 	si_cp_enable(rdev, true);
3552 
3553 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3554 	if (r) {
3555 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3556 		return r;
3557 	}
3558 
3559 	/* setup clear context state */
3560 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3561 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3562 
3563 	for (i = 0; i < si_default_size; i++)
3564 		radeon_ring_write(ring, si_default_state[i]);
3565 
3566 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3567 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3568 
3569 	/* set clear context state */
3570 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3571 	radeon_ring_write(ring, 0);
3572 
3573 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3574 	radeon_ring_write(ring, 0x00000316);
3575 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3576 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3577 
3578 	radeon_ring_unlock_commit(rdev, ring, false);
3579 
3580 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3581 		ring = &rdev->ring[i];
3582 		r = radeon_ring_lock(rdev, ring, 2);
3583 
3584 		/* clear the compute context state */
3585 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3586 		radeon_ring_write(ring, 0);
3587 
3588 		radeon_ring_unlock_commit(rdev, ring, false);
3589 	}
3590 
3591 	return 0;
3592 }
3593 
3594 static void si_cp_fini(struct radeon_device *rdev)
3595 {
3596 	struct radeon_ring *ring;
3597 	si_cp_enable(rdev, false);
3598 
3599 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3600 	radeon_ring_fini(rdev, ring);
3601 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3602 
3603 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3604 	radeon_ring_fini(rdev, ring);
3605 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3606 
3607 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3608 	radeon_ring_fini(rdev, ring);
3609 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3610 }
3611 
/**
 * si_cp_resume - program and start the CP ring buffers
 *
 * @rdev: radeon_device pointer
 *
 * Programs the gfx ring (RB0) and the two compute rings (RB1/RB2):
 * ring buffer size, read/write pointers, rptr writeback address and
 * ring base, then starts the CP via si_cp_start() and ring-tests all
 * three rings.
 *
 * Returns 0 on success, negative error code if the gfx ring test
 * fails.  A compute ring test failure only marks that ring not ready.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	si_enable_gui_idle_interrupt(rdev, false);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	/* RB_RPTR_WR_ENA temporarily allows the host to reset the rptr */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		/* no writeback buffer: stop the CP from updating rptr in memory */
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	/* failure on the gfx ring is fatal; also takes down the compute rings */
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	/* compute ring failures are non-fatal */
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	si_enable_gui_idle_interrupt(rdev, true);

	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
3737 
/**
 * si_gpu_check_soft_reset - determine which GPU blocks are busy/hung
 *
 * @rdev: radeon_device pointer
 *
 * Reads the various status registers (GRBM, SRBM, DMA, VM L2) and
 * translates busy bits into a mask of RADEON_RESET_* flags describing
 * which engines would need a reset.  An MC reset request is dropped
 * since a busy MC is usually just busy, not hung.
 *
 * Returns the RADEON_RESET_* mask (0 when everything is idle).
 */
u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3818 
/**
 * si_gpu_soft_reset - soft-reset the requested GPU blocks
 *
 * @rdev: radeon_device pointer
 * @reset_mask: RADEON_RESET_* mask of blocks to reset (no-op when 0)
 *
 * Quiesces the GPU (disables PG/CG, stops the RLC and CP, halts the
 * DMA engines, stops the MC), translates @reset_mask into
 * GRBM_SOFT_RESET / SRBM_SOFT_RESET bits, pulses those reset bits,
 * then resumes the MC.  The ordering of the register writes here is
 * hardware-mandated; do not reorder.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable PG/CG */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* stop the rlc */
	si_rlc_stop(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* map the RADEON_RESET_* request onto GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* pulse the GRBM reset bits: set, wait, clear; the extra reads
	 * post the writes */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* same pulse sequence for the SRBM reset bits */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3950 
3951 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3952 {
3953 	u32 tmp, i;
3954 
3955 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3956 	tmp |= SPLL_BYPASS_EN;
3957 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3958 
3959 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3960 	tmp |= SPLL_CTLREQ_CHG;
3961 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3962 
3963 	for (i = 0; i < rdev->usec_timeout; i++) {
3964 		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
3965 			break;
3966 		udelay(1);
3967 	}
3968 
3969 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3970 	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
3971 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3972 
3973 	tmp = RREG32(MPLL_CNTL_MODE);
3974 	tmp &= ~MPLL_MCLK_SEL;
3975 	WREG32(MPLL_CNTL_MODE, tmp);
3976 }
3977 
3978 static void si_spll_powerdown(struct radeon_device *rdev)
3979 {
3980 	u32 tmp;
3981 
3982 	tmp = RREG32(SPLL_CNTL_MODE);
3983 	tmp |= SPLL_SW_DIR_CONTROL;
3984 	WREG32(SPLL_CNTL_MODE, tmp);
3985 
3986 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3987 	tmp |= SPLL_RESET;
3988 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3989 
3990 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3991 	tmp |= SPLL_SLEEP;
3992 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3993 
3994 	tmp = RREG32(SPLL_CNTL_MODE);
3995 	tmp &= ~SPLL_SW_DIR_CONTROL;
3996 	WREG32(SPLL_CNTL_MODE, tmp);
3997 }
3998 
/**
 * si_gpu_pci_config_reset - reset the ASIC via the PCI config space
 *
 * @rdev: radeon_device pointer
 *
 * Heavier fallback used when the soft reset did not bring the GPU
 * back: quiesce the CP, DMA engines, RLC and MC, switch the clocks to
 * bypass, power down the SPLL, disable bus mastering, then trigger a
 * reset through PCI config space and poll CONFIG_MEMSIZE until the
 * ASIC responds again (it reads 0xffffffff while in reset).
 */
static void si_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
	/* dma0 */
	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	/* dma1 */
	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	si_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* set mclk/sclk to bypass */
	si_set_clk_bypass_mode(rdev);
	/* powerdown spll */
	si_spll_powerdown(rdev);
	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);
	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}
}
4050 
4051 int si_asic_reset(struct radeon_device *rdev)
4052 {
4053 	u32 reset_mask;
4054 
4055 	reset_mask = si_gpu_check_soft_reset(rdev);
4056 
4057 	if (reset_mask)
4058 		r600_set_bios_scratch_engine_hung(rdev, true);
4059 
4060 	/* try soft reset */
4061 	si_gpu_soft_reset(rdev, reset_mask);
4062 
4063 	reset_mask = si_gpu_check_soft_reset(rdev);
4064 
4065 	/* try pci config reset */
4066 	if (reset_mask && radeon_hard_reset)
4067 		si_gpu_pci_config_reset(rdev);
4068 
4069 	reset_mask = si_gpu_check_soft_reset(rdev);
4070 
4071 	if (!reset_mask)
4072 		r600_set_bios_scratch_engine_hung(rdev, false);
4073 
4074 	return 0;
4075 }
4076 
4077 /**
4078  * si_gfx_is_lockup - Check if the GFX engine is locked up
4079  *
4080  * @rdev: radeon_device pointer
4081  * @ring: radeon_ring structure holding ring information
4082  *
4083  * Check if the GFX engine is locked up.
4084  * Returns true if the engine appears to be locked up, false if not.
4085  */
4086 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4087 {
4088 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
4089 
4090 	if (!(reset_mask & (RADEON_RESET_GFX |
4091 			    RADEON_RESET_COMPUTE |
4092 			    RADEON_RESET_CP))) {
4093 		radeon_ring_lockup_update(rdev, ring);
4094 		return false;
4095 	}
4096 	return radeon_ring_test_lockup(rdev, ring);
4097 }
4098 
4099 /* MC */
/**
 * si_mc_program - program the memory controller apertures
 *
 * @rdev: radeon_device pointer
 *
 * Stops the MC clients, programs the system/VRAM apertures, the FB
 * location, the HDP non-surface range and the (disabled) AGP range,
 * then resumes the MC.  Display access through the VGA aperture and
 * the VGA renderer are disabled so they cannot scribble over our
 * buffers.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	/* NOTE(review): 0x2c14..0x2c24 stride 0x18 look like per-surface HDP
	 * registers being zeroed - confirm against the register spec */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs top (vram_end) in the high 16 bits and base
	 * (vram_start) in the low 16, both in 16MB units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP is unused: BOT > TOP disables the aperture */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
4150 
4151 void si_vram_gtt_location(struct radeon_device *rdev,
4152 			  struct radeon_mc *mc)
4153 {
4154 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
4155 		/* leave room for at least 1024M GTT */
4156 		dev_warn(rdev->dev, "limiting VRAM\n");
4157 		mc->real_vram_size = 0xFFC0000000ULL;
4158 		mc->mc_vram_size = 0xFFC0000000ULL;
4159 	}
4160 	radeon_vram_location(rdev, &rdev->mc, 0);
4161 	rdev->mc.gtt_base_align = 0;
4162 	radeon_gtt_location(rdev, mc);
4163 }
4164 
4165 static int si_mc_init(struct radeon_device *rdev)
4166 {
4167 	u32 tmp;
4168 	int chansize, numchan;
4169 
4170 	/* Get VRAM informations */
4171 	rdev->mc.vram_is_ddr = true;
4172 	tmp = RREG32(MC_ARB_RAMCFG);
4173 	if (tmp & CHANSIZE_OVERRIDE) {
4174 		chansize = 16;
4175 	} else if (tmp & CHANSIZE_MASK) {
4176 		chansize = 64;
4177 	} else {
4178 		chansize = 32;
4179 	}
4180 	tmp = RREG32(MC_SHARED_CHMAP);
4181 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4182 	case 0:
4183 	default:
4184 		numchan = 1;
4185 		break;
4186 	case 1:
4187 		numchan = 2;
4188 		break;
4189 	case 2:
4190 		numchan = 4;
4191 		break;
4192 	case 3:
4193 		numchan = 8;
4194 		break;
4195 	case 4:
4196 		numchan = 3;
4197 		break;
4198 	case 5:
4199 		numchan = 6;
4200 		break;
4201 	case 6:
4202 		numchan = 10;
4203 		break;
4204 	case 7:
4205 		numchan = 12;
4206 		break;
4207 	case 8:
4208 		numchan = 16;
4209 		break;
4210 	}
4211 	rdev->mc.vram_width = numchan * chansize;
4212 	/* Could aper size report 0 ? */
4213 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4214 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4215 	/* size in MB on si */
4216 	tmp = RREG32(CONFIG_MEMSIZE);
4217 	/* some boards may have garbage in the upper 16 bits */
4218 	if (tmp & 0xffff0000) {
4219 		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4220 		if (tmp & 0xffff)
4221 			tmp &= 0xffff;
4222 	}
4223 	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4224 	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4225 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4226 	si_vram_gtt_location(rdev, &rdev->mc);
4227 	radeon_update_bandwidth_info(rdev);
4228 
4229 	return 0;
4230 }
4231 
4232 /*
4233  * GART
4234  */
/**
 * si_pcie_gart_tlb_flush - flush the VM TLBs
 *
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP cache and then invalidates the TLBs of all 16 VM
 * contexts so subsequent GPU accesses see updated page tables.
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
4243 
/**
 * si_pcie_gart_enable - enable the GART (VM context 0) and VM contexts 1-15
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART table in VRAM, programs the L1 TLB and L2 cache,
 * configures context 0 as the system GART, restores the saved page
 * table base addresses for contexts 1-15 (the per-process VMs), and
 * enables fault reporting on them.  Finishes with a TLB flush.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	/* NOTE(review): the 0xA << 7 field is undocumented here - confirm
	 * against the SI register spec before changing */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faults in context 0 are redirected to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
4331 
4332 static void si_pcie_gart_disable(struct radeon_device *rdev)
4333 {
4334 	unsigned i;
4335 
4336 	for (i = 1; i < 16; ++i) {
4337 		uint32_t reg;
4338 		if (i < 8)
4339 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4340 		else
4341 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4342 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4343 	}
4344 
4345 	/* Disable all tables */
4346 	WREG32(VM_CONTEXT0_CNTL, 0);
4347 	WREG32(VM_CONTEXT1_CNTL, 0);
4348 	/* Setup TLB control */
4349 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4350 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4351 	/* Setup L2 cache */
4352 	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4353 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4354 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4355 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4356 	WREG32(VM_L2_CNTL2, 0);
4357 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4358 	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4359 	radeon_gart_table_vram_unpin(rdev);
4360 }
4361 
/**
 * si_pcie_gart_fini - tear down the GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART hardware, then frees the GART table VRAM object
 * and the GART bookkeeping structures.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4368 
4369 /* vm parser */
4370 static bool si_vm_reg_valid(u32 reg)
4371 {
4372 	/* context regs are fine */
4373 	if (reg >= 0x28000)
4374 		return true;
4375 
4376 	/* check config regs */
4377 	switch (reg) {
4378 	case GRBM_GFX_INDEX:
4379 	case CP_STRMOUT_CNTL:
4380 	case VGT_VTX_VECT_EJECT_REG:
4381 	case VGT_CACHE_INVALIDATION:
4382 	case VGT_ESGS_RING_SIZE:
4383 	case VGT_GSVS_RING_SIZE:
4384 	case VGT_GS_VERTEX_REUSE:
4385 	case VGT_PRIMITIVE_TYPE:
4386 	case VGT_INDEX_TYPE:
4387 	case VGT_NUM_INDICES:
4388 	case VGT_NUM_INSTANCES:
4389 	case VGT_TF_RING_SIZE:
4390 	case VGT_HS_OFFCHIP_PARAM:
4391 	case VGT_TF_MEMORY_BASE:
4392 	case PA_CL_ENHANCE:
4393 	case PA_SU_LINE_STIPPLE_VALUE:
4394 	case PA_SC_LINE_STIPPLE_STATE:
4395 	case PA_SC_ENHANCE:
4396 	case SQC_CACHES:
4397 	case SPI_STATIC_THREAD_MGMT_1:
4398 	case SPI_STATIC_THREAD_MGMT_2:
4399 	case SPI_STATIC_THREAD_MGMT_3:
4400 	case SPI_PS_MAX_WAVE_ID:
4401 	case SPI_CONFIG_CNTL:
4402 	case SPI_CONFIG_CNTL_1:
4403 	case TA_CNTL_AUX:
4404 		return true;
4405 	default:
4406 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4407 		return false;
4408 	}
4409 }
4410 
4411 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4412 				  u32 *ib, struct radeon_cs_packet *pkt)
4413 {
4414 	switch (pkt->opcode) {
4415 	case PACKET3_NOP:
4416 	case PACKET3_SET_BASE:
4417 	case PACKET3_SET_CE_DE_COUNTERS:
4418 	case PACKET3_LOAD_CONST_RAM:
4419 	case PACKET3_WRITE_CONST_RAM:
4420 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4421 	case PACKET3_DUMP_CONST_RAM:
4422 	case PACKET3_INCREMENT_CE_COUNTER:
4423 	case PACKET3_WAIT_ON_DE_COUNTER:
4424 	case PACKET3_CE_WRITE:
4425 		break;
4426 	default:
4427 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4428 		return -EINVAL;
4429 	}
4430 	return 0;
4431 }
4432 
4433 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4434 {
4435 	u32 start_reg, reg, i;
4436 	u32 command = ib[idx + 4];
4437 	u32 info = ib[idx + 1];
4438 	u32 idx_value = ib[idx];
4439 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4440 		/* src address space is register */
4441 		if (((info & 0x60000000) >> 29) == 0) {
4442 			start_reg = idx_value << 2;
4443 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4444 				reg = start_reg;
4445 				if (!si_vm_reg_valid(reg)) {
4446 					DRM_ERROR("CP DMA Bad SRC register\n");
4447 					return -EINVAL;
4448 				}
4449 			} else {
4450 				for (i = 0; i < (command & 0x1fffff); i++) {
4451 					reg = start_reg + (4 * i);
4452 					if (!si_vm_reg_valid(reg)) {
4453 						DRM_ERROR("CP DMA Bad SRC register\n");
4454 						return -EINVAL;
4455 					}
4456 				}
4457 			}
4458 		}
4459 	}
4460 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4461 		/* dst address space is register */
4462 		if (((info & 0x00300000) >> 20) == 0) {
4463 			start_reg = ib[idx + 2];
4464 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4465 				reg = start_reg;
4466 				if (!si_vm_reg_valid(reg)) {
4467 					DRM_ERROR("CP DMA Bad DST register\n");
4468 					return -EINVAL;
4469 				}
4470 			} else {
4471 				for (i = 0; i < (command & 0x1fffff); i++) {
4472 					reg = start_reg + (4 * i);
4473 				if (!si_vm_reg_valid(reg)) {
4474 						DRM_ERROR("CP DMA Bad DST register\n");
4475 						return -EINVAL;
4476 					}
4477 				}
4478 			}
4479 		}
4480 	}
4481 	return 0;
4482 }
4483 
/**
 * si_vm_packet3_gfx_check - validate a gfx-ring PACKET3 from a VM IB
 *
 * @rdev: radeon_device pointer
 * @ib: IB dword buffer
 * @pkt: parsed packet header
 *
 * Most packets are allowed unconditionally; packets that can write
 * registers (COPY_DATA, WRITE_DATA, COND_WRITE, COPY_DW,
 * SET_CONFIG_REG, CP_DMA) have their target registers checked against
 * si_vm_reg_valid().  Unknown opcodes are rejected.
 *
 * Returns 0 when the packet is acceptable, -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;

	switch (pkt->opcode) {
	/* these packets are always safe on the gfx ring */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* only check the destination when it is a register */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* only check when the destination is register space */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* no auto-increment: a single register */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* the whole range must stay inside the config reg window
		 * and each register must be whitelisted */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4601 
4602 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4603 				       u32 *ib, struct radeon_cs_packet *pkt)
4604 {
4605 	int r;
4606 	u32 idx = pkt->idx + 1;
4607 	u32 idx_value = ib[idx];
4608 	u32 start_reg, reg, i;
4609 
4610 	switch (pkt->opcode) {
4611 	case PACKET3_NOP:
4612 	case PACKET3_SET_BASE:
4613 	case PACKET3_CLEAR_STATE:
4614 	case PACKET3_DISPATCH_DIRECT:
4615 	case PACKET3_DISPATCH_INDIRECT:
4616 	case PACKET3_ALLOC_GDS:
4617 	case PACKET3_WRITE_GDS_RAM:
4618 	case PACKET3_ATOMIC_GDS:
4619 	case PACKET3_ATOMIC:
4620 	case PACKET3_OCCLUSION_QUERY:
4621 	case PACKET3_SET_PREDICATION:
4622 	case PACKET3_COND_EXEC:
4623 	case PACKET3_PRED_EXEC:
4624 	case PACKET3_CONTEXT_CONTROL:
4625 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4626 	case PACKET3_WAIT_REG_MEM:
4627 	case PACKET3_MEM_WRITE:
4628 	case PACKET3_PFP_SYNC_ME:
4629 	case PACKET3_SURFACE_SYNC:
4630 	case PACKET3_EVENT_WRITE:
4631 	case PACKET3_EVENT_WRITE_EOP:
4632 	case PACKET3_EVENT_WRITE_EOS:
4633 	case PACKET3_SET_CONTEXT_REG:
4634 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4635 	case PACKET3_SET_SH_REG:
4636 	case PACKET3_SET_SH_REG_OFFSET:
4637 	case PACKET3_INCREMENT_DE_COUNTER:
4638 	case PACKET3_WAIT_ON_CE_COUNTER:
4639 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4640 	case PACKET3_ME_WRITE:
4641 		break;
4642 	case PACKET3_COPY_DATA:
4643 		if ((idx_value & 0xf00) == 0) {
4644 			reg = ib[idx + 3] * 4;
4645 			if (!si_vm_reg_valid(reg))
4646 				return -EINVAL;
4647 		}
4648 		break;
4649 	case PACKET3_WRITE_DATA:
4650 		if ((idx_value & 0xf00) == 0) {
4651 			start_reg = ib[idx + 1] * 4;
4652 			if (idx_value & 0x10000) {
4653 				if (!si_vm_reg_valid(start_reg))
4654 					return -EINVAL;
4655 			} else {
4656 				for (i = 0; i < (pkt->count - 2); i++) {
4657 					reg = start_reg + (4 * i);
4658 					if (!si_vm_reg_valid(reg))
4659 						return -EINVAL;
4660 				}
4661 			}
4662 		}
4663 		break;
4664 	case PACKET3_COND_WRITE:
4665 		if (idx_value & 0x100) {
4666 			reg = ib[idx + 5] * 4;
4667 			if (!si_vm_reg_valid(reg))
4668 				return -EINVAL;
4669 		}
4670 		break;
4671 	case PACKET3_COPY_DW:
4672 		if (idx_value & 0x2) {
4673 			reg = ib[idx + 3] * 4;
4674 			if (!si_vm_reg_valid(reg))
4675 				return -EINVAL;
4676 		}
4677 		break;
4678 	case PACKET3_CP_DMA:
4679 		r = si_vm_packet3_cp_dma_check(ib, idx);
4680 		if (r)
4681 			return r;
4682 		break;
4683 	default:
4684 		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4685 		return -EINVAL;
4686 	}
4687 	return 0;
4688 }
4689 
/**
 * si_ib_parse - validate an indirect buffer for a VM submission
 *
 * @rdev: radeon_device pointer
 * @ib: IB to validate
 *
 * Walks every packet in the IB and dispatches it to the appropriate
 * per-ring packet3 checker (CE for const IBs, otherwise GFX or compute
 * depending on the ring).  Type-0 packets and unknown packet types or
 * rings are rejected.  On any failure the whole IB is dumped to the
 * kernel log with the offending dword marked.
 * Returns 0 on success, negative error code otherwise.
 */
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	int ret = 0;
	u32 idx = 0, i;
	struct radeon_cs_packet pkt;

	do {
		pkt.idx = idx;
		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
		pkt.one_reg_wr = 0;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			/* raw register writes are never allowed from a VM IB */
			dev_err(rdev->dev, "Packet0 not allowed!\n");
			ret = -EINVAL;
			break;
		case RADEON_PACKET_TYPE2:
			/* type-2 packets are single-dword padding NOPs */
			idx += 1;
			break;
		case RADEON_PACKET_TYPE3:
			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
			if (ib->is_const_ib)
				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
			else {
				switch (ib->ring) {
				case RADEON_RING_TYPE_GFX_INDEX:
					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
					break;
				case CAYMAN_RING_TYPE_CP1_INDEX:
				case CAYMAN_RING_TYPE_CP2_INDEX:
					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
					break;
				default:
					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
					ret = -EINVAL;
					break;
				}
			}
			/* advance past header dword + count+1 payload dwords */
			idx += pkt.count + 2;
			break;
		default:
			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
			ret = -EINVAL;
			break;
		}
		if (ret) {
			/* dump the whole IB, marking the dword that failed */
			for (i = 0; i < ib->length_dw; i++) {
				if (i == idx)
					printk("\t0x%08x <---\n", ib->ptr[i]);
				else
					printk("\t0x%08x\n", ib->ptr[i]);
			}
			break;
		}
	} while (idx < ib->length_dw);

	return ret;
}
4748 
4749 /*
4750  * vm
4751  */
4752 int si_vm_init(struct radeon_device *rdev)
4753 {
4754 	/* number of VMs */
4755 	rdev->vm_manager.nvm = 16;
4756 	/* base offset of vram pages */
4757 	rdev->vm_manager.vram_base_offset = 0;
4758 
4759 	return 0;
4760 }
4761 
void si_vm_fini(struct radeon_device *rdev)
{
	/* intentionally empty: si_vm_init() allocates no resources */
}
4765 
4766 /**
4767  * si_vm_decode_fault - print human readable fault info
4768  *
4769  * @rdev: radeon_device pointer
4770  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4771  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4772  *
4773  * Print human readable fault information (SI).
4774  */
static void si_vm_decode_fault(struct radeon_device *rdev,
			       u32 status, u32 addr)
{
	/* decode the fault status register fields */
	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	char *block;

	/* the mc_id -> client-block mapping differs slightly between
	 * Tahiti and the other SI parts (extra TC/VGT clients on Tahiti) */
	if (rdev->family == CHIP_TAHITI) {
		switch (mc_id) {
		case 160:
		case 144:
		case 96:
		case 80:
		case 224:
		case 208:
		case 32:
		case 16:
			block = "CB";
			break;
		case 161:
		case 145:
		case 97:
		case 81:
		case 225:
		case 209:
		case 33:
		case 17:
			block = "CB_FMASK";
			break;
		case 162:
		case 146:
		case 98:
		case 82:
		case 226:
		case 210:
		case 34:
		case 18:
			block = "CB_CMASK";
			break;
		case 163:
		case 147:
		case 99:
		case 83:
		case 227:
		case 211:
		case 35:
		case 19:
			block = "CB_IMMED";
			break;
		case 164:
		case 148:
		case 100:
		case 84:
		case 228:
		case 212:
		case 36:
		case 20:
			block = "DB";
			break;
		case 165:
		case 149:
		case 101:
		case 85:
		case 229:
		case 213:
		case 37:
		case 21:
			block = "DB_HTILE";
			break;
		case 167:
		case 151:
		case 103:
		case 87:
		case 231:
		case 215:
		case 39:
		case 23:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 64:
		case 8:
		case 4:
		case 0:
		case 136:
		case 132:
		case 128:
		case 200:
		case 196:
		case 192:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
		case 190:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	} else {
		switch (mc_id) {
		case 32:
		case 16:
		case 96:
		case 80:
		case 160:
		case 144:
		case 224:
		case 208:
			block = "CB";
			break;
		case 33:
		case 17:
		case 97:
		case 81:
		case 161:
		case 145:
		case 225:
		case 209:
			block = "CB_FMASK";
			break;
		case 34:
		case 18:
		case 98:
		case 82:
		case 162:
		case 146:
		case 226:
		case 210:
			block = "CB_CMASK";
			break;
		case 35:
		case 19:
		case 99:
		case 83:
		case 163:
		case 147:
		case 227:
		case 211:
			block = "CB_IMMED";
			break;
		case 36:
		case 20:
		case 100:
		case 84:
		case 164:
		case 148:
		case 228:
		case 212:
			block = "DB";
			break;
		case 37:
		case 21:
		case 101:
		case 85:
		case 165:
		case 149:
		case 229:
		case 213:
			block = "DB_HTILE";
			break;
		case 39:
		case 23:
		case 103:
		case 87:
		case 167:
		case 151:
		case 231:
		case 215:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 8:
		case 4:
		case 136:
		case 132:
		case 200:
		case 196:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	}

	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_id);
}
5029 
/**
 * si_vm_flush - emit a VM page table update + TLB flush on a CP ring
 *
 * @rdev: radeon_device pointer
 * @ring: ring to emit the packets on
 * @vm_id: VM context to flush (0-15)
 * @pd_addr: GPU address of the page directory
 *
 * Updates the page table base address register for @vm_id, flushes the
 * HDP cache, requests a TLB invalidate for the VM and waits for it to
 * complete.  The packet order here is deliberate; do not reorder.
 */
void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		 unsigned vm_id, uint64_t pd_addr)
{
	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));

	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
5078 
5079 /*
5080  *  Power and clock gating
5081  */
5082 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5083 {
5084 	int i;
5085 
5086 	for (i = 0; i < rdev->usec_timeout; i++) {
5087 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5088 			break;
5089 		udelay(1);
5090 	}
5091 
5092 	for (i = 0; i < rdev->usec_timeout; i++) {
5093 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5094 			break;
5095 		udelay(1);
5096 	}
5097 }
5098 
5099 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5100 					 bool enable)
5101 {
5102 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5103 	u32 mask;
5104 	int i;
5105 
5106 	if (enable)
5107 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5108 	else
5109 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5110 	WREG32(CP_INT_CNTL_RING0, tmp);
5111 
5112 	if (!enable) {
5113 		/* read a gfx register */
5114 		tmp = RREG32(DB_DEPTH_INFO);
5115 
5116 		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5117 		for (i = 0; i < rdev->usec_timeout; i++) {
5118 			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5119 				break;
5120 			udelay(1);
5121 		}
5122 	}
5123 }
5124 
5125 static void si_set_uvd_dcm(struct radeon_device *rdev,
5126 			   bool sw_mode)
5127 {
5128 	u32 tmp, tmp2;
5129 
5130 	tmp = RREG32(UVD_CGC_CTRL);
5131 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5132 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
5133 
5134 	if (sw_mode) {
5135 		tmp &= ~0x7ffff800;
5136 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5137 	} else {
5138 		tmp |= 0x7ffff800;
5139 		tmp2 = 0;
5140 	}
5141 
5142 	WREG32(UVD_CGC_CTRL, tmp);
5143 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5144 }
5145 
void si_init_uvd_internal_cg(struct radeon_device *rdev)
{
	/* hw_mode is hard-wired true, so the else branch below is
	 * currently dead code, apparently kept for reference/debugging */
	bool hw_mode = true;

	if (hw_mode) {
		si_set_uvd_dcm(rdev, false);
	} else {
		u32 tmp = RREG32(UVD_CGC_CTRL);
		tmp &= ~DCM;
		WREG32(UVD_CGC_CTRL, tmp);
	}
}
5158 
5159 static u32 si_halt_rlc(struct radeon_device *rdev)
5160 {
5161 	u32 data, orig;
5162 
5163 	orig = data = RREG32(RLC_CNTL);
5164 
5165 	if (data & RLC_ENABLE) {
5166 		data &= ~RLC_ENABLE;
5167 		WREG32(RLC_CNTL, data);
5168 
5169 		si_wait_for_rlc_serdes(rdev);
5170 	}
5171 
5172 	return orig;
5173 }
5174 
5175 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5176 {
5177 	u32 tmp;
5178 
5179 	tmp = RREG32(RLC_CNTL);
5180 	if (tmp != rlc)
5181 		WREG32(RLC_CNTL, rlc);
5182 }
5183 
5184 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5185 {
5186 	u32 data, orig;
5187 
5188 	orig = data = RREG32(DMA_PG);
5189 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5190 		data |= PG_CNTL_ENABLE;
5191 	else
5192 		data &= ~PG_CNTL_ENABLE;
5193 	if (orig != data)
5194 		WREG32(DMA_PG, data);
5195 }
5196 
static void si_init_dma_pg(struct radeon_device *rdev)
{
	u32 tmp;

	/* init sequence for the DMA power-gating state machine; the
	 * magic values are presumably hardware-specified — do not change */
	WREG32(DMA_PGFSM_WRITE,  0x00002000);
	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);

	/* five zero writes complete the PGFSM programming sequence */
	for (tmp = 0; tmp < 5; tmp++)
		WREG32(DMA_PGFSM_WRITE, 0);
}
5207 
static void si_enable_gfx_cgpg(struct radeon_device *rdev,
			       bool enable)
{
	u32 tmp;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		/* program power up/down delay thresholds */
		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
		WREG32(RLC_TTOP_D, tmp);

		/* turn on gfx power gating */
		tmp = RREG32(RLC_PG_CNTL);
		tmp |= GFX_PG_ENABLE;
		WREG32(RLC_PG_CNTL, tmp);

		/* let the RLC power gate automatically on idle */
		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp |= AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);
	} else {
		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp &= ~AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);

		/* dummy gfx register read; presumably wakes the gfx block
		 * after auto power gating is disabled — TODO confirm */
		tmp = RREG32(DB_RENDER_CONTROL);
	}
}
5232 
static void si_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 tmp;

	/* tell the RLC where the save/restore buffer lives (256B units) */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);

	/* use the save/restore buffer as gfx PG source */
	tmp = RREG32(RLC_PG_CNTL);
	tmp |= GFX_PG_SRC;
	WREG32(RLC_PG_CNTL, tmp);

	/* clear state buffer address (256B units) */
	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	tmp = RREG32(RLC_AUTO_PG_CTRL);

	/* idle threshold before the RLC auto power-gates the gfx block */
	tmp &= ~GRBM_REG_SGIT_MASK;
	tmp |= GRBM_REG_SGIT(0x700);
	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
	WREG32(RLC_AUTO_PG_CTRL, tmp);
}
5252 
5253 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5254 {
5255 	u32 mask = 0, tmp, tmp1;
5256 	int i;
5257 
5258 	si_select_se_sh(rdev, se, sh);
5259 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5260 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5261 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5262 
5263 	tmp &= 0xffff0000;
5264 
5265 	tmp |= tmp1;
5266 	tmp >>= 16;
5267 
5268 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5269 		mask <<= 1;
5270 		mask |= 1;
5271 	}
5272 
5273 	return (~tmp) & mask;
5274 }
5275 
5276 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5277 {
5278 	u32 i, j, k, active_cu_number = 0;
5279 	u32 mask, counter, cu_bitmap;
5280 	u32 tmp = 0;
5281 
5282 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5283 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5284 			mask = 1;
5285 			cu_bitmap = 0;
5286 			counter  = 0;
5287 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5288 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5289 					if (counter < 2)
5290 						cu_bitmap |= mask;
5291 					counter++;
5292 				}
5293 				mask <<= 1;
5294 			}
5295 
5296 			active_cu_number += counter;
5297 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5298 		}
5299 	}
5300 
5301 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5302 
5303 	tmp = RREG32(RLC_MAX_PG_CU);
5304 	tmp &= ~MAX_PU_CU_MASK;
5305 	tmp |= MAX_PU_CU(active_cu_number);
5306 	WREG32(RLC_MAX_PG_CU, tmp);
5307 }
5308 
static void si_enable_cgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		si_enable_gui_idle_interrupt(rdev, true);

		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

		/* halt the RLC while reprogramming the serdes, then
		 * restore its previous state */
		tmp = si_halt_rlc(rdev);

		/* broadcast the serdes write to all instances */
		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

		si_wait_for_rlc_serdes(rdev);

		si_update_rlc(rdev, tmp);

		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

		data |= CGCG_EN | CGLS_EN;
	} else {
		si_enable_gui_idle_interrupt(rdev, false);

		/* repeated dummy reads, kept from the original sequence;
		 * presumably needed to settle the CB clock gating */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	/* avoid a redundant register write */
	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
5348 
static void si_enable_mgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data = 0x96940200;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* optionally enable CP memory light sleep */
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
			orig = data = RREG32(CP_MEM_SLP_CNTL);
			data |= CP_MEM_LS_EN;
			if (orig != data)
				WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* clear the MGCG override bits */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xffffffc0;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* halt the RLC while reprogramming the serdes, then
		 * restore its previous state */
		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

		si_update_rlc(rdev, tmp);
	} else {
		/* set the MGCG override bits */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* turn off CP memory light sleep */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= LS_OVERRIDE | OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

		si_update_rlc(rdev, tmp);
	}
}
5404 
static void si_enable_uvd_mgcg(struct radeon_device *rdev,
			       bool enable)
{
	u32 orig, data, tmp;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		/* enable all UVD memory clock gates */
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp |= 0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		/* clear the SMC-side local clock gating overrides */
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
	} else {
		/* disable all UVD memory clock gates */
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp &= ~0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		/* set all SMC-side local clock gating overrides */
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
	}
}
5436 
/* MC/VM clock-gating control registers toggled as a group by
 * si_enable_mc_ls() and si_enable_mc_mgcg() below */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5449 
5450 static void si_enable_mc_ls(struct radeon_device *rdev,
5451 			    bool enable)
5452 {
5453 	int i;
5454 	u32 orig, data;
5455 
5456 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5457 		orig = data = RREG32(mc_cg_registers[i]);
5458 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5459 			data |= MC_LS_ENABLE;
5460 		else
5461 			data &= ~MC_LS_ENABLE;
5462 		if (data != orig)
5463 			WREG32(mc_cg_registers[i], data);
5464 	}
5465 }
5466 
5467 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5468 			       bool enable)
5469 {
5470 	int i;
5471 	u32 orig, data;
5472 
5473 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5474 		orig = data = RREG32(mc_cg_registers[i]);
5475 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5476 			data |= MC_CG_ENABLE;
5477 		else
5478 			data &= ~MC_CG_ENABLE;
5479 		if (data != orig)
5480 			WREG32(mc_cg_registers[i], data);
5481 	}
5482 }
5483 
5484 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5485 			       bool enable)
5486 {
5487 	u32 orig, data, offset;
5488 	int i;
5489 
5490 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5491 		for (i = 0; i < 2; i++) {
5492 			if (i == 0)
5493 				offset = DMA0_REGISTER_OFFSET;
5494 			else
5495 				offset = DMA1_REGISTER_OFFSET;
5496 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5497 			data &= ~MEM_POWER_OVERRIDE;
5498 			if (data != orig)
5499 				WREG32(DMA_POWER_CNTL + offset, data);
5500 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5501 		}
5502 	} else {
5503 		for (i = 0; i < 2; i++) {
5504 			if (i == 0)
5505 				offset = DMA0_REGISTER_OFFSET;
5506 			else
5507 				offset = DMA1_REGISTER_OFFSET;
5508 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5509 			data |= MEM_POWER_OVERRIDE;
5510 			if (data != orig)
5511 				WREG32(DMA_POWER_CNTL + offset, data);
5512 
5513 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5514 			data = 0xff000000;
5515 			if (data != orig)
5516 				WREG32(DMA_CLK_CTRL + offset, data);
5517 		}
5518 	}
5519 }
5520 
5521 static void si_enable_bif_mgls(struct radeon_device *rdev,
5522 			       bool enable)
5523 {
5524 	u32 orig, data;
5525 
5526 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5527 
5528 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5529 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5530 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5531 	else
5532 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5533 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5534 
5535 	if (orig != data)
5536 		WREG32_PCIE(PCIE_CNTL2, data);
5537 }
5538 
5539 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5540 			       bool enable)
5541 {
5542 	u32 orig, data;
5543 
5544 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5545 
5546 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5547 		data &= ~CLOCK_GATING_DIS;
5548 	else
5549 		data |= CLOCK_GATING_DIS;
5550 
5551 	if (orig != data)
5552 		WREG32(HDP_HOST_PATH_CNTL, data);
5553 }
5554 
5555 static void si_enable_hdp_ls(struct radeon_device *rdev,
5556 			     bool enable)
5557 {
5558 	u32 orig, data;
5559 
5560 	orig = data = RREG32(HDP_MEM_POWER_LS);
5561 
5562 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5563 		data |= HDP_LS_ENABLE;
5564 	else
5565 		data &= ~HDP_LS_ENABLE;
5566 
5567 	if (orig != data)
5568 		WREG32(HDP_MEM_POWER_LS, data);
5569 }
5570 
5571 static void si_update_cg(struct radeon_device *rdev,
5572 			 u32 block, bool enable)
5573 {
5574 	if (block & RADEON_CG_BLOCK_GFX) {
5575 		si_enable_gui_idle_interrupt(rdev, false);
5576 		/* order matters! */
5577 		if (enable) {
5578 			si_enable_mgcg(rdev, true);
5579 			si_enable_cgcg(rdev, true);
5580 		} else {
5581 			si_enable_cgcg(rdev, false);
5582 			si_enable_mgcg(rdev, false);
5583 		}
5584 		si_enable_gui_idle_interrupt(rdev, true);
5585 	}
5586 
5587 	if (block & RADEON_CG_BLOCK_MC) {
5588 		si_enable_mc_mgcg(rdev, enable);
5589 		si_enable_mc_ls(rdev, enable);
5590 	}
5591 
5592 	if (block & RADEON_CG_BLOCK_SDMA) {
5593 		si_enable_dma_mgcg(rdev, enable);
5594 	}
5595 
5596 	if (block & RADEON_CG_BLOCK_BIF) {
5597 		si_enable_bif_mgls(rdev, enable);
5598 	}
5599 
5600 	if (block & RADEON_CG_BLOCK_UVD) {
5601 		if (rdev->has_uvd) {
5602 			si_enable_uvd_mgcg(rdev, enable);
5603 		}
5604 	}
5605 
5606 	if (block & RADEON_CG_BLOCK_HDP) {
5607 		si_enable_hdp_mgcg(rdev, enable);
5608 		si_enable_hdp_ls(rdev, enable);
5609 	}
5610 }
5611 
5612 static void si_init_cg(struct radeon_device *rdev)
5613 {
5614 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5615 			    RADEON_CG_BLOCK_MC |
5616 			    RADEON_CG_BLOCK_SDMA |
5617 			    RADEON_CG_BLOCK_BIF |
5618 			    RADEON_CG_BLOCK_HDP), true);
5619 	if (rdev->has_uvd) {
5620 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5621 		si_init_uvd_internal_cg(rdev);
5622 	}
5623 }
5624 
5625 static void si_fini_cg(struct radeon_device *rdev)
5626 {
5627 	if (rdev->has_uvd) {
5628 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5629 	}
5630 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5631 			    RADEON_CG_BLOCK_MC |
5632 			    RADEON_CG_BLOCK_SDMA |
5633 			    RADEON_CG_BLOCK_BIF |
5634 			    RADEON_CG_BLOCK_HDP), false);
5635 }
5636 
5637 u32 si_get_csb_size(struct radeon_device *rdev)
5638 {
5639 	u32 count = 0;
5640 	const struct cs_section_def *sect = NULL;
5641 	const struct cs_extent_def *ext = NULL;
5642 
5643 	if (rdev->rlc.cs_data == NULL)
5644 		return 0;
5645 
5646 	/* begin clear state */
5647 	count += 2;
5648 	/* context control state */
5649 	count += 3;
5650 
5651 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5652 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5653 			if (sect->id == SECT_CONTEXT)
5654 				count += 2 + ext->reg_count;
5655 			else
5656 				return 0;
5657 		}
5658 	}
5659 	/* pa_sc_raster_config */
5660 	count += 3;
5661 	/* end clear state */
5662 	count += 2;
5663 	/* clear state */
5664 	count += 2;
5665 
5666 	return count;
5667 }
5668 
/**
 * si_get_csb_buffer - fill in the RLC clear-state buffer
 *
 * @rdev: radeon_device pointer
 * @buffer: destination buffer (must hold si_get_csb_size() dwords)
 *
 * Emits the PM4 packet stream the RLC replays to restore a clean
 * context: begin clear state, context control, all context registers
 * from the clear-state tables, the per-ASIC PA_SC_RASTER_CONFIG value,
 * end clear state and a CLEAR_STATE packet.  The dword layout here must
 * stay in sync with si_get_csb_size().
 */
void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				/* reg offset relative to the context reg base */
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* per-ASIC raster config (backend/raster mapping) */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
		buffer[count++] = cpu_to_le32(0x2a00126a);
		break;
	case CHIP_VERDE:
		buffer[count++] = cpu_to_le32(0x0000124a);
		break;
	case CHIP_OLAND:
		buffer[count++] = cpu_to_le32(0x00000082);
		break;
	case CHIP_HAINAN:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
5728 
/**
 * si_init_pg - initialize powergating state
 *
 * @rdev: radeon_device pointer
 *
 * Program the RLC powergating setup.  When powergating is supported,
 * bring up the per-block PG state and enable DMA/GFX powergating;
 * otherwise just point the RLC at the save/restore and clear-state
 * buffers so clear state still works.
 */
static void si_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
			si_init_dma_pg(rdev);
		}
		si_init_ao_cu_mask(rdev);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			si_init_gfx_cgpg(rdev);
		} else {
			/* no GFX powergating: still tell the RLC where the
			 * save/restore and clear-state buffers live
			 */
			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
		}
		si_enable_dma_pg(rdev, true);
		si_enable_gfx_cgpg(rdev, true);
	} else {
		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
	}
}
5749 
5750 static void si_fini_pg(struct radeon_device *rdev)
5751 {
5752 	if (rdev->pg_flags) {
5753 		si_enable_dma_pg(rdev, false);
5754 		si_enable_gfx_cgpg(rdev, false);
5755 	}
5756 }
5757 
5758 /*
5759  * RLC
5760  */
5761 void si_rlc_reset(struct radeon_device *rdev)
5762 {
5763 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5764 
5765 	tmp |= SOFT_RESET_RLC;
5766 	WREG32(GRBM_SOFT_RESET, tmp);
5767 	udelay(50);
5768 	tmp &= ~SOFT_RESET_RLC;
5769 	WREG32(GRBM_SOFT_RESET, tmp);
5770 	udelay(50);
5771 }
5772 
/**
 * si_rlc_stop - halt the RLC
 *
 * @rdev: radeon_device pointer
 *
 * Disable the RLC, mask the GUI idle interrupt and wait for the
 * RLC serdes to go idle before returning.
 */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	si_enable_gui_idle_interrupt(rdev, false);

	si_wait_for_rlc_serdes(rdev);
}
5781 
/**
 * si_rlc_start - start the RLC
 *
 * @rdev: radeon_device pointer
 *
 * Enable the RLC, unmask the GUI idle interrupt and give the RLC a
 * short delay to come up.
 */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	si_enable_gui_idle_interrupt(rdev, true);

	udelay(50);
}
5790 
5791 static bool si_lbpw_supported(struct radeon_device *rdev)
5792 {
5793 	u32 tmp;
5794 
5795 	/* Enable LBPW only for DDR3 */
5796 	tmp = RREG32(MC_SEQ_MISC0);
5797 	if ((tmp & 0xF0000000) == 0xB0000000)
5798 		return true;
5799 	return false;
5800 }
5801 
5802 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5803 {
5804 	u32 tmp;
5805 
5806 	tmp = RREG32(RLC_LB_CNTL);
5807 	if (enable)
5808 		tmp |= LOAD_BALANCE_ENABLE;
5809 	else
5810 		tmp &= ~LOAD_BALANCE_ENABLE;
5811 	WREG32(RLC_LB_CNTL, tmp);
5812 
5813 	if (!enable) {
5814 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5815 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5816 	}
5817 }
5818 
/**
 * si_rlc_resume - load the RLC microcode and start the RLC
 *
 * @rdev: radeon_device pointer
 *
 * Stop and soft-reset the RLC, program powergating/clockgating state,
 * clear the RLC ring and load-balancing registers, upload the RLC
 * ucode (new- or legacy-format firmware image) and restart the RLC.
 * Returns 0 on success, -EINVAL if no RLC firmware is loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	si_rlc_reset(rdev);

	si_init_pg(rdev);

	si_init_cg(rdev);

	/* reset RLC ring/load-balancing state before loading the ucode */
	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* new firmware layout: little-endian image with a header
		 * giving the ucode size and offset
		 */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		const __le32 *fw_data = (const __le32 *)
			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		/* write the ucode one dword at a time via the indexed port */
		for (i = 0; i < fw_size; i++) {
			WREG32(RLC_UCODE_ADDR, i);
			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
		}
	} else {
		/* legacy firmware: raw big-endian dwords of fixed size */
		const __be32 *fw_data =
			(const __be32 *)rdev->rlc_fw->data;
		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
			WREG32(RLC_UCODE_ADDR, i);
			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
		}
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
5873 
5874 static void si_enable_interrupts(struct radeon_device *rdev)
5875 {
5876 	u32 ih_cntl = RREG32(IH_CNTL);
5877 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5878 
5879 	ih_cntl |= ENABLE_INTR;
5880 	ih_rb_cntl |= IH_RB_ENABLE;
5881 	WREG32(IH_CNTL, ih_cntl);
5882 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5883 	rdev->ih.enabled = true;
5884 }
5885 
/**
 * si_disable_interrupts - disable the IH
 *
 * @rdev: radeon_device pointer
 *
 * Clear the IH ring buffer enable and the master interrupt enable,
 * reset the ring read/write pointers and mark the IH disabled in
 * software.
 */
static void si_disable_interrupts(struct radeon_device *rdev)
{
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
	u32 ih_cntl = RREG32(IH_CNTL);

	ih_rb_cntl &= ~IH_RB_ENABLE;
	ih_cntl &= ~ENABLE_INTR;
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	WREG32(IH_CNTL, ih_cntl);
	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);
	rdev->ih.enabled = false;
	rdev->ih.rptr = 0;
}
5901 
/**
 * si_disable_interrupt_state - mask all interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Force every interrupt source (CP rings, DMA engines, GRBM/SRBM,
 * crtc vblank/vline, pageflip, hpd) into the disabled state, while
 * preserving the bits that must survive (CP context busy/empty bits,
 * HPD polarity).
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* keep only the context busy/empty bits; everything else off */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	/* mask the trap interrupt on both DMA engines */
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	WREG32(SRBM_INT_CNTL, 0);
	/* mask vblank/vline interrupts on every populated crtc pair */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* mask pageflip interrupts */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

		/* mask hpd interrupts but keep the polarity bit */
		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
5960 
/**
 * si_irq_init - initialize interrupt handling
 *
 * @rdev: radeon_device pointer
 *
 * Allocate the IH ring buffer, bring up the RLC, program the IH ring
 * registers (base, size, writeback address, wptr overflow handling),
 * force all interrupt sources off and finally enable the IH.
 * Returns 0 on success, negative error code on failure (the IH ring
 * is freed again if the RLC fails to come up).
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
6031 
/**
 * si_irq_set - program the interrupt enables
 *
 * @rdev: radeon_device pointer
 *
 * Translate the software interrupt state (rdev->irq) into hardware
 * enables: CP ring time-stamp interrupts, DMA traps, crtc
 * vblank/pageflip interrupts, hpd interrupts and the thermal
 * interrupt.  If the IH itself is disabled, everything is forced off
 * instead.  Returns 0 on success, -EINVAL if no irq handler is
 * installed.
 */
int si_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;
	u32 thermal_int = 0;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* start from the current register state with the enable bits cleared */
	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);

	if (!ASIC_IS_NODCE(rdev)) {
		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
	}

	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	thermal_int = RREG32(CG_THERMAL_INT) &
		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}
	/* vblank interrupts: wanted for either vblank waiters or pageflips */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("si_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("si_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("si_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("si_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("si_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("si_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("si_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("si_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("si_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("si_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("si_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("si_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	/* write everything back to the hardware */
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	if (rdev->irq.dpm_thermal) {
		DRM_DEBUG("dpm thermal\n");
		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
	}

	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	/* pageflip interrupts are always left enabled */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DC_HPD1_INT_CONTROL, hpd1);
		WREG32(DC_HPD2_INT_CONTROL, hpd2);
		WREG32(DC_HPD3_INT_CONTROL, hpd3);
		WREG32(DC_HPD4_INT_CONTROL, hpd4);
		WREG32(DC_HPD5_INT_CONTROL, hpd5);
		WREG32(DC_HPD6_INT_CONTROL, hpd6);
	}

	WREG32(CG_THERMAL_INT, thermal_int);

	/* posting read */
	RREG32(SRBM_STATUS);

	return 0;
}
6211 
6212 static inline void si_irq_ack(struct radeon_device *rdev)
6213 {
6214 	u32 tmp;
6215 
6216 	if (ASIC_IS_NODCE(rdev))
6217 		return;
6218 
6219 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6220 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6221 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6222 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6223 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6224 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6225 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
6226 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
6227 	if (rdev->num_crtc >= 4) {
6228 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
6229 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
6230 	}
6231 	if (rdev->num_crtc >= 6) {
6232 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
6233 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
6234 	}
6235 
6236 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
6237 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6238 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
6239 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6240 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
6241 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6242 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
6243 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6244 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6245 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6246 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6247 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6248 
6249 	if (rdev->num_crtc >= 4) {
6250 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
6251 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6252 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
6253 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6254 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6255 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6256 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6257 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6258 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6259 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6260 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6261 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6262 	}
6263 
6264 	if (rdev->num_crtc >= 6) {
6265 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6266 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6267 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6268 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6269 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6270 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6271 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6272 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6273 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6274 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6275 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6276 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6277 	}
6278 
6279 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6280 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6281 		tmp |= DC_HPDx_INT_ACK;
6282 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6283 	}
6284 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6285 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6286 		tmp |= DC_HPDx_INT_ACK;
6287 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6288 	}
6289 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6290 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6291 		tmp |= DC_HPDx_INT_ACK;
6292 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6293 	}
6294 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6295 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6296 		tmp |= DC_HPDx_INT_ACK;
6297 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6298 	}
6299 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6300 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6301 		tmp |= DC_HPDx_INT_ACK;
6302 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6303 	}
6304 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6305 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6306 		tmp |= DC_HPDx_INT_ACK;
6307 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6308 	}
6309 }
6310 
/**
 * si_irq_disable - disable interrupts and clear pending state
 *
 * @rdev: radeon_device pointer
 *
 * Disable the IH, wait briefly for in-flight interrupts, ack anything
 * still pending and force all interrupt sources off.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
6319 
/**
 * si_irq_suspend - quiesce interrupt handling for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC.
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
6325 
/**
 * si_irq_fini - tear down interrupt handling
 *
 * @rdev: radeon_device pointer
 *
 * Quiesce interrupt handling and free the IH ring buffer.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6331 
/**
 * si_get_ih_wptr - fetch the current IH write pointer
 *
 * @rdev: radeon_device pointer
 *
 * Read the IH write pointer from the writeback buffer when enabled,
 * otherwise from the register.  On ring overflow, advance rptr past
 * the overwritten entries and clear the overflow flag.
 * Returns the write pointer masked to the ring size.
 */
static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
{
	u32 wptr, tmp;

	if (rdev->wb.enabled)
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
	else
		wptr = RREG32(IH_RB_WPTR);

	if (wptr & RB_OVERFLOW) {
		wptr &= ~RB_OVERFLOW;
		/* When a ring buffer overflow happen start parsing interrupt
		 * from the last not overwritten vector (wptr + 16). Hopefully
		 * this should allow us to catchup.
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
		tmp = RREG32(IH_RB_CNTL);
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
		WREG32(IH_RB_CNTL, tmp);
	}
	return (wptr & rdev->ih.ptr_mask);
}
6356 
6357 /*        SI IV Ring
6358  * Each IV ring entry is 128 bits:
6359  * [7:0]    - interrupt source id
6360  * [31:8]   - reserved
6361  * [59:32]  - interrupt source data
6362  * [63:60]  - reserved
6363  * [71:64]  - RINGID
6364  * [79:72]  - VMID
6365  * [127:80] - reserved
6366  */
6367 int si_irq_process(struct radeon_device *rdev)
6368 {
6369 	u32 wptr;
6370 	u32 rptr;
6371 	u32 src_id, src_data, ring_id;
6372 	u32 ring_index;
6373 	bool queue_hotplug = false;
6374 	bool queue_thermal = false;
6375 	u32 status, addr;
6376 
6377 	if (!rdev->ih.enabled || rdev->shutdown)
6378 		return IRQ_NONE;
6379 
6380 	wptr = si_get_ih_wptr(rdev);
6381 
6382 restart_ih:
6383 	/* is somebody else already processing irqs? */
6384 	if (atomic_xchg(&rdev->ih.lock, 1))
6385 		return IRQ_NONE;
6386 
6387 	rptr = rdev->ih.rptr;
6388 	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6389 
6390 	/* Order reading of wptr vs. reading of IH ring data */
6391 	rmb();
6392 
6393 	/* display interrupts */
6394 	si_irq_ack(rdev);
6395 
6396 	while (rptr != wptr) {
6397 		/* wptr/rptr are in bytes! */
6398 		ring_index = rptr / 4;
6399 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6400 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6401 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6402 
6403 		switch (src_id) {
6404 		case 1: /* D1 vblank/vline */
6405 			switch (src_data) {
6406 			case 0: /* D1 vblank */
6407 				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
6408 					if (rdev->irq.crtc_vblank_int[0]) {
6409 						drm_handle_vblank(rdev->ddev, 0);
6410 						rdev->pm.vblank_sync = true;
6411 						wake_up(&rdev->irq.vblank_queue);
6412 					}
6413 					if (atomic_read(&rdev->irq.pflip[0]))
6414 						radeon_crtc_handle_vblank(rdev, 0);
6415 					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6416 					DRM_DEBUG("IH: D1 vblank\n");
6417 				}
6418 				break;
6419 			case 1: /* D1 vline */
6420 				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
6421 					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6422 					DRM_DEBUG("IH: D1 vline\n");
6423 				}
6424 				break;
6425 			default:
6426 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6427 				break;
6428 			}
6429 			break;
6430 		case 2: /* D2 vblank/vline */
6431 			switch (src_data) {
6432 			case 0: /* D2 vblank */
6433 				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6434 					if (rdev->irq.crtc_vblank_int[1]) {
6435 						drm_handle_vblank(rdev->ddev, 1);
6436 						rdev->pm.vblank_sync = true;
6437 						wake_up(&rdev->irq.vblank_queue);
6438 					}
6439 					if (atomic_read(&rdev->irq.pflip[1]))
6440 						radeon_crtc_handle_vblank(rdev, 1);
6441 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6442 					DRM_DEBUG("IH: D2 vblank\n");
6443 				}
6444 				break;
6445 			case 1: /* D2 vline */
6446 				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6447 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6448 					DRM_DEBUG("IH: D2 vline\n");
6449 				}
6450 				break;
6451 			default:
6452 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6453 				break;
6454 			}
6455 			break;
6456 		case 3: /* D3 vblank/vline */
6457 			switch (src_data) {
6458 			case 0: /* D3 vblank */
6459 				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6460 					if (rdev->irq.crtc_vblank_int[2]) {
6461 						drm_handle_vblank(rdev->ddev, 2);
6462 						rdev->pm.vblank_sync = true;
6463 						wake_up(&rdev->irq.vblank_queue);
6464 					}
6465 					if (atomic_read(&rdev->irq.pflip[2]))
6466 						radeon_crtc_handle_vblank(rdev, 2);
6467 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6468 					DRM_DEBUG("IH: D3 vblank\n");
6469 				}
6470 				break;
6471 			case 1: /* D3 vline */
6472 				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6473 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6474 					DRM_DEBUG("IH: D3 vline\n");
6475 				}
6476 				break;
6477 			default:
6478 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6479 				break;
6480 			}
6481 			break;
6482 		case 4: /* D4 vblank/vline */
6483 			switch (src_data) {
6484 			case 0: /* D4 vblank */
6485 				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6486 					if (rdev->irq.crtc_vblank_int[3]) {
6487 						drm_handle_vblank(rdev->ddev, 3);
6488 						rdev->pm.vblank_sync = true;
6489 						wake_up(&rdev->irq.vblank_queue);
6490 					}
6491 					if (atomic_read(&rdev->irq.pflip[3]))
6492 						radeon_crtc_handle_vblank(rdev, 3);
6493 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6494 					DRM_DEBUG("IH: D4 vblank\n");
6495 				}
6496 				break;
6497 			case 1: /* D4 vline */
6498 				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6499 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6500 					DRM_DEBUG("IH: D4 vline\n");
6501 				}
6502 				break;
6503 			default:
6504 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6505 				break;
6506 			}
6507 			break;
6508 		case 5: /* D5 vblank/vline */
6509 			switch (src_data) {
6510 			case 0: /* D5 vblank */
6511 				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6512 					if (rdev->irq.crtc_vblank_int[4]) {
6513 						drm_handle_vblank(rdev->ddev, 4);
6514 						rdev->pm.vblank_sync = true;
6515 						wake_up(&rdev->irq.vblank_queue);
6516 					}
6517 					if (atomic_read(&rdev->irq.pflip[4]))
6518 						radeon_crtc_handle_vblank(rdev, 4);
6519 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6520 					DRM_DEBUG("IH: D5 vblank\n");
6521 				}
6522 				break;
6523 			case 1: /* D5 vline */
6524 				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6525 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6526 					DRM_DEBUG("IH: D5 vline\n");
6527 				}
6528 				break;
6529 			default:
6530 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6531 				break;
6532 			}
6533 			break;
6534 		case 6: /* D6 vblank/vline */
6535 			switch (src_data) {
6536 			case 0: /* D6 vblank */
6537 				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6538 					if (rdev->irq.crtc_vblank_int[5]) {
6539 						drm_handle_vblank(rdev->ddev, 5);
6540 						rdev->pm.vblank_sync = true;
6541 						wake_up(&rdev->irq.vblank_queue);
6542 					}
6543 					if (atomic_read(&rdev->irq.pflip[5]))
6544 						radeon_crtc_handle_vblank(rdev, 5);
6545 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6546 					DRM_DEBUG("IH: D6 vblank\n");
6547 				}
6548 				break;
6549 			case 1: /* D6 vline */
6550 				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6551 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6552 					DRM_DEBUG("IH: D6 vline\n");
6553 				}
6554 				break;
6555 			default:
6556 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6557 				break;
6558 			}
6559 			break;
6560 		case 8: /* D1 page flip */
6561 		case 10: /* D2 page flip */
6562 		case 12: /* D3 page flip */
6563 		case 14: /* D4 page flip */
6564 		case 16: /* D5 page flip */
6565 		case 18: /* D6 page flip */
6566 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6567 			if (radeon_use_pflipirq > 0)
6568 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6569 			break;
6570 		case 42: /* HPD hotplug */
6571 			switch (src_data) {
6572 			case 0:
6573 				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6574 					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6575 					queue_hotplug = true;
6576 					DRM_DEBUG("IH: HPD1\n");
6577 				}
6578 				break;
6579 			case 1:
6580 				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6581 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6582 					queue_hotplug = true;
6583 					DRM_DEBUG("IH: HPD2\n");
6584 				}
6585 				break;
6586 			case 2:
6587 				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6588 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6589 					queue_hotplug = true;
6590 					DRM_DEBUG("IH: HPD3\n");
6591 				}
6592 				break;
6593 			case 3:
6594 				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6595 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6596 					queue_hotplug = true;
6597 					DRM_DEBUG("IH: HPD4\n");
6598 				}
6599 				break;
6600 			case 4:
6601 				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6602 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6603 					queue_hotplug = true;
6604 					DRM_DEBUG("IH: HPD5\n");
6605 				}
6606 				break;
6607 			case 5:
6608 				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6609 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6610 					queue_hotplug = true;
6611 					DRM_DEBUG("IH: HPD6\n");
6612 				}
6613 				break;
6614 			default:
6615 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6616 				break;
6617 			}
6618 			break;
6619 		case 96:
6620 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
6621 			WREG32(SRBM_INT_ACK, 0x1);
6622 			break;
6623 		case 124: /* UVD */
6624 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6625 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6626 			break;
6627 		case 146:
6628 		case 147:
6629 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6630 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6631 			/* reset addr and status */
6632 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6633 			if (addr == 0x0 && status == 0x0)
6634 				break;
6635 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6636 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6637 				addr);
6638 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6639 				status);
6640 			si_vm_decode_fault(rdev, status, addr);
6641 			break;
6642 		case 176: /* RINGID0 CP_INT */
6643 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6644 			break;
6645 		case 177: /* RINGID1 CP_INT */
6646 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6647 			break;
6648 		case 178: /* RINGID2 CP_INT */
6649 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6650 			break;
6651 		case 181: /* CP EOP event */
6652 			DRM_DEBUG("IH: CP EOP\n");
6653 			switch (ring_id) {
6654 			case 0:
6655 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6656 				break;
6657 			case 1:
6658 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6659 				break;
6660 			case 2:
6661 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6662 				break;
6663 			}
6664 			break;
6665 		case 224: /* DMA trap event */
6666 			DRM_DEBUG("IH: DMA trap\n");
6667 			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6668 			break;
6669 		case 230: /* thermal low to high */
6670 			DRM_DEBUG("IH: thermal low to high\n");
6671 			rdev->pm.dpm.thermal.high_to_low = false;
6672 			queue_thermal = true;
6673 			break;
6674 		case 231: /* thermal high to low */
6675 			DRM_DEBUG("IH: thermal high to low\n");
6676 			rdev->pm.dpm.thermal.high_to_low = true;
6677 			queue_thermal = true;
6678 			break;
6679 		case 233: /* GUI IDLE */
6680 			DRM_DEBUG("IH: GUI idle\n");
6681 			break;
6682 		case 244: /* DMA trap event */
6683 			DRM_DEBUG("IH: DMA1 trap\n");
6684 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6685 			break;
6686 		default:
6687 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6688 			break;
6689 		}
6690 
6691 		/* wptr/rptr are in bytes! */
6692 		rptr += 16;
6693 		rptr &= rdev->ih.ptr_mask;
6694 		WREG32(IH_RB_RPTR, rptr);
6695 	}
6696 	if (queue_hotplug)
6697 		schedule_work(&rdev->hotplug_work);
6698 	if (queue_thermal && rdev->pm.dpm_enabled)
6699 		schedule_work(&rdev->pm.dpm.thermal.work);
6700 	rdev->ih.rptr = rptr;
6701 	atomic_set(&rdev->ih.lock, 0);
6702 
6703 	/* make sure wptr hasn't changed while processing */
6704 	wptr = si_get_ih_wptr(rdev);
6705 	if (wptr != rptr)
6706 		goto restart_ih;
6707 
6708 	return IRQ_HANDLED;
6709 }
6710 
6711 /*
6712  * startup/shutdown callbacks
6713  */
/**
 * si_startup - program the asic to a functional state (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Brings the hw up in order: PCIe link speed and ASPM, VRAM scratch,
 * MC, GART, GFX config, RLC, writeback, fence drivers, UVD, interrupts,
 * then the CP/DMA rings themselves, the IB pool, the VM manager and
 * audio.  Shared by si_init() and si_resume().
 * Returns 0 for success, negative error code on failure.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);

	/* only load the MC ucode here on the non-DPM path.
	 * NOTE(review): presumably the DPM code loads it itself when
	 * dpm_enabled is set - confirm against the radeon pm code.
	 */
	if (!rdev->pm.dpm_enabled) {
		r = si_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->family == CHIP_VERDE) {
		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size =
			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
	}
	rdev->rlc.cs_data = si_cs_data;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start the fence driver on all five rings (GFX, CP1, CP2 and the
	 * two DMA engines) before the rings themselves are initialized
	 */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* a UVD failure is not fatal: zeroing ring_size makes the UVD ring
	 * init below a no-op and the rest of startup proceeds without UVD
	 */
	if (rdev->has_uvd) {
		r = uvd_v2_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
		if (r)
			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* ring_size == 0 here means UVD resume failed above; skip it */
	if (rdev->has_uvd) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		if (ring->ring_size) {
			r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
					     RADEON_CP_PACKET2);
			if (!r)
				r = uvd_v1_0_init(rdev);
			if (r)
				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
		}
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
6890 
6891 int si_resume(struct radeon_device *rdev)
6892 {
6893 	int r;
6894 
6895 	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6896 	 * posting will perform necessary task to bring back GPU into good
6897 	 * shape.
6898 	 */
6899 	/* post card */
6900 	atom_asic_init(rdev->mode_info.atom_context);
6901 
6902 	/* init golden registers */
6903 	si_init_golden_registers(rdev);
6904 
6905 	if (rdev->pm.pm_method == PM_METHOD_DPM)
6906 		radeon_pm_resume(rdev);
6907 
6908 	rdev->accel_working = true;
6909 	r = si_startup(rdev);
6910 	if (r) {
6911 		DRM_ERROR("si startup failed on resume\n");
6912 		rdev->accel_working = false;
6913 		return r;
6914 	}
6915 
6916 	return r;
6917 
6918 }
6919 
/**
 * si_suspend - asic suspend callback (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Tears the hw down roughly in reverse order of si_startup(): pm and
 * audio first, then the VM manager, the CP and DMA engines, UVD,
 * powergating/clockgating, interrupts, writeback and finally GART.
 * The ordering is deliberate; do not reorder these calls.
 * Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
6938 
/* The plan is to move initialization into this function and to use
 * helper functions so that radeon_device_init does little more than
 * call asic specific functions. This should also allow us to remove
 * a bunch of callback functions like vram_info.
 */
6945 int si_init(struct radeon_device *rdev)
6946 {
6947 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6948 	int r;
6949 
6950 	/* Read BIOS */
6951 	if (!radeon_get_bios(rdev)) {
6952 		if (ASIC_IS_AVIVO(rdev))
6953 			return -EINVAL;
6954 	}
6955 	/* Must be an ATOMBIOS */
6956 	if (!rdev->is_atom_bios) {
6957 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6958 		return -EINVAL;
6959 	}
6960 	r = radeon_atombios_init(rdev);
6961 	if (r)
6962 		return r;
6963 
6964 	/* Post card if necessary */
6965 	if (!radeon_card_posted(rdev)) {
6966 		if (!rdev->bios) {
6967 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6968 			return -EINVAL;
6969 		}
6970 		DRM_INFO("GPU not posted. posting now...\n");
6971 		atom_asic_init(rdev->mode_info.atom_context);
6972 	}
6973 	/* init golden registers */
6974 	si_init_golden_registers(rdev);
6975 	/* Initialize scratch registers */
6976 	si_scratch_init(rdev);
6977 	/* Initialize surface registers */
6978 	radeon_surface_init(rdev);
6979 	/* Initialize clocks */
6980 	radeon_get_clock_info(rdev->ddev);
6981 
6982 	/* Fence driver */
6983 	r = radeon_fence_driver_init(rdev);
6984 	if (r)
6985 		return r;
6986 
6987 	/* initialize memory controller */
6988 	r = si_mc_init(rdev);
6989 	if (r)
6990 		return r;
6991 	/* Memory manager */
6992 	r = radeon_bo_init(rdev);
6993 	if (r)
6994 		return r;
6995 
6996 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6997 	    !rdev->rlc_fw || !rdev->mc_fw) {
6998 		r = si_init_microcode(rdev);
6999 		if (r) {
7000 			DRM_ERROR("Failed to load firmware!\n");
7001 			return r;
7002 		}
7003 	}
7004 
7005 	/* Initialize power management */
7006 	radeon_pm_init(rdev);
7007 
7008 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7009 	ring->ring_obj = NULL;
7010 	r600_ring_init(rdev, ring, 1024 * 1024);
7011 
7012 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7013 	ring->ring_obj = NULL;
7014 	r600_ring_init(rdev, ring, 1024 * 1024);
7015 
7016 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7017 	ring->ring_obj = NULL;
7018 	r600_ring_init(rdev, ring, 1024 * 1024);
7019 
7020 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7021 	ring->ring_obj = NULL;
7022 	r600_ring_init(rdev, ring, 64 * 1024);
7023 
7024 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7025 	ring->ring_obj = NULL;
7026 	r600_ring_init(rdev, ring, 64 * 1024);
7027 
7028 	if (rdev->has_uvd) {
7029 		r = radeon_uvd_init(rdev);
7030 		if (!r) {
7031 			ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7032 			ring->ring_obj = NULL;
7033 			r600_ring_init(rdev, ring, 4096);
7034 		}
7035 	}
7036 
7037 	rdev->ih.ring_obj = NULL;
7038 	r600_ih_ring_init(rdev, 64 * 1024);
7039 
7040 	r = r600_pcie_gart_init(rdev);
7041 	if (r)
7042 		return r;
7043 
7044 	rdev->accel_working = true;
7045 	r = si_startup(rdev);
7046 	if (r) {
7047 		dev_err(rdev->dev, "disabling GPU acceleration\n");
7048 		si_cp_fini(rdev);
7049 		cayman_dma_fini(rdev);
7050 		si_irq_fini(rdev);
7051 		sumo_rlc_fini(rdev);
7052 		radeon_wb_fini(rdev);
7053 		radeon_ib_pool_fini(rdev);
7054 		radeon_vm_manager_fini(rdev);
7055 		radeon_irq_kms_fini(rdev);
7056 		si_pcie_gart_fini(rdev);
7057 		rdev->accel_working = false;
7058 	}
7059 
7060 	/* Don't start up if the MC ucode is missing.
7061 	 * The default clocks and voltages before the MC ucode
7062 	 * is loaded are not suffient for advanced operations.
7063 	 */
7064 	if (!rdev->mc_fw) {
7065 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
7066 		return -EINVAL;
7067 	}
7068 
7069 	return 0;
7070 }
7071 
/**
 * si_fini - asic specific driver and hw teardown (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Called at driver unload; undoes si_init(): stops pm, shuts down the
 * CP and DMA engines, gating, interrupts, RLC, writeback, VM manager,
 * IB pool, UVD and GART, then releases the memory manager, fence
 * driver, atombios state and the BIOS copy.
 */
void si_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_fini(rdev);
	}
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	/* guard against use-after-free of the BIOS copy */
	rdev->bios = NULL;
}
7098 
7099 /**
7100  * si_get_gpu_clock_counter - return GPU clock counter snapshot
7101  *
7102  * @rdev: radeon_device pointer
7103  *
7104  * Fetches a GPU clock counter snapshot (SI).
7105  * Returns the 64 bit clock counter snapshot.
7106  */
7107 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
7108 {
7109 	uint64_t clock;
7110 
7111 	mutex_lock(&rdev->gpu_clock_mutex);
7112 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
7113 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7114 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
7115 	mutex_unlock(&rdev->gpu_clock_mutex);
7116 	return clock;
7117 }
7118 
/**
 * si_set_uvd_clocks - reprogram the UPLL for the requested UVD clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested VCLK; 0 leaves the PLL in bypass
 * @dclk: requested DCLK; 0 leaves the PLL in bypass
 *
 * Switches VCLK/DCLK to the bypass source, computes feedback and post
 * dividers for the requested clocks, reprograms and relocks the UPLL
 * (with the settling delays the sequence requires), then switches
 * VCLK/DCLK back to the PLL outputs.  The register write order and the
 * mdelay()s are part of the programming sequence; do not reorder.
 * Returns 0 for success, negative error code on failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode */
		return 0;
	}

	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* disable sleep mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* spare bit selects between two VCO ranges based on fb_div */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
7207 
/**
 * si_pcie_gen3_enable - try to raise the PCIe link to gen2/gen3 speed
 *
 * @rdev: radeon_device pointer
 *
 * If the platform advertises 5.0 or 8.0 GT/s support and the
 * radeon.pcie_gen2 module option has not disabled it, retrains the
 * link at the highest supported rate.  For gen3 the link equalization
 * is redone first via the LC_CNTL4 quiesce/redo-EQ sequence while
 * preserving the bridge and GPU link-control settings.
 * Silently does nothing for IGP, non-PCIE or root-bus devices.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* LC_CURRENT_DATA_RATE: 0 = gen1, 1 = gen2, 2 = gen3 */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the current LNKCTL values of bridge and GPU so
			 * their HAWD bits can be restored after each retry
			 */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate to the full detected link width first */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries; bail out early once
			 * the GPU reports a transaction pending
			 */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then ask the LC to redo
				 * equalization
				 */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl: restore the saved HAWD bits */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2: restore bit 4 and bits 11:9.
				 * NOTE(review): magic masks - compare against
				 * the PCI_EXP_LNKCTL2_* field definitions.
				 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* target link speed field of LNKCTL2 (bits 3:0) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change and wait for the hw to ack it */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
7367 
/**
 * si_program_aspm - program PCIe ASPM related registers (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Configures L0s/L1 inactivity timers, PLL powerdown in L1 and - when
 * the upstream bridge advertises clock power management - switches
 * several internal clock selects (THM, MISC, CG_CLKPIN, MPLL, SPLL)
 * for CLKREQ# support.  Skipped entirely when the radeon.aspm module
 * option is 0 or the device is not PCIE.  The disable_* locals are
 * compile-time knobs, all currently false.
 */
static void si_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* every register below is written read-modify-write and only
	 * touched when the value actually changes (orig != data)
	 */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE(PCIE_P_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PHY PLLs to power down in L1/TXS2 */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

			/* zero the PLL ramp-up times on everything except
			 * Oland and Hainan
			 */
			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
			}
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);

			/* clock power management is only usable when the
			 * upstream bridge advertises PCI_EXP_LNKCAP_CLKPM
			 */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32(THM_CLK_CNTL, data);

				orig = data = RREG32(MISC_CLK_CNTL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32(MISC_CLK_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32(MPLL_BYPASSCLK_SEL, data);

				orig = data = RREG32(SPLL_CNTL_MODE);
				data &= ~SPLL_REFCLK_SEL_MASK;
				if (orig != data)
					WREG32(SPLL_CNTL_MODE, data);
			}
		}
	} else {
		/* L1 disabled: write back the LC_CNTL value prepared above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE(PCIE_CNTL2, data);

	/* drop the L0s inactivity timer again when both directions of the
	 * link are reversed and the N_FTS field is saturated
	 */
	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
7572