xref: /openbmc/linux/drivers/gpu/drm/radeon/si.c (revision 77d84ff8)
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include <drm/radeon_drm.h>
31 #include "sid.h"
32 #include "atom.h"
33 #include "si_blit_shaders.h"
34 #include "clearstate_si.h"
35 #include "radeon_ucode.h"
36 
37 
/*
 * Firmware images this driver may request at runtime: one set of six blobs
 * (pfp, me, ce, mc, rlc, smc) per SI-family ASIC — Tahiti, Pitcairn, Verde,
 * Oland and Hainan.  MODULE_FIRMWARE() records each name in the module info
 * so userspace tooling can pre-install the files.
 */
MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_smc.bin");
MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_smc.bin");
MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
MODULE_FIRMWARE("radeon/HAINAN_me.bin");
MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
68 
/* Forward declarations for si.c-local helpers defined later in this file. */
static void si_pcie_gen3_enable(struct radeon_device *rdev);
static void si_program_aspm(struct radeon_device *rdev);
/*
 * Helpers borrowed from other ASIC files (the sumo_/r600_/evergreen_
 * prefixes suggest sumo_rlc.c / r600.c / evergreen.c — confirm in those
 * files; ideally these externs would live in a shared header).
 */
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
/* More si.c-local forward declarations. */
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable);
static void si_fini_pg(struct radeon_device *rdev);
static void si_fini_cg(struct radeon_device *rdev);
static void si_rlc_stop(struct radeon_device *rdev);
87 static const u32 verde_rlc_save_restore_register_list[] =
88 {
89 	(0x8000 << 16) | (0x98f4 >> 2),
90 	0x00000000,
91 	(0x8040 << 16) | (0x98f4 >> 2),
92 	0x00000000,
93 	(0x8000 << 16) | (0xe80 >> 2),
94 	0x00000000,
95 	(0x8040 << 16) | (0xe80 >> 2),
96 	0x00000000,
97 	(0x8000 << 16) | (0x89bc >> 2),
98 	0x00000000,
99 	(0x8040 << 16) | (0x89bc >> 2),
100 	0x00000000,
101 	(0x8000 << 16) | (0x8c1c >> 2),
102 	0x00000000,
103 	(0x8040 << 16) | (0x8c1c >> 2),
104 	0x00000000,
105 	(0x9c00 << 16) | (0x98f0 >> 2),
106 	0x00000000,
107 	(0x9c00 << 16) | (0xe7c >> 2),
108 	0x00000000,
109 	(0x8000 << 16) | (0x9148 >> 2),
110 	0x00000000,
111 	(0x8040 << 16) | (0x9148 >> 2),
112 	0x00000000,
113 	(0x9c00 << 16) | (0x9150 >> 2),
114 	0x00000000,
115 	(0x9c00 << 16) | (0x897c >> 2),
116 	0x00000000,
117 	(0x9c00 << 16) | (0x8d8c >> 2),
118 	0x00000000,
119 	(0x9c00 << 16) | (0xac54 >> 2),
120 	0X00000000,
121 	0x3,
122 	(0x9c00 << 16) | (0x98f8 >> 2),
123 	0x00000000,
124 	(0x9c00 << 16) | (0x9910 >> 2),
125 	0x00000000,
126 	(0x9c00 << 16) | (0x9914 >> 2),
127 	0x00000000,
128 	(0x9c00 << 16) | (0x9918 >> 2),
129 	0x00000000,
130 	(0x9c00 << 16) | (0x991c >> 2),
131 	0x00000000,
132 	(0x9c00 << 16) | (0x9920 >> 2),
133 	0x00000000,
134 	(0x9c00 << 16) | (0x9924 >> 2),
135 	0x00000000,
136 	(0x9c00 << 16) | (0x9928 >> 2),
137 	0x00000000,
138 	(0x9c00 << 16) | (0x992c >> 2),
139 	0x00000000,
140 	(0x9c00 << 16) | (0x9930 >> 2),
141 	0x00000000,
142 	(0x9c00 << 16) | (0x9934 >> 2),
143 	0x00000000,
144 	(0x9c00 << 16) | (0x9938 >> 2),
145 	0x00000000,
146 	(0x9c00 << 16) | (0x993c >> 2),
147 	0x00000000,
148 	(0x9c00 << 16) | (0x9940 >> 2),
149 	0x00000000,
150 	(0x9c00 << 16) | (0x9944 >> 2),
151 	0x00000000,
152 	(0x9c00 << 16) | (0x9948 >> 2),
153 	0x00000000,
154 	(0x9c00 << 16) | (0x994c >> 2),
155 	0x00000000,
156 	(0x9c00 << 16) | (0x9950 >> 2),
157 	0x00000000,
158 	(0x9c00 << 16) | (0x9954 >> 2),
159 	0x00000000,
160 	(0x9c00 << 16) | (0x9958 >> 2),
161 	0x00000000,
162 	(0x9c00 << 16) | (0x995c >> 2),
163 	0x00000000,
164 	(0x9c00 << 16) | (0x9960 >> 2),
165 	0x00000000,
166 	(0x9c00 << 16) | (0x9964 >> 2),
167 	0x00000000,
168 	(0x9c00 << 16) | (0x9968 >> 2),
169 	0x00000000,
170 	(0x9c00 << 16) | (0x996c >> 2),
171 	0x00000000,
172 	(0x9c00 << 16) | (0x9970 >> 2),
173 	0x00000000,
174 	(0x9c00 << 16) | (0x9974 >> 2),
175 	0x00000000,
176 	(0x9c00 << 16) | (0x9978 >> 2),
177 	0x00000000,
178 	(0x9c00 << 16) | (0x997c >> 2),
179 	0x00000000,
180 	(0x9c00 << 16) | (0x9980 >> 2),
181 	0x00000000,
182 	(0x9c00 << 16) | (0x9984 >> 2),
183 	0x00000000,
184 	(0x9c00 << 16) | (0x9988 >> 2),
185 	0x00000000,
186 	(0x9c00 << 16) | (0x998c >> 2),
187 	0x00000000,
188 	(0x9c00 << 16) | (0x8c00 >> 2),
189 	0x00000000,
190 	(0x9c00 << 16) | (0x8c14 >> 2),
191 	0x00000000,
192 	(0x9c00 << 16) | (0x8c04 >> 2),
193 	0x00000000,
194 	(0x9c00 << 16) | (0x8c08 >> 2),
195 	0x00000000,
196 	(0x8000 << 16) | (0x9b7c >> 2),
197 	0x00000000,
198 	(0x8040 << 16) | (0x9b7c >> 2),
199 	0x00000000,
200 	(0x8000 << 16) | (0xe84 >> 2),
201 	0x00000000,
202 	(0x8040 << 16) | (0xe84 >> 2),
203 	0x00000000,
204 	(0x8000 << 16) | (0x89c0 >> 2),
205 	0x00000000,
206 	(0x8040 << 16) | (0x89c0 >> 2),
207 	0x00000000,
208 	(0x8000 << 16) | (0x914c >> 2),
209 	0x00000000,
210 	(0x8040 << 16) | (0x914c >> 2),
211 	0x00000000,
212 	(0x8000 << 16) | (0x8c20 >> 2),
213 	0x00000000,
214 	(0x8040 << 16) | (0x8c20 >> 2),
215 	0x00000000,
216 	(0x8000 << 16) | (0x9354 >> 2),
217 	0x00000000,
218 	(0x8040 << 16) | (0x9354 >> 2),
219 	0x00000000,
220 	(0x9c00 << 16) | (0x9060 >> 2),
221 	0x00000000,
222 	(0x9c00 << 16) | (0x9364 >> 2),
223 	0x00000000,
224 	(0x9c00 << 16) | (0x9100 >> 2),
225 	0x00000000,
226 	(0x9c00 << 16) | (0x913c >> 2),
227 	0x00000000,
228 	(0x8000 << 16) | (0x90e0 >> 2),
229 	0x00000000,
230 	(0x8000 << 16) | (0x90e4 >> 2),
231 	0x00000000,
232 	(0x8000 << 16) | (0x90e8 >> 2),
233 	0x00000000,
234 	(0x8040 << 16) | (0x90e0 >> 2),
235 	0x00000000,
236 	(0x8040 << 16) | (0x90e4 >> 2),
237 	0x00000000,
238 	(0x8040 << 16) | (0x90e8 >> 2),
239 	0x00000000,
240 	(0x9c00 << 16) | (0x8bcc >> 2),
241 	0x00000000,
242 	(0x9c00 << 16) | (0x8b24 >> 2),
243 	0x00000000,
244 	(0x9c00 << 16) | (0x88c4 >> 2),
245 	0x00000000,
246 	(0x9c00 << 16) | (0x8e50 >> 2),
247 	0x00000000,
248 	(0x9c00 << 16) | (0x8c0c >> 2),
249 	0x00000000,
250 	(0x9c00 << 16) | (0x8e58 >> 2),
251 	0x00000000,
252 	(0x9c00 << 16) | (0x8e5c >> 2),
253 	0x00000000,
254 	(0x9c00 << 16) | (0x9508 >> 2),
255 	0x00000000,
256 	(0x9c00 << 16) | (0x950c >> 2),
257 	0x00000000,
258 	(0x9c00 << 16) | (0x9494 >> 2),
259 	0x00000000,
260 	(0x9c00 << 16) | (0xac0c >> 2),
261 	0x00000000,
262 	(0x9c00 << 16) | (0xac10 >> 2),
263 	0x00000000,
264 	(0x9c00 << 16) | (0xac14 >> 2),
265 	0x00000000,
266 	(0x9c00 << 16) | (0xae00 >> 2),
267 	0x00000000,
268 	(0x9c00 << 16) | (0xac08 >> 2),
269 	0x00000000,
270 	(0x9c00 << 16) | (0x88d4 >> 2),
271 	0x00000000,
272 	(0x9c00 << 16) | (0x88c8 >> 2),
273 	0x00000000,
274 	(0x9c00 << 16) | (0x88cc >> 2),
275 	0x00000000,
276 	(0x9c00 << 16) | (0x89b0 >> 2),
277 	0x00000000,
278 	(0x9c00 << 16) | (0x8b10 >> 2),
279 	0x00000000,
280 	(0x9c00 << 16) | (0x8a14 >> 2),
281 	0x00000000,
282 	(0x9c00 << 16) | (0x9830 >> 2),
283 	0x00000000,
284 	(0x9c00 << 16) | (0x9834 >> 2),
285 	0x00000000,
286 	(0x9c00 << 16) | (0x9838 >> 2),
287 	0x00000000,
288 	(0x9c00 << 16) | (0x9a10 >> 2),
289 	0x00000000,
290 	(0x8000 << 16) | (0x9870 >> 2),
291 	0x00000000,
292 	(0x8000 << 16) | (0x9874 >> 2),
293 	0x00000000,
294 	(0x8001 << 16) | (0x9870 >> 2),
295 	0x00000000,
296 	(0x8001 << 16) | (0x9874 >> 2),
297 	0x00000000,
298 	(0x8040 << 16) | (0x9870 >> 2),
299 	0x00000000,
300 	(0x8040 << 16) | (0x9874 >> 2),
301 	0x00000000,
302 	(0x8041 << 16) | (0x9870 >> 2),
303 	0x00000000,
304 	(0x8041 << 16) | (0x9874 >> 2),
305 	0x00000000,
306 	0x00000000
307 };
308 
/*
 * Tahiti RLC "golden" register overrides.  Rows of three words — layout
 * looks like {offset, mask, value} applied as masked writes; the consumer
 * is outside this chunk (NOTE(review): confirm against the code that walks
 * these tables, e.g. radeon_program_register_sequence()).
 */
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};
318 
/*
 * Tahiti "golden" register settings.  Rows of three words — layout looks
 * like {offset, mask, value}; consumer outside this chunk (confirm).
 */
static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};
353 
/* Second Tahiti golden-register table; same {offset, mask, value} row shape. */
static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};
358 
/* Pitcairn RLC golden register overrides ({offset, mask, value} rows). */
static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};
367 
/* Pitcairn golden register settings ({offset, mask, value} rows). */
static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
398 
/* Verde RLC golden register overrides ({offset, mask, value} rows). */
static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};
407 
/*
 * Verde golden register settings ({offset, mask, value} rows).
 * NOTE(review): several rows are repeated verbatim (0xd030, 0x2ae4, 0x8a14,
 * ...).  This looks intentional (possibly per-instance programming), but it
 * cannot be confirmed from this chunk — do not "deduplicate" without
 * checking the consumer.
 */
static const u32 verde_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
463 
/* Oland RLC golden register overrides ({offset, mask, value} rows). */
static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};
472 
/* Oland golden register settings ({offset, mask, value} rows). */
static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
503 
/* Hainan golden register settings ({offset, mask, value} rows). */
static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
532 
/* Second Hainan golden-register table; same {offset, mask, value} row shape. */
static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};
537 
/*
 * Tahiti MGCG/CGCG (clock-gating) init sequence.  Rows of three words —
 * {offset, mask, value}; consumer outside this chunk (confirm).  The large
 * 0x9160..0x929c run programs a regular ascending sequence of registers.
 */
static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
667 
/*
 * Pitcairn MGCG/CGCG (clock-gating) init sequence ({offset, mask, value}
 * rows).  Shares the Tahiti prologue but has a shorter 0x91xx/0x92xx run
 * and omits the 0x264c/0x2648 rows in the tail.
 */
static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
765 
/*
 * Verde MGCG/CGCG (clock-gating) init sequence ({offset, mask, value}
 * rows).  Identical to the Pitcairn table except the tail also programs
 * 0x264c/0x2648.
 */
static const u32 verde_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
865 
/*
 * Oland MGCG/CGCG (clock-gating) init sequence ({offset, mask, value}
 * rows).  Same prologue as the other SI tables; its 0x91xx run stops at
 * 0x91d4.
 */
static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
945 
/*
 * Clock-gating (MGCG/CGCG) init sequence for Hainan.  Applied by
 * si_init_golden_registers() for CHIP_HAINAN via
 * radeon_program_register_sequence(); laid out as one entry per line,
 * three words each, like the other *_mgcg_cgcg_init tables.
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1022 
1023 static u32 verde_pg_init[] =
1024 {
1025 	0x353c, 0xffffffff, 0x40000,
1026 	0x3538, 0xffffffff, 0x200010ff,
1027 	0x353c, 0xffffffff, 0x0,
1028 	0x353c, 0xffffffff, 0x0,
1029 	0x353c, 0xffffffff, 0x0,
1030 	0x353c, 0xffffffff, 0x0,
1031 	0x353c, 0xffffffff, 0x0,
1032 	0x353c, 0xffffffff, 0x7007,
1033 	0x3538, 0xffffffff, 0x300010ff,
1034 	0x353c, 0xffffffff, 0x0,
1035 	0x353c, 0xffffffff, 0x0,
1036 	0x353c, 0xffffffff, 0x0,
1037 	0x353c, 0xffffffff, 0x0,
1038 	0x353c, 0xffffffff, 0x0,
1039 	0x353c, 0xffffffff, 0x400000,
1040 	0x3538, 0xffffffff, 0x100010ff,
1041 	0x353c, 0xffffffff, 0x0,
1042 	0x353c, 0xffffffff, 0x0,
1043 	0x353c, 0xffffffff, 0x0,
1044 	0x353c, 0xffffffff, 0x0,
1045 	0x353c, 0xffffffff, 0x0,
1046 	0x353c, 0xffffffff, 0x120200,
1047 	0x3538, 0xffffffff, 0x500010ff,
1048 	0x353c, 0xffffffff, 0x0,
1049 	0x353c, 0xffffffff, 0x0,
1050 	0x353c, 0xffffffff, 0x0,
1051 	0x353c, 0xffffffff, 0x0,
1052 	0x353c, 0xffffffff, 0x0,
1053 	0x353c, 0xffffffff, 0x1e1e16,
1054 	0x3538, 0xffffffff, 0x600010ff,
1055 	0x353c, 0xffffffff, 0x0,
1056 	0x353c, 0xffffffff, 0x0,
1057 	0x353c, 0xffffffff, 0x0,
1058 	0x353c, 0xffffffff, 0x0,
1059 	0x353c, 0xffffffff, 0x0,
1060 	0x353c, 0xffffffff, 0x171f1e,
1061 	0x3538, 0xffffffff, 0x700010ff,
1062 	0x353c, 0xffffffff, 0x0,
1063 	0x353c, 0xffffffff, 0x0,
1064 	0x353c, 0xffffffff, 0x0,
1065 	0x353c, 0xffffffff, 0x0,
1066 	0x353c, 0xffffffff, 0x0,
1067 	0x353c, 0xffffffff, 0x0,
1068 	0x3538, 0xffffffff, 0x9ff,
1069 	0x3500, 0xffffffff, 0x0,
1070 	0x3504, 0xffffffff, 0x10000800,
1071 	0x3504, 0xffffffff, 0xf,
1072 	0x3504, 0xffffffff, 0xf,
1073 	0x3500, 0xffffffff, 0x4,
1074 	0x3504, 0xffffffff, 0x1000051e,
1075 	0x3504, 0xffffffff, 0xffff,
1076 	0x3504, 0xffffffff, 0xffff,
1077 	0x3500, 0xffffffff, 0x8,
1078 	0x3504, 0xffffffff, 0x80500,
1079 	0x3500, 0xffffffff, 0x12,
1080 	0x3504, 0xffffffff, 0x9050c,
1081 	0x3500, 0xffffffff, 0x1d,
1082 	0x3504, 0xffffffff, 0xb052c,
1083 	0x3500, 0xffffffff, 0x2a,
1084 	0x3504, 0xffffffff, 0x1053e,
1085 	0x3500, 0xffffffff, 0x2d,
1086 	0x3504, 0xffffffff, 0x10546,
1087 	0x3500, 0xffffffff, 0x30,
1088 	0x3504, 0xffffffff, 0xa054e,
1089 	0x3500, 0xffffffff, 0x3c,
1090 	0x3504, 0xffffffff, 0x1055f,
1091 	0x3500, 0xffffffff, 0x3f,
1092 	0x3504, 0xffffffff, 0x10567,
1093 	0x3500, 0xffffffff, 0x42,
1094 	0x3504, 0xffffffff, 0x1056f,
1095 	0x3500, 0xffffffff, 0x45,
1096 	0x3504, 0xffffffff, 0x10572,
1097 	0x3500, 0xffffffff, 0x48,
1098 	0x3504, 0xffffffff, 0x20575,
1099 	0x3500, 0xffffffff, 0x4c,
1100 	0x3504, 0xffffffff, 0x190801,
1101 	0x3500, 0xffffffff, 0x67,
1102 	0x3504, 0xffffffff, 0x1082a,
1103 	0x3500, 0xffffffff, 0x6a,
1104 	0x3504, 0xffffffff, 0x1b082d,
1105 	0x3500, 0xffffffff, 0x87,
1106 	0x3504, 0xffffffff, 0x310851,
1107 	0x3500, 0xffffffff, 0xba,
1108 	0x3504, 0xffffffff, 0x891,
1109 	0x3500, 0xffffffff, 0xbc,
1110 	0x3504, 0xffffffff, 0x893,
1111 	0x3500, 0xffffffff, 0xbe,
1112 	0x3504, 0xffffffff, 0x20895,
1113 	0x3500, 0xffffffff, 0xc2,
1114 	0x3504, 0xffffffff, 0x20899,
1115 	0x3500, 0xffffffff, 0xc6,
1116 	0x3504, 0xffffffff, 0x2089d,
1117 	0x3500, 0xffffffff, 0xca,
1118 	0x3504, 0xffffffff, 0x8a1,
1119 	0x3500, 0xffffffff, 0xcc,
1120 	0x3504, 0xffffffff, 0x8a3,
1121 	0x3500, 0xffffffff, 0xce,
1122 	0x3504, 0xffffffff, 0x308a5,
1123 	0x3500, 0xffffffff, 0xd3,
1124 	0x3504, 0xffffffff, 0x6d08cd,
1125 	0x3500, 0xffffffff, 0x142,
1126 	0x3504, 0xffffffff, 0x2000095a,
1127 	0x3504, 0xffffffff, 0x1,
1128 	0x3500, 0xffffffff, 0x144,
1129 	0x3504, 0xffffffff, 0x301f095b,
1130 	0x3500, 0xffffffff, 0x165,
1131 	0x3504, 0xffffffff, 0xc094d,
1132 	0x3500, 0xffffffff, 0x173,
1133 	0x3504, 0xffffffff, 0xf096d,
1134 	0x3500, 0xffffffff, 0x184,
1135 	0x3504, 0xffffffff, 0x15097f,
1136 	0x3500, 0xffffffff, 0x19b,
1137 	0x3504, 0xffffffff, 0xc0998,
1138 	0x3500, 0xffffffff, 0x1a9,
1139 	0x3504, 0xffffffff, 0x409a7,
1140 	0x3500, 0xffffffff, 0x1af,
1141 	0x3504, 0xffffffff, 0xcdc,
1142 	0x3500, 0xffffffff, 0x1b1,
1143 	0x3504, 0xffffffff, 0x800,
1144 	0x3508, 0xffffffff, 0x6c9b2000,
1145 	0x3510, 0xfc00, 0x2000,
1146 	0x3544, 0xffffffff, 0xfc0,
1147 	0x28d4, 0x00000100, 0x100
1148 };
1149 
1150 static void si_init_golden_registers(struct radeon_device *rdev)
1151 {
1152 	switch (rdev->family) {
1153 	case CHIP_TAHITI:
1154 		radeon_program_register_sequence(rdev,
1155 						 tahiti_golden_registers,
1156 						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
1157 		radeon_program_register_sequence(rdev,
1158 						 tahiti_golden_rlc_registers,
1159 						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1160 		radeon_program_register_sequence(rdev,
1161 						 tahiti_mgcg_cgcg_init,
1162 						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1163 		radeon_program_register_sequence(rdev,
1164 						 tahiti_golden_registers2,
1165 						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1166 		break;
1167 	case CHIP_PITCAIRN:
1168 		radeon_program_register_sequence(rdev,
1169 						 pitcairn_golden_registers,
1170 						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1171 		radeon_program_register_sequence(rdev,
1172 						 pitcairn_golden_rlc_registers,
1173 						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1174 		radeon_program_register_sequence(rdev,
1175 						 pitcairn_mgcg_cgcg_init,
1176 						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1177 		break;
1178 	case CHIP_VERDE:
1179 		radeon_program_register_sequence(rdev,
1180 						 verde_golden_registers,
1181 						 (const u32)ARRAY_SIZE(verde_golden_registers));
1182 		radeon_program_register_sequence(rdev,
1183 						 verde_golden_rlc_registers,
1184 						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1185 		radeon_program_register_sequence(rdev,
1186 						 verde_mgcg_cgcg_init,
1187 						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1188 		radeon_program_register_sequence(rdev,
1189 						 verde_pg_init,
1190 						 (const u32)ARRAY_SIZE(verde_pg_init));
1191 		break;
1192 	case CHIP_OLAND:
1193 		radeon_program_register_sequence(rdev,
1194 						 oland_golden_registers,
1195 						 (const u32)ARRAY_SIZE(oland_golden_registers));
1196 		radeon_program_register_sequence(rdev,
1197 						 oland_golden_rlc_registers,
1198 						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1199 		radeon_program_register_sequence(rdev,
1200 						 oland_mgcg_cgcg_init,
1201 						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1202 		break;
1203 	case CHIP_HAINAN:
1204 		radeon_program_register_sequence(rdev,
1205 						 hainan_golden_registers,
1206 						 (const u32)ARRAY_SIZE(hainan_golden_registers));
1207 		radeon_program_register_sequence(rdev,
1208 						 hainan_golden_registers2,
1209 						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
1210 		radeon_program_register_sequence(rdev,
1211 						 hainan_mgcg_cgcg_init,
1212 						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1213 		break;
1214 	default:
1215 		break;
1216 	}
1217 }
1218 
1219 #define PCIE_BUS_CLK                10000
1220 #define TCLK                        (PCIE_BUS_CLK / 10)
1221 
1222 /**
1223  * si_get_xclk - get the xclk
1224  *
1225  * @rdev: radeon_device pointer
1226  *
1227  * Returns the reference clock used by the gfx engine
1228  * (SI).
1229  */
1230 u32 si_get_xclk(struct radeon_device *rdev)
1231 {
1232         u32 reference_clock = rdev->clock.spll.reference_freq;
1233 	u32 tmp;
1234 
1235 	tmp = RREG32(CG_CLKPIN_CNTL_2);
1236 	if (tmp & MUX_TCLK_TO_XCLK)
1237 		return TCLK;
1238 
1239 	tmp = RREG32(CG_CLKPIN_CNTL);
1240 	if (tmp & XTALIN_DIVIDE)
1241 		return reference_clock / 4;
1242 
1243 	return reference_clock;
1244 }
1245 
1246 /* get temperature in millidegrees */
1247 int si_get_temp(struct radeon_device *rdev)
1248 {
1249 	u32 temp;
1250 	int actual_temp = 0;
1251 
1252 	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1253 		CTF_TEMP_SHIFT;
1254 
1255 	if (temp & 0x200)
1256 		actual_temp = 255;
1257 	else
1258 		actual_temp = temp & 0x1ff;
1259 
1260 	actual_temp = (actual_temp * 1000);
1261 
1262 	return actual_temp;
1263 }
1264 
#define TAHITI_IO_MC_REGS_SIZE 36

/*
 * Per-asic MC_SEQ_IO_DEBUG {index, data} pairs, written by
 * si_mc_load_microcode() (index into MC_SEQ_IO_DEBUG_INDEX, data into
 * MC_SEQ_IO_DEBUG_DATA) before the MC ucode is uploaded.  All five
 * tables have TAHITI_IO_MC_REGS_SIZE entries and differ only in the
 * final 0x0000009f entry.
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};

/* identical to the Tahiti table except the final 0x9f entry */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};

/* identical to the Tahiti table except the final 0x9f entry */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};

/* identical to the Tahiti table except the final 0x9f entry */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};

/* identical to the Tahiti table except the final 0x9f entry */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1461 
1462 /* ucode loading */
1463 static int si_mc_load_microcode(struct radeon_device *rdev)
1464 {
1465 	const __be32 *fw_data;
1466 	u32 running, blackout = 0;
1467 	u32 *io_mc_regs;
1468 	int i, ucode_size, regs_size;
1469 
1470 	if (!rdev->mc_fw)
1471 		return -EINVAL;
1472 
1473 	switch (rdev->family) {
1474 	case CHIP_TAHITI:
1475 		io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1476 		ucode_size = SI_MC_UCODE_SIZE;
1477 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1478 		break;
1479 	case CHIP_PITCAIRN:
1480 		io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1481 		ucode_size = SI_MC_UCODE_SIZE;
1482 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1483 		break;
1484 	case CHIP_VERDE:
1485 	default:
1486 		io_mc_regs = (u32 *)&verde_io_mc_regs;
1487 		ucode_size = SI_MC_UCODE_SIZE;
1488 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1489 		break;
1490 	case CHIP_OLAND:
1491 		io_mc_regs = (u32 *)&oland_io_mc_regs;
1492 		ucode_size = OLAND_MC_UCODE_SIZE;
1493 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1494 		break;
1495 	case CHIP_HAINAN:
1496 		io_mc_regs = (u32 *)&hainan_io_mc_regs;
1497 		ucode_size = OLAND_MC_UCODE_SIZE;
1498 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1499 		break;
1500 	}
1501 
1502 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1503 
1504 	if (running == 0) {
1505 		if (running) {
1506 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1507 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1508 		}
1509 
1510 		/* reset the engine and set to writable */
1511 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1512 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1513 
1514 		/* load mc io regs */
1515 		for (i = 0; i < regs_size; i++) {
1516 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1517 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1518 		}
1519 		/* load the MC ucode */
1520 		fw_data = (const __be32 *)rdev->mc_fw->data;
1521 		for (i = 0; i < ucode_size; i++)
1522 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1523 
1524 		/* put the engine back into the active state */
1525 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1526 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1527 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1528 
1529 		/* wait for training to complete */
1530 		for (i = 0; i < rdev->usec_timeout; i++) {
1531 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1532 				break;
1533 			udelay(1);
1534 		}
1535 		for (i = 0; i < rdev->usec_timeout; i++) {
1536 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1537 				break;
1538 			udelay(1);
1539 		}
1540 
1541 		if (running)
1542 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1543 	}
1544 
1545 	return 0;
1546 }
1547 
1548 static int si_init_microcode(struct radeon_device *rdev)
1549 {
1550 	const char *chip_name;
1551 	const char *rlc_chip_name;
1552 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1553 	size_t smc_req_size;
1554 	char fw_name[30];
1555 	int err;
1556 
1557 	DRM_DEBUG("\n");
1558 
1559 	switch (rdev->family) {
1560 	case CHIP_TAHITI:
1561 		chip_name = "TAHITI";
1562 		rlc_chip_name = "TAHITI";
1563 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1564 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1565 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1566 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1567 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1568 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1569 		break;
1570 	case CHIP_PITCAIRN:
1571 		chip_name = "PITCAIRN";
1572 		rlc_chip_name = "PITCAIRN";
1573 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1574 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1575 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1576 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1577 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1578 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1579 		break;
1580 	case CHIP_VERDE:
1581 		chip_name = "VERDE";
1582 		rlc_chip_name = "VERDE";
1583 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1584 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1585 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1586 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1587 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1588 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1589 		break;
1590 	case CHIP_OLAND:
1591 		chip_name = "OLAND";
1592 		rlc_chip_name = "OLAND";
1593 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1594 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1595 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1596 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1597 		mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1598 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1599 		break;
1600 	case CHIP_HAINAN:
1601 		chip_name = "HAINAN";
1602 		rlc_chip_name = "HAINAN";
1603 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1604 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1605 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1606 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1607 		mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1608 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1609 		break;
1610 	default: BUG();
1611 	}
1612 
1613 	DRM_INFO("Loading %s Microcode\n", chip_name);
1614 
1615 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1616 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1617 	if (err)
1618 		goto out;
1619 	if (rdev->pfp_fw->size != pfp_req_size) {
1620 		printk(KERN_ERR
1621 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1622 		       rdev->pfp_fw->size, fw_name);
1623 		err = -EINVAL;
1624 		goto out;
1625 	}
1626 
1627 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1628 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1629 	if (err)
1630 		goto out;
1631 	if (rdev->me_fw->size != me_req_size) {
1632 		printk(KERN_ERR
1633 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1634 		       rdev->me_fw->size, fw_name);
1635 		err = -EINVAL;
1636 	}
1637 
1638 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1639 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1640 	if (err)
1641 		goto out;
1642 	if (rdev->ce_fw->size != ce_req_size) {
1643 		printk(KERN_ERR
1644 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1645 		       rdev->ce_fw->size, fw_name);
1646 		err = -EINVAL;
1647 	}
1648 
1649 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1650 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1651 	if (err)
1652 		goto out;
1653 	if (rdev->rlc_fw->size != rlc_req_size) {
1654 		printk(KERN_ERR
1655 		       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1656 		       rdev->rlc_fw->size, fw_name);
1657 		err = -EINVAL;
1658 	}
1659 
1660 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1661 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1662 	if (err)
1663 		goto out;
1664 	if (rdev->mc_fw->size != mc_req_size) {
1665 		printk(KERN_ERR
1666 		       "si_mc: Bogus length %zu in firmware \"%s\"\n",
1667 		       rdev->mc_fw->size, fw_name);
1668 		err = -EINVAL;
1669 	}
1670 
1671 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1672 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1673 	if (err) {
1674 		printk(KERN_ERR
1675 		       "smc: error loading firmware \"%s\"\n",
1676 		       fw_name);
1677 		release_firmware(rdev->smc_fw);
1678 		rdev->smc_fw = NULL;
1679 		err = 0;
1680 	} else if (rdev->smc_fw->size != smc_req_size) {
1681 		printk(KERN_ERR
1682 		       "si_smc: Bogus length %zu in firmware \"%s\"\n",
1683 		       rdev->smc_fw->size, fw_name);
1684 		err = -EINVAL;
1685 	}
1686 
1687 out:
1688 	if (err) {
1689 		if (err != -EINVAL)
1690 			printk(KERN_ERR
1691 			       "si_cp: Failed to load firmware \"%s\"\n",
1692 			       fw_name);
1693 		release_firmware(rdev->pfp_fw);
1694 		rdev->pfp_fw = NULL;
1695 		release_firmware(rdev->me_fw);
1696 		rdev->me_fw = NULL;
1697 		release_firmware(rdev->ce_fw);
1698 		rdev->ce_fw = NULL;
1699 		release_firmware(rdev->rlc_fw);
1700 		rdev->rlc_fw = NULL;
1701 		release_firmware(rdev->mc_fw);
1702 		rdev->mc_fw = NULL;
1703 		release_firmware(rdev->smc_fw);
1704 		rdev->smc_fw = NULL;
1705 	}
1706 	return err;
1707 }
1708 
1709 /* watermark setup */
1710 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1711 				   struct radeon_crtc *radeon_crtc,
1712 				   struct drm_display_mode *mode,
1713 				   struct drm_display_mode *other_mode)
1714 {
1715 	u32 tmp, buffer_alloc, i;
1716 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1717 	/*
1718 	 * Line Buffer Setup
1719 	 * There are 3 line buffers, each one shared by 2 display controllers.
1720 	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1721 	 * the display controllers.  The paritioning is done via one of four
1722 	 * preset allocations specified in bits 21:20:
1723 	 *  0 - half lb
1724 	 *  2 - whole lb, other crtc must be disabled
1725 	 */
1726 	/* this can get tricky if we have two large displays on a paired group
1727 	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1728 	 * non-linked crtcs for maximum line buffer allocation.
1729 	 */
1730 	if (radeon_crtc->base.enabled && mode) {
1731 		if (other_mode) {
1732 			tmp = 0; /* 1/2 */
1733 			buffer_alloc = 1;
1734 		} else {
1735 			tmp = 2; /* whole */
1736 			buffer_alloc = 2;
1737 		}
1738 	} else {
1739 		tmp = 0;
1740 		buffer_alloc = 0;
1741 	}
1742 
1743 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1744 	       DC_LB_MEMORY_CONFIG(tmp));
1745 
1746 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1747 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1748 	for (i = 0; i < rdev->usec_timeout; i++) {
1749 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1750 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
1751 			break;
1752 		udelay(1);
1753 	}
1754 
1755 	if (radeon_crtc->base.enabled && mode) {
1756 		switch (tmp) {
1757 		case 0:
1758 		default:
1759 			return 4096 * 2;
1760 		case 2:
1761 			return 8192 * 2;
1762 		}
1763 	}
1764 
1765 	/* controller not enabled, so no lb used */
1766 	return 0;
1767 }
1768 
1769 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1770 {
1771 	u32 tmp = RREG32(MC_SHARED_CHMAP);
1772 
1773 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1774 	case 0:
1775 	default:
1776 		return 1;
1777 	case 1:
1778 		return 2;
1779 	case 2:
1780 		return 4;
1781 	case 3:
1782 		return 8;
1783 	case 4:
1784 		return 3;
1785 	case 5:
1786 		return 6;
1787 	case 6:
1788 		return 10;
1789 	case 7:
1790 		return 12;
1791 	case 8:
1792 		return 16;
1793 	}
1794 }
1795 
/* Input parameters for the DCE6 display watermark calculations below. */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1811 
1812 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1813 {
1814 	/* Calculate raw DRAM Bandwidth */
1815 	fixed20_12 dram_efficiency; /* 0.7 */
1816 	fixed20_12 yclk, dram_channels, bandwidth;
1817 	fixed20_12 a;
1818 
1819 	a.full = dfixed_const(1000);
1820 	yclk.full = dfixed_const(wm->yclk);
1821 	yclk.full = dfixed_div(yclk, a);
1822 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1823 	a.full = dfixed_const(10);
1824 	dram_efficiency.full = dfixed_const(7);
1825 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
1826 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1827 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1828 
1829 	return dfixed_trunc(bandwidth);
1830 }
1831 
1832 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1833 {
1834 	/* Calculate DRAM Bandwidth and the part allocated to display. */
1835 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1836 	fixed20_12 yclk, dram_channels, bandwidth;
1837 	fixed20_12 a;
1838 
1839 	a.full = dfixed_const(1000);
1840 	yclk.full = dfixed_const(wm->yclk);
1841 	yclk.full = dfixed_div(yclk, a);
1842 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1843 	a.full = dfixed_const(10);
1844 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1845 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1846 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1847 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1848 
1849 	return dfixed_trunc(bandwidth);
1850 }
1851 
1852 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1853 {
1854 	/* Calculate the display Data return Bandwidth */
1855 	fixed20_12 return_efficiency; /* 0.8 */
1856 	fixed20_12 sclk, bandwidth;
1857 	fixed20_12 a;
1858 
1859 	a.full = dfixed_const(1000);
1860 	sclk.full = dfixed_const(wm->sclk);
1861 	sclk.full = dfixed_div(sclk, a);
1862 	a.full = dfixed_const(10);
1863 	return_efficiency.full = dfixed_const(8);
1864 	return_efficiency.full = dfixed_div(return_efficiency, a);
1865 	a.full = dfixed_const(32);
1866 	bandwidth.full = dfixed_mul(a, sclk);
1867 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1868 
1869 	return dfixed_trunc(bandwidth);
1870 }
1871 
static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
{
	/* DMIF request size is a fixed 32 bytes on DCE6 */
	return 32;
}
1876 
1877 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1878 {
1879 	/* Calculate the DMIF Request Bandwidth */
1880 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1881 	fixed20_12 disp_clk, sclk, bandwidth;
1882 	fixed20_12 a, b1, b2;
1883 	u32 min_bandwidth;
1884 
1885 	a.full = dfixed_const(1000);
1886 	disp_clk.full = dfixed_const(wm->disp_clk);
1887 	disp_clk.full = dfixed_div(disp_clk, a);
1888 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1889 	b1.full = dfixed_mul(a, disp_clk);
1890 
1891 	a.full = dfixed_const(1000);
1892 	sclk.full = dfixed_const(wm->sclk);
1893 	sclk.full = dfixed_div(sclk, a);
1894 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1895 	b2.full = dfixed_mul(a, sclk);
1896 
1897 	a.full = dfixed_const(10);
1898 	disp_clk_request_efficiency.full = dfixed_const(8);
1899 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1900 
1901 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1902 
1903 	a.full = dfixed_const(min_bandwidth);
1904 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1905 
1906 	return dfixed_trunc(bandwidth);
1907 }
1908 
1909 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1910 {
1911 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1912 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1913 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1914 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1915 
1916 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1917 }
1918 
1919 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1920 {
1921 	/* Calculate the display mode Average Bandwidth
1922 	 * DisplayMode should contain the source and destination dimensions,
1923 	 * timing, etc.
1924 	 */
1925 	fixed20_12 bpp;
1926 	fixed20_12 line_time;
1927 	fixed20_12 src_width;
1928 	fixed20_12 bandwidth;
1929 	fixed20_12 a;
1930 
1931 	a.full = dfixed_const(1000);
1932 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1933 	line_time.full = dfixed_div(line_time, a);
1934 	bpp.full = dfixed_const(wm->bytes_per_pixel);
1935 	src_width.full = dfixed_const(wm->src_width);
1936 	bandwidth.full = dfixed_mul(src_width, bpp);
1937 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1938 	bandwidth.full = dfixed_div(bandwidth, line_time);
1939 
1940 	return dfixed_trunc(bandwidth);
1941 }
1942 
/* Compute the latency watermark (in ns) for one head: the memory latency
 * plus the data-return time of the other heads, extended if the line
 * buffer cannot be filled within the active display period.
 */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	/* NOTE(review): available_bandwidth is used as a divisor below with
	 * no zero check before the num_heads guard — presumably the clocks
	 * are always non-zero here; confirm against callers. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	/* worst-case time to return one 512-byte chunk (8 bits/byte) */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	/* time to return one cursor line pair (128 * 4 bytes) */
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* extra wait caused by the other heads' outstanding requests */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* Determine how many source lines feed one destination line:
	 * large downscale ratios, many vertical taps, or interlacing
	 * with a 2:1 ratio need 4 lines, otherwise 2 suffice. */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = rate at which the DMIF buffer (dmif_size bytes) can be
	 * drained given the memory-controller round trip */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = display-clock-limited fill rate: disp_clk(MHz) * bytes/pixel */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* line buffer fill bandwidth is the smaller of the two limits */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time needed to fill one destination line's worth of source data */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the line fills within the active period the raw latency
	 * stands; otherwise the overrun is added on top */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
2005 
2006 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2007 {
2008 	if (dce6_average_bandwidth(wm) <=
2009 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2010 		return true;
2011 	else
2012 		return false;
2013 };
2014 
2015 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2016 {
2017 	if (dce6_average_bandwidth(wm) <=
2018 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2019 		return true;
2020 	else
2021 		return false;
2022 };
2023 
2024 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2025 {
2026 	u32 lb_partitions = wm->lb_size / wm->src_width;
2027 	u32 line_time = wm->active_time + wm->blank_time;
2028 	u32 latency_tolerant_lines;
2029 	u32 latency_hiding;
2030 	fixed20_12 a;
2031 
2032 	a.full = dfixed_const(1);
2033 	if (wm->vsc.full > a.full)
2034 		latency_tolerant_lines = 1;
2035 	else {
2036 		if (lb_partitions <= (wm->vtaps + 1))
2037 			latency_tolerant_lines = 1;
2038 		else
2039 			latency_tolerant_lines = 2;
2040 	}
2041 
2042 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2043 
2044 	if (dce6_latency_watermark(wm) <= latency_hiding)
2045 		return true;
2046 	else
2047 		return false;
2048 }
2049 
/* Compute and program the display watermarks and priority marks for one
 * CRTC.  Builds two watermark parameter sets (high and low clocks),
 * derives the latency watermarks and arbitration priority marks from
 * them, and writes them to the DPG/priority registers for this pipe.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm_low, wm_high;
	u32 dram_channels;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* line time clamped to the 16-bit register field */
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		/* ARUBA is a fusion (TN) part and uses the evergreen helper */
		if (rdev->family == CHIP_ARUBA)
			dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			dram_channels = si_get_number_of_dram_channels(rdev);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = dram_channels;
		wm_high.num_heads = num_heads;

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = dram_channels;
		wm_low.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
		/* set for low clocks */
		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce6_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce6_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A: watermark_a(ns) * pixel clock(MHz) * hsc,
		 * scaled down by 1000 and expressed in 16-pixel units */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same computation with watermark_b */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
2209 
2210 void dce6_bandwidth_update(struct radeon_device *rdev)
2211 {
2212 	struct drm_display_mode *mode0 = NULL;
2213 	struct drm_display_mode *mode1 = NULL;
2214 	u32 num_heads = 0, lb_size;
2215 	int i;
2216 
2217 	radeon_update_display_priority(rdev);
2218 
2219 	for (i = 0; i < rdev->num_crtc; i++) {
2220 		if (rdev->mode_info.crtcs[i]->base.enabled)
2221 			num_heads++;
2222 	}
2223 	for (i = 0; i < rdev->num_crtc; i += 2) {
2224 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2225 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2226 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2227 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2228 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2229 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2230 	}
2231 }
2232 
2233 /*
2234  * Core functions
2235  */
2236 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2237 {
2238 	const u32 num_tile_mode_states = 32;
2239 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2240 
2241 	switch (rdev->config.si.mem_row_size_in_kb) {
2242 	case 1:
2243 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2244 		break;
2245 	case 2:
2246 	default:
2247 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2248 		break;
2249 	case 4:
2250 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2251 		break;
2252 	}
2253 
2254 	if ((rdev->family == CHIP_TAHITI) ||
2255 	    (rdev->family == CHIP_PITCAIRN)) {
2256 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2257 			switch (reg_offset) {
2258 			case 0:  /* non-AA compressed depth or any compressed stencil */
2259 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2260 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2261 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2262 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2263 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2264 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2265 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2266 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2267 				break;
2268 			case 1:  /* 2xAA/4xAA compressed depth only */
2269 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2270 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2271 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2272 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2273 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2274 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2275 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2276 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2277 				break;
2278 			case 2:  /* 8xAA compressed depth only */
2279 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2280 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2281 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2282 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2283 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2284 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2285 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2286 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2287 				break;
2288 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2289 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2290 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2291 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2292 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2293 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2294 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2295 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2296 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2297 				break;
2298 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2299 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2300 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2301 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2302 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2303 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2304 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2305 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2306 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2307 				break;
2308 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2309 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2310 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2311 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2312 						 TILE_SPLIT(split_equal_to_row_size) |
2313 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2314 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2315 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2316 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2317 				break;
2318 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2319 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2320 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2321 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2322 						 TILE_SPLIT(split_equal_to_row_size) |
2323 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2324 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2325 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2326 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2327 				break;
2328 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2329 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2330 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2331 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2332 						 TILE_SPLIT(split_equal_to_row_size) |
2333 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2334 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2335 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2336 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2337 				break;
2338 			case 8:  /* 1D and 1D Array Surfaces */
2339 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2340 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2341 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2342 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2343 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2344 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2345 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2346 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2347 				break;
2348 			case 9:  /* Displayable maps. */
2349 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2350 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2351 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2352 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2353 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2354 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2355 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2356 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2357 				break;
2358 			case 10:  /* Display 8bpp. */
2359 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2360 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2361 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2362 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2363 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2364 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2365 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2366 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2367 				break;
2368 			case 11:  /* Display 16bpp. */
2369 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2370 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2371 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2372 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2373 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2374 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2375 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2376 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2377 				break;
2378 			case 12:  /* Display 32bpp. */
2379 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2380 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2381 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2382 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2383 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2384 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2385 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2386 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2387 				break;
2388 			case 13:  /* Thin. */
2389 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2390 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2391 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2392 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2393 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2394 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2395 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2396 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2397 				break;
2398 			case 14:  /* Thin 8 bpp. */
2399 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2400 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2401 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2402 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2403 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2404 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2405 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2406 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2407 				break;
2408 			case 15:  /* Thin 16 bpp. */
2409 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2410 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2411 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2412 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2413 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2414 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2415 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2416 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2417 				break;
2418 			case 16:  /* Thin 32 bpp. */
2419 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2420 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2421 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2422 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2423 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2424 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2425 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2426 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2427 				break;
2428 			case 17:  /* Thin 64 bpp. */
2429 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2430 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2431 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2432 						 TILE_SPLIT(split_equal_to_row_size) |
2433 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2434 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2435 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2436 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2437 				break;
2438 			case 21:  /* 8 bpp PRT. */
2439 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2440 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2441 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2442 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2443 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2444 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2445 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2446 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2447 				break;
2448 			case 22:  /* 16 bpp PRT */
2449 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2450 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2451 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2452 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2453 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2454 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2456 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2457 				break;
2458 			case 23:  /* 32 bpp PRT */
2459 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2460 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2461 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2462 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2463 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2464 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2466 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2467 				break;
2468 			case 24:  /* 64 bpp PRT */
2469 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2470 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2471 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2472 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2473 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2474 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2476 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2477 				break;
2478 			case 25:  /* 128 bpp PRT */
2479 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2480 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2481 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2482 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2483 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2484 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2487 				break;
2488 			default:
2489 				gb_tile_moden = 0;
2490 				break;
2491 			}
2492 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2493 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2494 		}
2495 	} else if ((rdev->family == CHIP_VERDE) ||
2496 		   (rdev->family == CHIP_OLAND) ||
2497 		   (rdev->family == CHIP_HAINAN)) {
2498 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2499 			switch (reg_offset) {
2500 			case 0:  /* non-AA compressed depth or any compressed stencil */
2501 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2502 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2503 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2504 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2505 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2506 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2507 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2508 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2509 				break;
2510 			case 1:  /* 2xAA/4xAA compressed depth only */
2511 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2512 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2513 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2514 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2515 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2516 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2518 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2519 				break;
2520 			case 2:  /* 8xAA compressed depth only */
2521 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2522 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2523 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2524 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2525 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2526 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2528 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2529 				break;
2530 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2531 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2532 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2533 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2534 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2535 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2536 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2537 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2538 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2539 				break;
2540 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2541 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2542 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2543 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2544 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2545 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2546 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2547 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2548 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2549 				break;
2550 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2551 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2552 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2553 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2554 						 TILE_SPLIT(split_equal_to_row_size) |
2555 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2556 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2558 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2559 				break;
2560 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2561 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2562 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2563 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2564 						 TILE_SPLIT(split_equal_to_row_size) |
2565 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2566 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2567 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2568 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2569 				break;
2570 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2571 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2572 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2573 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2574 						 TILE_SPLIT(split_equal_to_row_size) |
2575 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2576 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2577 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2578 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2579 				break;
2580 			case 8:  /* 1D and 1D Array Surfaces */
2581 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2582 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2583 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2584 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2585 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2586 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2587 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2588 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2589 				break;
2590 			case 9:  /* Displayable maps. */
2591 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2592 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2593 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2594 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2595 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2596 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2597 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2598 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2599 				break;
2600 			case 10:  /* Display 8bpp. */
2601 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2602 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2603 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2604 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2605 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2606 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2607 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2608 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2609 				break;
2610 			case 11:  /* Display 16bpp. */
2611 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2612 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2613 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2614 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2615 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2616 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2618 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2619 				break;
2620 			case 12:  /* Display 32bpp. */
2621 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2622 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2623 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2624 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2625 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2626 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2627 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2628 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2629 				break;
2630 			case 13:  /* Thin. */
2631 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2632 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2633 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2634 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2635 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2636 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2638 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2639 				break;
2640 			case 14:  /* Thin 8 bpp. */
2641 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2642 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2643 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2644 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2645 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2646 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2648 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2649 				break;
2650 			case 15:  /* Thin 16 bpp. */
2651 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2652 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2653 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2654 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2655 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2656 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2657 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2658 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2659 				break;
2660 			case 16:  /* Thin 32 bpp. */
2661 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2662 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2663 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2664 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2665 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2666 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2667 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2668 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2669 				break;
2670 			case 17:  /* Thin 64 bpp. */
2671 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2672 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2673 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2674 						 TILE_SPLIT(split_equal_to_row_size) |
2675 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2676 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2677 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2678 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2679 				break;
2680 			case 21:  /* 8 bpp PRT. */
2681 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2682 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2683 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2684 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2685 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2686 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2687 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2688 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2689 				break;
2690 			case 22:  /* 16 bpp PRT */
2691 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2692 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2693 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2694 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2695 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2696 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2698 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2699 				break;
2700 			case 23:  /* 32 bpp PRT */
2701 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2702 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2703 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2704 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2705 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2706 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2707 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2708 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2709 				break;
2710 			case 24:  /* 64 bpp PRT */
2711 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2712 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2713 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2714 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2715 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2716 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2717 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2718 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2719 				break;
2720 			case 25:  /* 128 bpp PRT */
2721 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2722 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2723 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2724 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2725 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2726 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2728 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2729 				break;
2730 			default:
2731 				gb_tile_moden = 0;
2732 				break;
2733 			}
2734 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2735 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2736 		}
2737 	} else
2738 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2739 }
2740 
2741 static void si_select_se_sh(struct radeon_device *rdev,
2742 			    u32 se_num, u32 sh_num)
2743 {
2744 	u32 data = INSTANCE_BROADCAST_WRITES;
2745 
2746 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2747 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2748 	else if (se_num == 0xffffffff)
2749 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2750 	else if (sh_num == 0xffffffff)
2751 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2752 	else
2753 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2754 	WREG32(GRBM_GFX_INDEX, data);
2755 }
2756 
2757 static u32 si_create_bitmask(u32 bit_width)
2758 {
2759 	u32 i, mask = 0;
2760 
2761 	for (i = 0; i < bit_width; i++) {
2762 		mask <<= 1;
2763 		mask |= 1;
2764 	}
2765 	return mask;
2766 }
2767 
2768 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2769 {
2770 	u32 data, mask;
2771 
2772 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2773 	if (data & 1)
2774 		data &= INACTIVE_CUS_MASK;
2775 	else
2776 		data = 0;
2777 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2778 
2779 	data >>= INACTIVE_CUS_SHIFT;
2780 
2781 	mask = si_create_bitmask(cu_per_sh);
2782 
2783 	return ~data & mask;
2784 }
2785 
2786 static void si_setup_spi(struct radeon_device *rdev,
2787 			 u32 se_num, u32 sh_per_se,
2788 			 u32 cu_per_sh)
2789 {
2790 	int i, j, k;
2791 	u32 data, mask, active_cu;
2792 
2793 	for (i = 0; i < se_num; i++) {
2794 		for (j = 0; j < sh_per_se; j++) {
2795 			si_select_se_sh(rdev, i, j);
2796 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2797 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2798 
2799 			mask = 1;
2800 			for (k = 0; k < 16; k++) {
2801 				mask <<= k;
2802 				if (active_cu & mask) {
2803 					data &= ~mask;
2804 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2805 					break;
2806 				}
2807 			}
2808 		}
2809 	}
2810 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2811 }
2812 
2813 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2814 			      u32 max_rb_num, u32 se_num,
2815 			      u32 sh_per_se)
2816 {
2817 	u32 data, mask;
2818 
2819 	data = RREG32(CC_RB_BACKEND_DISABLE);
2820 	if (data & 1)
2821 		data &= BACKEND_DISABLE_MASK;
2822 	else
2823 		data = 0;
2824 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2825 
2826 	data >>= BACKEND_DISABLE_SHIFT;
2827 
2828 	mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
2829 
2830 	return data & mask;
2831 }
2832 
/* si_setup_rb - program the raster configuration for the render backends.
 *
 * Gathers the per-SH disabled-RB bitmaps into a single mask, inverts it
 * into an enabled-RB mask, then writes a PA_SC_RASTER_CONFIG RB mapping
 * for each shader engine.  Leaves GRBM_GFX_INDEX in broadcast mode.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Collect each SH's disabled-RB bits into one packed bitmap,
	 * TAHITI_RB_BITMAP_WIDTH_PER_SH bits per shader array. */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* Invert: a bit set in enabled_rbs means that RB is usable. */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* Build and write the RB map for each shader engine, consuming
	 * enabled_rbs two bits (one SH) at a time. */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2880 
/* si_gpu_init - one-time bring-up of the SI graphics core.
 *
 * Fills in rdev->config.si with the per-ASIC limits, clears the HDP
 * tables, derives and programs the tiling/address configuration
 * (GB_ADDR_CONFIG and friends), initializes the tiling mode table,
 * render backends and SPI CU masks, and applies HW defaults for the
 * 3D engine.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* Per-family shader engine / pipe / cache limits. */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		/* NOTE: the default label here is intentional - unknown SI
		 * variants fall back to the Verde configuration. */
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read here but not consumed
	 * anywhere below in this function. */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* Derive the DRAM row size (in KB, capped at 4) from the memory
	 * controller's column count. */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* Broadcast the address config to every block that needs it. */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);


	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* Read-modify-write with no modification: keeps the reset value. */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* Zero all CB perf counter selects. */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* Give the hardware time to settle. */
	udelay(50);
}
3137 
3138 /*
3139  * GPU scratch registers helpers function.
3140  */
3141 static void si_scratch_init(struct radeon_device *rdev)
3142 {
3143 	int i;
3144 
3145 	rdev->scratch.num_reg = 7;
3146 	rdev->scratch.reg_base = SCRATCH_REG0;
3147 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3148 		rdev->scratch.free[i] = true;
3149 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3150 	}
3151 }
3152 
/* si_fence_ring_emit - emit a fence on a CP ring.
 *
 * Emits a cache flush (SET_CONFIG_REG of CP_COHER_CNTL2 plus a
 * SURFACE_SYNC) followed by an EVENT_WRITE_EOP packet that writes
 * fence->seq to the fence GPU address and raises an interrupt.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff);
	/* high address bits plus data-select (write seq) / int-select */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3179 
3180 /*
3181  * IB stuff
3182  */
/* si_ring_ib_execute - schedule an indirect buffer (IB) on a CP ring.
 *
 * Const IBs are preceded by a SWITCH_BUFFER packet and dispatched with
 * INDIRECT_BUFFER_CONST; normal IBs optionally record the expected read
 * pointer (via the save register or write-back memory), are dispatched
 * with INDIRECT_BUFFER, and are followed by a per-VMID cache flush.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 + 4 + 8: presumably the dword counts of this
			 * write, the IB packet and the trailing flush --
			 * NOTE(review): confirm against the emits below. */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	/* IB length plus the VMID in bits 31:24 (0 if no VM). */
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3239 
3240 /*
3241  * CP.
3242  */
3243 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3244 {
3245 	if (enable)
3246 		WREG32(CP_ME_CNTL, 0);
3247 	else {
3248 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3249 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3250 		WREG32(SCRATCH_UMSK, 0);
3251 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3252 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3253 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3254 	}
3255 	udelay(50);
3256 }
3257 
3258 static int si_cp_load_microcode(struct radeon_device *rdev)
3259 {
3260 	const __be32 *fw_data;
3261 	int i;
3262 
3263 	if (!rdev->me_fw || !rdev->pfp_fw)
3264 		return -EINVAL;
3265 
3266 	si_cp_enable(rdev, false);
3267 
3268 	/* PFP */
3269 	fw_data = (const __be32 *)rdev->pfp_fw->data;
3270 	WREG32(CP_PFP_UCODE_ADDR, 0);
3271 	for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3272 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3273 	WREG32(CP_PFP_UCODE_ADDR, 0);
3274 
3275 	/* CE */
3276 	fw_data = (const __be32 *)rdev->ce_fw->data;
3277 	WREG32(CP_CE_UCODE_ADDR, 0);
3278 	for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3279 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3280 	WREG32(CP_CE_UCODE_ADDR, 0);
3281 
3282 	/* ME */
3283 	fw_data = (const __be32 *)rdev->me_fw->data;
3284 	WREG32(CP_ME_RAM_WADDR, 0);
3285 	for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3286 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3287 	WREG32(CP_ME_RAM_WADDR, 0);
3288 
3289 	WREG32(CP_PFP_UCODE_ADDR, 0);
3290 	WREG32(CP_CE_UCODE_ADDR, 0);
3291 	WREG32(CP_ME_RAM_WADDR, 0);
3292 	WREG32(CP_ME_RAM_RADDR, 0);
3293 	return 0;
3294 }
3295 
3296 static int si_cp_start(struct radeon_device *rdev)
3297 {
3298 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3299 	int r, i;
3300 
3301 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3302 	if (r) {
3303 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3304 		return r;
3305 	}
3306 	/* init the CP */
3307 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3308 	radeon_ring_write(ring, 0x1);
3309 	radeon_ring_write(ring, 0x0);
3310 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3311 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3312 	radeon_ring_write(ring, 0);
3313 	radeon_ring_write(ring, 0);
3314 
3315 	/* init the CE partitions */
3316 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3317 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3318 	radeon_ring_write(ring, 0xc000);
3319 	radeon_ring_write(ring, 0xe000);
3320 	radeon_ring_unlock_commit(rdev, ring);
3321 
3322 	si_cp_enable(rdev, true);
3323 
3324 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3325 	if (r) {
3326 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3327 		return r;
3328 	}
3329 
3330 	/* setup clear context state */
3331 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3332 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3333 
3334 	for (i = 0; i < si_default_size; i++)
3335 		radeon_ring_write(ring, si_default_state[i]);
3336 
3337 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3338 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3339 
3340 	/* set clear context state */
3341 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3342 	radeon_ring_write(ring, 0);
3343 
3344 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3345 	radeon_ring_write(ring, 0x00000316);
3346 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3347 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3348 
3349 	radeon_ring_unlock_commit(rdev, ring);
3350 
3351 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3352 		ring = &rdev->ring[i];
3353 		r = radeon_ring_lock(rdev, ring, 2);
3354 
3355 		/* clear the compute context state */
3356 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3357 		radeon_ring_write(ring, 0);
3358 
3359 		radeon_ring_unlock_commit(rdev, ring);
3360 	}
3361 
3362 	return 0;
3363 }
3364 
3365 static void si_cp_fini(struct radeon_device *rdev)
3366 {
3367 	struct radeon_ring *ring;
3368 	si_cp_enable(rdev, false);
3369 
3370 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3371 	radeon_ring_fini(rdev, ring);
3372 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3373 
3374 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3375 	radeon_ring_fini(rdev, ring);
3376 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3377 
3378 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3379 	radeon_ring_fini(rdev, ring);
3380 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3381 }
3382 
/* si_cp_resume - program and start the three CP ring buffers.
 *
 * Configures the gfx ring (RB0) and both compute rings (RB1/RB2):
 * ring size, read/write pointers, write-back addresses and base, then
 * starts the CP via si_cp_start() and ring-tests each ring.  Returns 0
 * on success, or the gfx ring test's error code (compute ring test
 * failures only mark those rings not ready).
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	si_enable_gui_idle_interrupt(rdev, false);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* Without write-back, disable rptr updates and mask scratch. */
	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB1_RPTR);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB2_RPTR);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	/* A gfx ring failure is fatal; compute ring failures only mark
	 * the individual ring as not ready. */
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	si_enable_gui_idle_interrupt(rdev, true);

	return 0;
}
3511 
/* si_gpu_check_soft_reset - determine which GPU blocks appear hung.
 *
 * Interrogates the GRBM/SRBM/DMA/VM status registers and returns a mask
 * of RADEON_RESET_* flags for the blocks that report busy.  MC busy is
 * deliberately filtered out (busy, not hung).
 */
u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3592 
/*
 * si_gpu_soft_reset - soft reset the blocks named in @reset_mask
 *
 * Sequence: dump fault state, disable PG/CG and the RLC, halt the CP
 * and the requested DMA engines, stop the MC, pulse the matching
 * GRBM/SRBM soft-reset bits, then resume the MC.  The statement order
 * below is the hardware-mandated sequence; do not reorder.
 * No-op when @reset_mask is 0.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable PG/CG */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* stop the rlc */
	si_rlc_stop(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the RADEON_RESET_* mask into GRBM/SRBM soft-reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* assert the reset bits; the extra RREG32 posts the write */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3724 
3725 int si_asic_reset(struct radeon_device *rdev)
3726 {
3727 	u32 reset_mask;
3728 
3729 	reset_mask = si_gpu_check_soft_reset(rdev);
3730 
3731 	if (reset_mask)
3732 		r600_set_bios_scratch_engine_hung(rdev, true);
3733 
3734 	si_gpu_soft_reset(rdev, reset_mask);
3735 
3736 	reset_mask = si_gpu_check_soft_reset(rdev);
3737 
3738 	if (!reset_mask)
3739 		r600_set_bios_scratch_engine_hung(rdev, false);
3740 
3741 	return 0;
3742 }
3743 
3744 /**
3745  * si_gfx_is_lockup - Check if the GFX engine is locked up
3746  *
3747  * @rdev: radeon_device pointer
3748  * @ring: radeon_ring structure holding ring information
3749  *
3750  * Check if the GFX engine is locked up.
3751  * Returns true if the engine appears to be locked up, false if not.
3752  */
3753 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3754 {
3755 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
3756 
3757 	if (!(reset_mask & (RADEON_RESET_GFX |
3758 			    RADEON_RESET_COMPUTE |
3759 			    RADEON_RESET_CP))) {
3760 		radeon_ring_lockup_update(ring);
3761 		return false;
3762 	}
3763 	/* force CP activities */
3764 	radeon_ring_force_activity(rdev, ring);
3765 	return radeon_ring_test_lockup(rdev, ring);
3766 }
3767 
3768 /* MC */
/*
 * si_mc_program - program the memory controller's view of VRAM
 *
 * Clears the HDP tiling registers, stops the MC, points the system
 * aperture and FB location registers at the VRAM range picked by
 * si_vram_gtt_location(), disables the AGP aperture and resumes the
 * MC.  Register order matters; the MC must be stopped around the
 * aperture update.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs top (bits 31:16) and base (bits 15:0), in 16MB units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* no AGP on SI: collapse the AGP aperture (TOP < BASE disables it) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
3819 
3820 void si_vram_gtt_location(struct radeon_device *rdev,
3821 			  struct radeon_mc *mc)
3822 {
3823 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
3824 		/* leave room for at least 1024M GTT */
3825 		dev_warn(rdev->dev, "limiting VRAM\n");
3826 		mc->real_vram_size = 0xFFC0000000ULL;
3827 		mc->mc_vram_size = 0xFFC0000000ULL;
3828 	}
3829 	radeon_vram_location(rdev, &rdev->mc, 0);
3830 	rdev->mc.gtt_base_align = 0;
3831 	radeon_gtt_location(rdev, mc);
3832 }
3833 
3834 static int si_mc_init(struct radeon_device *rdev)
3835 {
3836 	u32 tmp;
3837 	int chansize, numchan;
3838 
3839 	/* Get VRAM informations */
3840 	rdev->mc.vram_is_ddr = true;
3841 	tmp = RREG32(MC_ARB_RAMCFG);
3842 	if (tmp & CHANSIZE_OVERRIDE) {
3843 		chansize = 16;
3844 	} else if (tmp & CHANSIZE_MASK) {
3845 		chansize = 64;
3846 	} else {
3847 		chansize = 32;
3848 	}
3849 	tmp = RREG32(MC_SHARED_CHMAP);
3850 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3851 	case 0:
3852 	default:
3853 		numchan = 1;
3854 		break;
3855 	case 1:
3856 		numchan = 2;
3857 		break;
3858 	case 2:
3859 		numchan = 4;
3860 		break;
3861 	case 3:
3862 		numchan = 8;
3863 		break;
3864 	case 4:
3865 		numchan = 3;
3866 		break;
3867 	case 5:
3868 		numchan = 6;
3869 		break;
3870 	case 6:
3871 		numchan = 10;
3872 		break;
3873 	case 7:
3874 		numchan = 12;
3875 		break;
3876 	case 8:
3877 		numchan = 16;
3878 		break;
3879 	}
3880 	rdev->mc.vram_width = numchan * chansize;
3881 	/* Could aper size report 0 ? */
3882 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3883 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3884 	/* size in MB on si */
3885 	tmp = RREG32(CONFIG_MEMSIZE);
3886 	/* some boards may have garbage in the upper 16 bits */
3887 	if (tmp & 0xffff0000) {
3888 		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
3889 		if (tmp & 0xffff)
3890 			tmp &= 0xffff;
3891 	}
3892 	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
3893 	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
3894 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
3895 	si_vram_gtt_location(rdev, &rdev->mc);
3896 	radeon_update_bandwidth_info(rdev);
3897 
3898 	return 0;
3899 }
3900 
3901 /*
3902  * GART
3903  */
/**
 * si_pcie_gart_tlb_flush - flush the GART TLB
 *
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP read cache and requests a VM TLB invalidate.
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	/* NOTE(review): only bit 0 (context0) is written here — confirm
	 * whether the other contexts are meant to be invalidated too */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
3912 
/*
 * si_pcie_gart_enable - bring up the GART and the 16 VM contexts
 *
 * Pins the GART table in VRAM, programs the L1 TLB / L2 cache, maps
 * context0 over the GTT aperture, points contexts 1-15 at the GART
 * table as a placeholder (real page tables are assigned at runtime by
 * radeon_gart.c), enables protection-fault reporting and flushes the
 * TLB.  Returns 0 on success or a negative error code.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	/* (0xA << 7): presumably default TLB sizing/robustness field —
	 * confirm against the SI register spec */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	/* setup context0: covers the GTT range, faults fall back to the
	 * dummy page */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* undocumented registers — zeroed as on other radeon parts */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
3997 
/*
 * si_pcie_gart_disable - tear the GART down to a safe pass-through state
 *
 * Disables all VM contexts, turns the L1 TLB and L2 cache off and
 * unpins the GART table.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
4016 
/* si_pcie_gart_fini - disable the GART, free its table and core state */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4023 
4024 /* vm parser */
4025 static bool si_vm_reg_valid(u32 reg)
4026 {
4027 	/* context regs are fine */
4028 	if (reg >= 0x28000)
4029 		return true;
4030 
4031 	/* check config regs */
4032 	switch (reg) {
4033 	case GRBM_GFX_INDEX:
4034 	case CP_STRMOUT_CNTL:
4035 	case VGT_VTX_VECT_EJECT_REG:
4036 	case VGT_CACHE_INVALIDATION:
4037 	case VGT_ESGS_RING_SIZE:
4038 	case VGT_GSVS_RING_SIZE:
4039 	case VGT_GS_VERTEX_REUSE:
4040 	case VGT_PRIMITIVE_TYPE:
4041 	case VGT_INDEX_TYPE:
4042 	case VGT_NUM_INDICES:
4043 	case VGT_NUM_INSTANCES:
4044 	case VGT_TF_RING_SIZE:
4045 	case VGT_HS_OFFCHIP_PARAM:
4046 	case VGT_TF_MEMORY_BASE:
4047 	case PA_CL_ENHANCE:
4048 	case PA_SU_LINE_STIPPLE_VALUE:
4049 	case PA_SC_LINE_STIPPLE_STATE:
4050 	case PA_SC_ENHANCE:
4051 	case SQC_CACHES:
4052 	case SPI_STATIC_THREAD_MGMT_1:
4053 	case SPI_STATIC_THREAD_MGMT_2:
4054 	case SPI_STATIC_THREAD_MGMT_3:
4055 	case SPI_PS_MAX_WAVE_ID:
4056 	case SPI_CONFIG_CNTL:
4057 	case SPI_CONFIG_CNTL_1:
4058 	case TA_CNTL_AUX:
4059 		return true;
4060 	default:
4061 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4062 		return false;
4063 	}
4064 }
4065 
4066 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4067 				  u32 *ib, struct radeon_cs_packet *pkt)
4068 {
4069 	switch (pkt->opcode) {
4070 	case PACKET3_NOP:
4071 	case PACKET3_SET_BASE:
4072 	case PACKET3_SET_CE_DE_COUNTERS:
4073 	case PACKET3_LOAD_CONST_RAM:
4074 	case PACKET3_WRITE_CONST_RAM:
4075 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4076 	case PACKET3_DUMP_CONST_RAM:
4077 	case PACKET3_INCREMENT_CE_COUNTER:
4078 	case PACKET3_WAIT_ON_DE_COUNTER:
4079 	case PACKET3_CE_WRITE:
4080 		break;
4081 	default:
4082 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4083 		return -EINVAL;
4084 	}
4085 	return 0;
4086 }
4087 
4088 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4089 {
4090 	u32 start_reg, reg, i;
4091 	u32 command = ib[idx + 4];
4092 	u32 info = ib[idx + 1];
4093 	u32 idx_value = ib[idx];
4094 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4095 		/* src address space is register */
4096 		if (((info & 0x60000000) >> 29) == 0) {
4097 			start_reg = idx_value << 2;
4098 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4099 				reg = start_reg;
4100 				if (!si_vm_reg_valid(reg)) {
4101 					DRM_ERROR("CP DMA Bad SRC register\n");
4102 					return -EINVAL;
4103 				}
4104 			} else {
4105 				for (i = 0; i < (command & 0x1fffff); i++) {
4106 					reg = start_reg + (4 * i);
4107 					if (!si_vm_reg_valid(reg)) {
4108 						DRM_ERROR("CP DMA Bad SRC register\n");
4109 						return -EINVAL;
4110 					}
4111 				}
4112 			}
4113 		}
4114 	}
4115 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4116 		/* dst address space is register */
4117 		if (((info & 0x00300000) >> 20) == 0) {
4118 			start_reg = ib[idx + 2];
4119 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4120 				reg = start_reg;
4121 				if (!si_vm_reg_valid(reg)) {
4122 					DRM_ERROR("CP DMA Bad DST register\n");
4123 					return -EINVAL;
4124 				}
4125 			} else {
4126 				for (i = 0; i < (command & 0x1fffff); i++) {
4127 					reg = start_reg + (4 * i);
4128 				if (!si_vm_reg_valid(reg)) {
4129 						DRM_ERROR("CP DMA Bad DST register\n");
4130 						return -EINVAL;
4131 					}
4132 				}
4133 			}
4134 		}
4135 	}
4136 	return 0;
4137 }
4138 
/*
 * si_vm_packet3_gfx_check - validate a PACKET3 from a VM gfx-ring IB
 *
 * Opcodes are checked against an allow-list; packets that can write
 * registers (COPY_DATA, WRITE_DATA, COND_WRITE, COPY_DW,
 * SET_CONFIG_REG, CP_DMA) additionally have every touched register
 * validated with si_vm_reg_valid().  Returns 0 if OK, -EINVAL
 * otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;

	switch (pkt->opcode) {
	/* opcodes with no register side effects to audit */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* only audit when the destination select is a register */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			/* bit 16: single register written repeatedly */
			if (idx_value & 0x10000) {
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* range-check the whole run, then every individual register */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4256 
/*
 * si_vm_packet3_compute_check - validate a PACKET3 from a VM compute IB
 *
 * Same scheme as si_vm_packet3_gfx_check() with the compute-specific
 * opcode allow-list (no draw packets, no SET_CONFIG_REG).  Returns 0
 * if OK, -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	/* opcodes with no register side effects to audit */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* only audit when the destination select is a register */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			/* bit 16: single register written repeatedly */
			if (idx_value & 0x10000) {
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4344 
4345 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4346 {
4347 	int ret = 0;
4348 	u32 idx = 0;
4349 	struct radeon_cs_packet pkt;
4350 
4351 	do {
4352 		pkt.idx = idx;
4353 		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4354 		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4355 		pkt.one_reg_wr = 0;
4356 		switch (pkt.type) {
4357 		case RADEON_PACKET_TYPE0:
4358 			dev_err(rdev->dev, "Packet0 not allowed!\n");
4359 			ret = -EINVAL;
4360 			break;
4361 		case RADEON_PACKET_TYPE2:
4362 			idx += 1;
4363 			break;
4364 		case RADEON_PACKET_TYPE3:
4365 			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4366 			if (ib->is_const_ib)
4367 				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4368 			else {
4369 				switch (ib->ring) {
4370 				case RADEON_RING_TYPE_GFX_INDEX:
4371 					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4372 					break;
4373 				case CAYMAN_RING_TYPE_CP1_INDEX:
4374 				case CAYMAN_RING_TYPE_CP2_INDEX:
4375 					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4376 					break;
4377 				default:
4378 					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4379 					ret = -EINVAL;
4380 					break;
4381 				}
4382 			}
4383 			idx += pkt.count + 2;
4384 			break;
4385 		default:
4386 			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4387 			ret = -EINVAL;
4388 			break;
4389 		}
4390 		if (ret)
4391 			break;
4392 	} while (idx < ib->length_dw);
4393 
4394 	return ret;
4395 }
4396 
4397 /*
4398  * vm
4399  */
4400 int si_vm_init(struct radeon_device *rdev)
4401 {
4402 	/* number of VMs */
4403 	rdev->vm_manager.nvm = 16;
4404 	/* base offset of vram pages */
4405 	rdev->vm_manager.vram_base_offset = 0;
4406 
4407 	return 0;
4408 }
4409 
/* si_vm_fini - counterpart of si_vm_init; nothing to tear down on SI */
void si_vm_fini(struct radeon_device *rdev)
{
}
4413 
4414 /**
4415  * si_vm_decode_fault - print human readable fault info
4416  *
4417  * @rdev: radeon_device pointer
4418  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4419  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4420  *
4421  * Print human readable fault information (SI).
4422  */
static void si_vm_decode_fault(struct radeon_device *rdev,
			       u32 status, u32 addr)
{
	/* decode the fault-status register into its fields */
	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	char *block;

	/* Tahiti uses a different MC client-id layout than the other
	 * SI parts, hence the two parallel tables below */
	if (rdev->family == CHIP_TAHITI) {
		switch (mc_id) {
		case 160:
		case 144:
		case 96:
		case 80:
		case 224:
		case 208:
		case 32:
		case 16:
			block = "CB";
			break;
		case 161:
		case 145:
		case 97:
		case 81:
		case 225:
		case 209:
		case 33:
		case 17:
			block = "CB_FMASK";
			break;
		case 162:
		case 146:
		case 98:
		case 82:
		case 226:
		case 210:
		case 34:
		case 18:
			block = "CB_CMASK";
			break;
		case 163:
		case 147:
		case 99:
		case 83:
		case 227:
		case 211:
		case 35:
		case 19:
			block = "CB_IMMED";
			break;
		case 164:
		case 148:
		case 100:
		case 84:
		case 228:
		case 212:
		case 36:
		case 20:
			block = "DB";
			break;
		case 165:
		case 149:
		case 101:
		case 85:
		case 229:
		case 213:
		case 37:
		case 21:
			block = "DB_HTILE";
			break;
		case 167:
		case 151:
		case 103:
		case 87:
		case 231:
		case 215:
		case 39:
		case 23:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 64:
		case 8:
		case 4:
		case 0:
		case 136:
		case 132:
		case 128:
		case 200:
		case 196:
		case 192:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
		case 190:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	} else {
		switch (mc_id) {
		case 32:
		case 16:
		case 96:
		case 80:
		case 160:
		case 144:
		case 224:
		case 208:
			block = "CB";
			break;
		case 33:
		case 17:
		case 97:
		case 81:
		case 161:
		case 145:
		case 225:
		case 209:
			block = "CB_FMASK";
			break;
		case 34:
		case 18:
		case 98:
		case 82:
		case 162:
		case 146:
		case 226:
		case 210:
			block = "CB_CMASK";
			break;
		case 35:
		case 19:
		case 99:
		case 83:
		case 163:
		case 147:
		case 227:
		case 211:
			block = "CB_IMMED";
			break;
		case 36:
		case 20:
		case 100:
		case 84:
		case 164:
		case 148:
		case 228:
		case 212:
			block = "DB";
			break;
		case 37:
		case 21:
		case 101:
		case 85:
		case 165:
		case 149:
		case 229:
		case 213:
			block = "DB_HTILE";
			break;
		case 39:
		case 23:
		case 103:
		case 87:
		case 167:
		case 151:
		case 231:
		case 215:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 8:
		case 4:
		case 136:
		case 132:
		case 200:
		case 196:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	}

	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_id);
}
4677 
/**
 * si_vm_flush - emit a VM flush on a CP ring
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit on
 * @vm: VM to flush (no-op when NULL)
 *
 * Emits PM4 WRITE_DATA packets that update the VM's page-directory
 * base register, flush the HDP cache and invalidate the VM's TLB,
 * followed by a PFP/ME sync.  Packet order is required; the caller
 * must have reserved ring space.
 */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));

	/* VM ids 0-7 and 8-15 live in two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
4720 
4721 /*
4722  *  Power and clock gating
4723  */
4724 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
4725 {
4726 	int i;
4727 
4728 	for (i = 0; i < rdev->usec_timeout; i++) {
4729 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
4730 			break;
4731 		udelay(1);
4732 	}
4733 
4734 	for (i = 0; i < rdev->usec_timeout; i++) {
4735 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
4736 			break;
4737 		udelay(1);
4738 	}
4739 }
4740 
/*
 * Enable/disable the CP "gui idle" (context busy/empty) interrupt on
 * ring 0.  On disable, poll RLC_STAT until only the clock/power status
 * bits remain set, i.e. the RLC is no longer busy, before returning.
 */
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable)
{
	u32 tmp = RREG32(CP_INT_CNTL_RING0);
	u32 mask;
	int i;

	if (enable)
		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	else
		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);

	if (!enable) {
		/* read a gfx register (presumably a posting read to flush
		 * the write above — confirm against AMD sequence) */
		tmp = RREG32(DB_DEPTH_INFO);

		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
				break;
			udelay(1);
		}
	}
}
4766 
4767 static void si_set_uvd_dcm(struct radeon_device *rdev,
4768 			   bool sw_mode)
4769 {
4770 	u32 tmp, tmp2;
4771 
4772 	tmp = RREG32(UVD_CGC_CTRL);
4773 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
4774 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
4775 
4776 	if (sw_mode) {
4777 		tmp &= ~0x7ffff800;
4778 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
4779 	} else {
4780 		tmp |= 0x7ffff800;
4781 		tmp2 = 0;
4782 	}
4783 
4784 	WREG32(UVD_CGC_CTRL, tmp);
4785 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
4786 }
4787 
/*
 * Initialize UVD internal clock gating.  hw_mode is hard-coded to true,
 * so the else branch (manually clearing DCM) is currently unreachable
 * and kept only as a reference for the manual path.
 */
void si_init_uvd_internal_cg(struct radeon_device *rdev)
{
	bool hw_mode = true;

	if (hw_mode) {
		/* hardware-controlled dynamic clock mode */
		si_set_uvd_dcm(rdev, false);
	} else {
		u32 tmp = RREG32(UVD_CGC_CTRL);
		tmp &= ~DCM;
		WREG32(UVD_CGC_CTRL, tmp);
	}
}
4800 
4801 static u32 si_halt_rlc(struct radeon_device *rdev)
4802 {
4803 	u32 data, orig;
4804 
4805 	orig = data = RREG32(RLC_CNTL);
4806 
4807 	if (data & RLC_ENABLE) {
4808 		data &= ~RLC_ENABLE;
4809 		WREG32(RLC_CNTL, data);
4810 
4811 		si_wait_for_rlc_serdes(rdev);
4812 	}
4813 
4814 	return orig;
4815 }
4816 
4817 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
4818 {
4819 	u32 tmp;
4820 
4821 	tmp = RREG32(RLC_CNTL);
4822 	if (tmp != rlc)
4823 		WREG32(RLC_CNTL, rlc);
4824 }
4825 
4826 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
4827 {
4828 	u32 data, orig;
4829 
4830 	orig = data = RREG32(DMA_PG);
4831 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
4832 		data |= PG_CNTL_ENABLE;
4833 	else
4834 		data &= ~PG_CNTL_ENABLE;
4835 	if (orig != data)
4836 		WREG32(DMA_PG, data);
4837 }
4838 
/*
 * Initialize the DMA power-gating FSM.  The magic values program the
 * PGFSM write/config ports; their field meanings are not documented
 * here — values come from the AMD-provided init sequence.
 */
static void si_init_dma_pg(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(DMA_PGFSM_WRITE,  0x00002000);
	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);

	/* five zero writes — presumably clock the config into the FSM */
	for (tmp = 0; tmp < 5; tmp++)
		WREG32(DMA_PGFSM_WRITE, 0);
}
4849 
/*
 * Enable/disable GFX coarse-grain power gating via the RLC.
 * On enable: program the RLC threshold/delay fields, set GFX_PG_ENABLE,
 * then turn on automatic power gating.  On disable: clear auto power
 * gating and do a dummy GFX register read (presumably to wake the
 * block — confirm against the AMD sequence).
 */
static void si_enable_gfx_cgpg(struct radeon_device *rdev,
			       bool enable)
{
	u32 tmp;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		/* RLC power up/down delay thresholds (per macro names) */
		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
		WREG32(RLC_TTOP_D, tmp);

		tmp = RREG32(RLC_PG_CNTL);
		tmp |= GFX_PG_ENABLE;
		WREG32(RLC_PG_CNTL, tmp);

		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp |= AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);
	} else {
		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp &= ~AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);

		/* dummy read of a GFX register */
		tmp = RREG32(DB_RENDER_CONTROL);
	}
}
4874 
/*
 * One-time GFX power-gating setup: point the RLC at the save/restore
 * and clear-state buffers and program the GRBM save-gate idle threshold
 * in RLC_AUTO_PG_CTRL.
 */
static void si_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);

	/* GFX_PG_SRC selects the save/restore source (per macro name) */
	tmp = RREG32(RLC_PG_CNTL);
	tmp |= GFX_PG_SRC;
	WREG32(RLC_PG_CNTL, tmp);

	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	tmp = RREG32(RLC_AUTO_PG_CTRL);

	/* idle threshold 0x700; clear the PG-after-GRBM-save delay field */
	tmp &= ~GRBM_REG_SGIT_MASK;
	tmp |= GRBM_REG_SGIT(0x700);
	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
	WREG32(RLC_AUTO_PG_CTRL, tmp);
}
4894 
4895 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
4896 {
4897 	u32 mask = 0, tmp, tmp1;
4898 	int i;
4899 
4900 	si_select_se_sh(rdev, se, sh);
4901 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
4902 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
4903 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4904 
4905 	tmp &= 0xffff0000;
4906 
4907 	tmp |= tmp1;
4908 	tmp >>= 16;
4909 
4910 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
4911 		mask <<= 1;
4912 		mask |= 1;
4913 	}
4914 
4915 	return (~tmp) & mask;
4916 }
4917 
4918 static void si_init_ao_cu_mask(struct radeon_device *rdev)
4919 {
4920 	u32 i, j, k, active_cu_number = 0;
4921 	u32 mask, counter, cu_bitmap;
4922 	u32 tmp = 0;
4923 
4924 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
4925 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
4926 			mask = 1;
4927 			cu_bitmap = 0;
4928 			counter  = 0;
4929 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
4930 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
4931 					if (counter < 2)
4932 						cu_bitmap |= mask;
4933 					counter++;
4934 				}
4935 				mask <<= 1;
4936 			}
4937 
4938 			active_cu_number += counter;
4939 			tmp |= (cu_bitmap << (i * 16 + j * 8));
4940 		}
4941 	}
4942 
4943 	WREG32(RLC_PG_AO_CU_MASK, tmp);
4944 
4945 	tmp = RREG32(RLC_MAX_PG_CU);
4946 	tmp &= ~MAX_PU_CU_MASK;
4947 	tmp |= MAX_PU_CU(active_cu_number);
4948 	WREG32(RLC_MAX_PG_CU, tmp);
4949 }
4950 
/*
 * Enable/disable coarse-grain clock gating (CGCG) and coarse-grain
 * light sleep (CGLS) for the GFX block.  The RLC is halted while the
 * serdes write masks are reprogrammed; the statement order follows the
 * AMD-provided sequence and must not be rearranged.
 */
static void si_enable_cgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		si_enable_gui_idle_interrupt(rdev, true);

		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

		/* stop the RLC while touching the serdes masks */
		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

		si_wait_for_rlc_serdes(rdev);

		/* restore the previous RLC state */
		si_update_rlc(rdev, tmp);

		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

		data |= CGCG_EN | CGLS_EN;
	} else {
		si_enable_gui_idle_interrupt(rdev, false);

		/* repeated reads — presumably posting reads to let CB
		 * clock-gating state settle; do not remove */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
4990 
/*
 * Enable/disable medium-grain clock gating (MGCG) for the GFX block,
 * including optional CP memory light sleep.  Magic register values come
 * from the AMD enable/disable sequences; ordering matters.
 */
static void si_enable_mgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data = 0x96940200;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* CP memory light sleep, if supported */
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
			orig = data = RREG32(CP_MEM_SLP_CNTL);
			data |= CP_MEM_LS_EN;
			if (orig != data)
				WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* clear the low override bits to allow gating */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xffffffc0;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* halt the RLC while reprogramming the serdes masks */
		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

		si_update_rlc(rdev, tmp);
	} else {
		/* set override bits to force gating off */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= LS_OVERRIDE | OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

		si_update_rlc(rdev, tmp);
	}
}
5046 
/*
 * Enable/disable UVD medium-grain clock gating: toggles the UVD memory
 * clock-gating bits (0x3fff), the DCM bit in UVD_CGC_CTRL, and the
 * SMC-side CGTT local masks (0 when gating, all-ones when not).
 */
static void si_enable_uvd_mgcg(struct radeon_device *rdev,
			       bool enable)
{
	u32 orig, data, tmp;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp |= 0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
	} else {
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp &= ~0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
	}
}
5078 
/* Memory-controller registers carrying the per-block MC_CG_ENABLE /
 * MC_LS_ENABLE bits toggled by si_enable_mc_mgcg() and
 * si_enable_mc_ls(). */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5091 
5092 static void si_enable_mc_ls(struct radeon_device *rdev,
5093 			    bool enable)
5094 {
5095 	int i;
5096 	u32 orig, data;
5097 
5098 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5099 		orig = data = RREG32(mc_cg_registers[i]);
5100 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5101 			data |= MC_LS_ENABLE;
5102 		else
5103 			data &= ~MC_LS_ENABLE;
5104 		if (data != orig)
5105 			WREG32(mc_cg_registers[i], data);
5106 	}
5107 }
5108 
5109 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5110 			       bool enable)
5111 {
5112 	int i;
5113 	u32 orig, data;
5114 
5115 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5116 		orig = data = RREG32(mc_cg_registers[i]);
5117 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5118 			data |= MC_CG_ENABLE;
5119 		else
5120 			data &= ~MC_CG_ENABLE;
5121 		if (data != orig)
5122 			WREG32(mc_cg_registers[i], data);
5123 	}
5124 }
5125 
5126 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5127 			       bool enable)
5128 {
5129 	u32 orig, data, offset;
5130 	int i;
5131 
5132 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5133 		for (i = 0; i < 2; i++) {
5134 			if (i == 0)
5135 				offset = DMA0_REGISTER_OFFSET;
5136 			else
5137 				offset = DMA1_REGISTER_OFFSET;
5138 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5139 			data &= ~MEM_POWER_OVERRIDE;
5140 			if (data != orig)
5141 				WREG32(DMA_POWER_CNTL + offset, data);
5142 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5143 		}
5144 	} else {
5145 		for (i = 0; i < 2; i++) {
5146 			if (i == 0)
5147 				offset = DMA0_REGISTER_OFFSET;
5148 			else
5149 				offset = DMA1_REGISTER_OFFSET;
5150 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5151 			data |= MEM_POWER_OVERRIDE;
5152 			if (data != orig)
5153 				WREG32(DMA_POWER_CNTL + offset, data);
5154 
5155 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5156 			data = 0xff000000;
5157 			if (data != orig)
5158 				WREG32(DMA_CLK_CTRL + offset, data);
5159 		}
5160 	}
5161 }
5162 
5163 static void si_enable_bif_mgls(struct radeon_device *rdev,
5164 			       bool enable)
5165 {
5166 	u32 orig, data;
5167 
5168 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5169 
5170 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5171 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5172 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5173 	else
5174 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5175 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5176 
5177 	if (orig != data)
5178 		WREG32_PCIE(PCIE_CNTL2, data);
5179 }
5180 
5181 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5182 			       bool enable)
5183 {
5184 	u32 orig, data;
5185 
5186 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5187 
5188 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5189 		data &= ~CLOCK_GATING_DIS;
5190 	else
5191 		data |= CLOCK_GATING_DIS;
5192 
5193 	if (orig != data)
5194 		WREG32(HDP_HOST_PATH_CNTL, data);
5195 }
5196 
5197 static void si_enable_hdp_ls(struct radeon_device *rdev,
5198 			     bool enable)
5199 {
5200 	u32 orig, data;
5201 
5202 	orig = data = RREG32(HDP_MEM_POWER_LS);
5203 
5204 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5205 		data |= HDP_LS_ENABLE;
5206 	else
5207 		data &= ~HDP_LS_ENABLE;
5208 
5209 	if (orig != data)
5210 		WREG32(HDP_MEM_POWER_LS, data);
5211 }
5212 
/**
 * si_update_cg - enable/disable clock gating for a set of IP blocks
 * @rdev: radeon_device pointer
 * @block: bitmask of RADEON_CG_BLOCK_* blocks to update
 * @enable: true to enable clock gating, false to disable
 *
 * For the GFX block the ordering is mandatory: MGCG before CGCG on
 * enable, and the reverse on disable; the gui idle interrupt is masked
 * around the transition.
 */
void si_update_cg(struct radeon_device *rdev,
		  u32 block, bool enable)
{
	if (block & RADEON_CG_BLOCK_GFX) {
		si_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			si_enable_mgcg(rdev, true);
			si_enable_cgcg(rdev, true);
		} else {
			si_enable_cgcg(rdev, false);
			si_enable_mgcg(rdev, false);
		}
		si_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		si_enable_mc_mgcg(rdev, enable);
		si_enable_mc_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		si_enable_dma_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		si_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		/* UVD gating only applies on parts that have the block */
		if (rdev->has_uvd) {
			si_enable_uvd_mgcg(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		si_enable_hdp_mgcg(rdev, enable);
		si_enable_hdp_ls(rdev, enable);
	}
}
5253 
5254 static void si_init_cg(struct radeon_device *rdev)
5255 {
5256 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5257 			    RADEON_CG_BLOCK_MC |
5258 			    RADEON_CG_BLOCK_SDMA |
5259 			    RADEON_CG_BLOCK_BIF |
5260 			    RADEON_CG_BLOCK_HDP), true);
5261 	if (rdev->has_uvd) {
5262 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5263 		si_init_uvd_internal_cg(rdev);
5264 	}
5265 }
5266 
5267 static void si_fini_cg(struct radeon_device *rdev)
5268 {
5269 	if (rdev->has_uvd) {
5270 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5271 	}
5272 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5273 			    RADEON_CG_BLOCK_MC |
5274 			    RADEON_CG_BLOCK_SDMA |
5275 			    RADEON_CG_BLOCK_BIF |
5276 			    RADEON_CG_BLOCK_HDP), false);
5277 }
5278 
5279 u32 si_get_csb_size(struct radeon_device *rdev)
5280 {
5281 	u32 count = 0;
5282 	const struct cs_section_def *sect = NULL;
5283 	const struct cs_extent_def *ext = NULL;
5284 
5285 	if (rdev->rlc.cs_data == NULL)
5286 		return 0;
5287 
5288 	/* begin clear state */
5289 	count += 2;
5290 	/* context control state */
5291 	count += 3;
5292 
5293 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5294 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5295 			if (sect->id == SECT_CONTEXT)
5296 				count += 2 + ext->reg_count;
5297 			else
5298 				return 0;
5299 		}
5300 	}
5301 	/* pa_sc_raster_config */
5302 	count += 3;
5303 	/* end clear state */
5304 	count += 2;
5305 	/* clear state */
5306 	count += 2;
5307 
5308 	return count;
5309 }
5310 
/**
 * si_get_csb_buffer - build the clear-state indirect buffer
 * @rdev: radeon_device pointer
 * @buffer: destination (little-endian dwords); must hold at least
 *	si_get_csb_size() dwords — the two functions must stay in sync
 *
 * Emits PREAMBLE begin, CONTEXT_CONTROL, every SECT_CONTEXT extent from
 * rdev->rlc.cs_data, a per-family PA_SC_RASTER_CONFIG value, PREAMBLE
 * end, and a CLEAR_STATE packet.  Returns early (leaving the buffer
 * partially filled) if a non-SECT_CONTEXT section is found.
 */
void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				/* context register offsets are relative to 0xa000 */
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* only SECT_CONTEXT sections are supported */
				return;
			}
		}
	}

	/* per-family raster configuration */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
		buffer[count++] = cpu_to_le32(0x2a00126a);
		break;
	case CHIP_VERDE:
		buffer[count++] = cpu_to_le32(0x0000124a);
		break;
	case CHIP_OLAND:
		buffer[count++] = cpu_to_le32(0x00000082);
		break;
	case CHIP_HAINAN:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	/* end clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
5370 
/*
 * Initialize power gating.  When any PG feature is supported, set up
 * SDMA/GFX power-gating state and enable it (the enable helpers check
 * the individual pg_flags themselves).  Otherwise just program the RLC
 * save/restore and clear-state buffer addresses, which the RLC needs
 * regardless of PG.
 */
static void si_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
			si_init_dma_pg(rdev);
		}
		si_init_ao_cu_mask(rdev);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			si_init_gfx_cgpg(rdev);
		}
		si_enable_dma_pg(rdev, true);
		si_enable_gfx_cgpg(rdev, true);
	} else {
		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
	}
}
5388 
5389 static void si_fini_pg(struct radeon_device *rdev)
5390 {
5391 	if (rdev->pg_flags) {
5392 		si_enable_dma_pg(rdev, false);
5393 		si_enable_gfx_cgpg(rdev, false);
5394 	}
5395 }
5396 
5397 /*
5398  * RLC
5399  */
5400 void si_rlc_reset(struct radeon_device *rdev)
5401 {
5402 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5403 
5404 	tmp |= SOFT_RESET_RLC;
5405 	WREG32(GRBM_SOFT_RESET, tmp);
5406 	udelay(50);
5407 	tmp &= ~SOFT_RESET_RLC;
5408 	WREG32(GRBM_SOFT_RESET, tmp);
5409 	udelay(50);
5410 }
5411 
5412 static void si_rlc_stop(struct radeon_device *rdev)
5413 {
5414 	WREG32(RLC_CNTL, 0);
5415 
5416 	si_enable_gui_idle_interrupt(rdev, false);
5417 
5418 	si_wait_for_rlc_serdes(rdev);
5419 }
5420 
5421 static void si_rlc_start(struct radeon_device *rdev)
5422 {
5423 	WREG32(RLC_CNTL, RLC_ENABLE);
5424 
5425 	si_enable_gui_idle_interrupt(rdev, true);
5426 
5427 	udelay(50);
5428 }
5429 
5430 static bool si_lbpw_supported(struct radeon_device *rdev)
5431 {
5432 	u32 tmp;
5433 
5434 	/* Enable LBPW only for DDR3 */
5435 	tmp = RREG32(MC_SEQ_MISC0);
5436 	if ((tmp & 0xF0000000) == 0xB0000000)
5437 		return true;
5438 	return false;
5439 }
5440 
5441 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5442 {
5443 	u32 tmp;
5444 
5445 	tmp = RREG32(RLC_LB_CNTL);
5446 	if (enable)
5447 		tmp |= LOAD_BALANCE_ENABLE;
5448 	else
5449 		tmp &= ~LOAD_BALANCE_ENABLE;
5450 	WREG32(RLC_LB_CNTL, tmp);
5451 
5452 	if (!enable) {
5453 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5454 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5455 	}
5456 }
5457 
/*
 * si_rlc_resume - stop, reset and reload the RLC microcode, then restart it
 *
 * Also (re)initializes power and clock gating, clears the RLC load
 * balancing registers, and uploads the big-endian RLC firmware image
 * word by word.  Returns -EINVAL if no RLC firmware has been loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	si_rlc_reset(rdev);

	si_init_pg(rdev);

	si_init_cg(rdev);

	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* firmware words are stored big-endian; convert while uploading */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
		WREG32(RLC_UCODE_ADDR, i);
		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
5497 
5498 static void si_enable_interrupts(struct radeon_device *rdev)
5499 {
5500 	u32 ih_cntl = RREG32(IH_CNTL);
5501 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5502 
5503 	ih_cntl |= ENABLE_INTR;
5504 	ih_rb_cntl |= IH_RB_ENABLE;
5505 	WREG32(IH_CNTL, ih_cntl);
5506 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5507 	rdev->ih.enabled = true;
5508 }
5509 
5510 static void si_disable_interrupts(struct radeon_device *rdev)
5511 {
5512 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5513 	u32 ih_cntl = RREG32(IH_CNTL);
5514 
5515 	ih_rb_cntl &= ~IH_RB_ENABLE;
5516 	ih_cntl &= ~ENABLE_INTR;
5517 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5518 	WREG32(IH_CNTL, ih_cntl);
5519 	/* set rptr, wptr to 0 */
5520 	WREG32(IH_RB_RPTR, 0);
5521 	WREG32(IH_RB_WPTR, 0);
5522 	rdev->ih.enabled = false;
5523 	rdev->ih.rptr = 0;
5524 }
5525 
/*
 * si_disable_interrupt_state - force all interrupt sources to disabled
 *
 * Masks the CP ring, DMA trap, GRBM, per-crtc and per-crtc GRPH
 * interrupt sources, and — on parts with display hardware — the DAC
 * auto-detect and HPD sources.  Only the HPD polarity bit is preserved.
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* keep only the context busy/empty enables on ring 0 */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	/* mask both DMA engines' trap interrupts */
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	/* per-crtc interrupt masks */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* per-crtc GRPH interrupt controls */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DACA_AUTODETECT_INT_CONTROL, 0);

		/* clear HPD interrupt enables, keeping the polarity bit */
		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
5583 
/*
 * si_irq_init - set up the interrupt handler (IH) ring and the RLC
 *
 * Allocates the IH ring, loads the RLC, programs the IH ring buffer
 * (base, size, optional writeback), forces all interrupt sources off,
 * enables bus mastering and finally turns interrupts on.  Returns 0 on
 * success or a negative error code (the IH ring is freed on RLC
 * failure).
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
5654 
/**
 * si_irq_set - program the interrupt sources requested in rdev->irq
 * @rdev: radeon_device pointer
 *
 * Reads the current enable state of the CP rings, DMA engines, CRTC
 * vblank, HPD and thermal interrupts, sets the bits requested via
 * rdev->irq, and writes everything back in one pass.  The GRPH
 * (pageflip) controls are written as 0 here; they are programmed
 * elsewhere when a flip is armed.  Returns -EINVAL if no IRQ handler
 * is installed, 0 otherwise.
 */
int si_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
	u32 grbm_int_cntl = 0;
	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
	u32 dma_cntl, dma_cntl1;
	u32 thermal_int = 0;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* preserve the context busy/empty enables on ring 0 */
	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);

	if (!ASIC_IS_NODCE(rdev)) {
		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
	}

	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	thermal_int = RREG32(CG_THERMAL_INT) &
		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}
	/* vblank requested either by a waiter or a pending pageflip */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("si_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("si_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("si_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("si_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("si_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("si_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("si_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("si_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("si_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("si_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("si_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("si_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	if (rdev->irq.dpm_thermal) {
		DRM_DEBUG("dpm thermal\n");
		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
	}

	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DC_HPD1_INT_CONTROL, hpd1);
		WREG32(DC_HPD2_INT_CONTROL, hpd2);
		WREG32(DC_HPD3_INT_CONTROL, hpd3);
		WREG32(DC_HPD4_INT_CONTROL, hpd4);
		WREG32(DC_HPD5_INT_CONTROL, hpd5);
		WREG32(DC_HPD6_INT_CONTROL, hpd6);
	}

	WREG32(CG_THERMAL_INT, thermal_int);

	return 0;
}
5826 
5827 static inline void si_irq_ack(struct radeon_device *rdev)
5828 {
5829 	u32 tmp;
5830 
5831 	if (ASIC_IS_NODCE(rdev))
5832 		return;
5833 
5834 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5835 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5836 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5837 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5838 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5839 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5840 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
5841 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
5842 	if (rdev->num_crtc >= 4) {
5843 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
5844 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
5845 	}
5846 	if (rdev->num_crtc >= 6) {
5847 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
5848 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
5849 	}
5850 
5851 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
5852 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5853 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
5854 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5855 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
5856 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5857 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
5858 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5859 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5860 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5861 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5862 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5863 
5864 	if (rdev->num_crtc >= 4) {
5865 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
5866 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5867 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
5868 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5869 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5870 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5871 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5872 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5873 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5874 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5875 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5876 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5877 	}
5878 
5879 	if (rdev->num_crtc >= 6) {
5880 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
5881 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5882 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
5883 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5884 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5885 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5886 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5887 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5888 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5889 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5890 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5891 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5892 	}
5893 
5894 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
5895 		tmp = RREG32(DC_HPD1_INT_CONTROL);
5896 		tmp |= DC_HPDx_INT_ACK;
5897 		WREG32(DC_HPD1_INT_CONTROL, tmp);
5898 	}
5899 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
5900 		tmp = RREG32(DC_HPD2_INT_CONTROL);
5901 		tmp |= DC_HPDx_INT_ACK;
5902 		WREG32(DC_HPD2_INT_CONTROL, tmp);
5903 	}
5904 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5905 		tmp = RREG32(DC_HPD3_INT_CONTROL);
5906 		tmp |= DC_HPDx_INT_ACK;
5907 		WREG32(DC_HPD3_INT_CONTROL, tmp);
5908 	}
5909 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5910 		tmp = RREG32(DC_HPD4_INT_CONTROL);
5911 		tmp |= DC_HPDx_INT_ACK;
5912 		WREG32(DC_HPD4_INT_CONTROL, tmp);
5913 	}
5914 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5915 		tmp = RREG32(DC_HPD5_INT_CONTROL);
5916 		tmp |= DC_HPDx_INT_ACK;
5917 		WREG32(DC_HPD5_INT_CONTROL, tmp);
5918 	}
5919 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5920 		tmp = RREG32(DC_HPD5_INT_CONTROL);
5921 		tmp |= DC_HPDx_INT_ACK;
5922 		WREG32(DC_HPD6_INT_CONTROL, tmp);
5923 	}
5924 }
5925 
/**
 * si_irq_disable - disable interrupt generation and acknowledge stragglers
 *
 * @rdev: radeon_device pointer
 *
 * Disables the interrupt controller, waits briefly so any in-flight
 * interrupts latch, acks whatever is pending, then programs the
 * per-source interrupt state to disabled.  The ordering of these four
 * steps is deliberate; do not reorder.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
5934 
/**
 * si_irq_suspend - quiesce interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disables/acks interrupts and stops the RLC.  Used on suspend and
 * from si_irq_fini().
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	si_rlc_stop(rdev);
}
5940 
/**
 * si_irq_fini - tear down the interrupt handler
 *
 * @rdev: radeon_device pointer
 *
 * Quiesces interrupt delivery, then frees the IH ring buffer.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
5946 
5947 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
5948 {
5949 	u32 wptr, tmp;
5950 
5951 	if (rdev->wb.enabled)
5952 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5953 	else
5954 		wptr = RREG32(IH_RB_WPTR);
5955 
5956 	if (wptr & RB_OVERFLOW) {
5957 		/* When a ring buffer overflow happen start parsing interrupt
5958 		 * from the last not overwritten vector (wptr + 16). Hopefully
5959 		 * this should allow us to catchup.
5960 		 */
5961 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5962 			wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
5963 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5964 		tmp = RREG32(IH_RB_CNTL);
5965 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
5966 		WREG32(IH_RB_CNTL, tmp);
5967 	}
5968 	return (wptr & rdev->ih.ptr_mask);
5969 }
5970 
5971 /*        SI IV Ring
5972  * Each IV ring entry is 128 bits:
5973  * [7:0]    - interrupt source id
5974  * [31:8]   - reserved
5975  * [59:32]  - interrupt source data
5976  * [63:60]  - reserved
5977  * [71:64]  - RINGID
5978  * [79:72]  - VMID
5979  * [127:80] - reserved
5980  */
/**
 * si_irq_process - drain and dispatch the IH (interrupt handler) ring
 *
 * @rdev: radeon_device pointer
 *
 * Walks the IH ring from the saved read pointer to the current write
 * pointer, decoding each 16-byte IV entry (see the ring layout comment
 * above) and dispatching on its source id: display vblank/vline,
 * hotplug, VM protection faults, CP/DMA fence events and thermal
 * events.  Hotplug and thermal handling is deferred to workqueues.
 * Re-checks the write pointer after releasing the lock and restarts if
 * new entries arrived meanwhile.
 *
 * Returns IRQ_HANDLED if any processing occurred, IRQ_NONE otherwise.
 */
int si_irq_process(struct radeon_device *rdev)
{
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_thermal = false;
	u32 status, addr;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = si_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	si_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[0]) {
						drm_handle_vblank(rdev->ddev, 0);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[0]))
						radeon_crtc_handle_flip(rdev, 0);
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D1 vblank\n");
				}
				break;
			case 1: /* D1 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D1 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[1]) {
						drm_handle_vblank(rdev->ddev, 1);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[1]))
						radeon_crtc_handle_flip(rdev, 1);
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D2 vblank\n");
				}
				break;
			case 1: /* D2 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D2 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[2]) {
						drm_handle_vblank(rdev->ddev, 2);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[2]))
						radeon_crtc_handle_flip(rdev, 2);
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D3 vblank\n");
				}
				break;
			case 1: /* D3 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D3 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[3]) {
						drm_handle_vblank(rdev->ddev, 3);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[3]))
						radeon_crtc_handle_flip(rdev, 3);
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D4 vblank\n");
				}
				break;
			case 1: /* D4 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D4 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[4]) {
						drm_handle_vblank(rdev->ddev, 4);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[4]))
						radeon_crtc_handle_flip(rdev, 4);
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D5 vblank\n");
				}
				break;
			case 1: /* D5 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D5 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[5]) {
						drm_handle_vblank(rdev->ddev, 5);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[5]))
						radeon_crtc_handle_flip(rdev, 5);
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D6 vblank\n");
				}
				break;
			case 1: /* D6 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D6 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 42: /* HPD hotplug */
			switch (src_data) {
			case 0:
				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD1\n");
				}
				break;
			case 1:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD2\n");
				}
				break;
			case 2:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD3\n");
				}
				break;
			case 3:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD4\n");
				}
				break;
			case 4:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD5\n");
				}
				break;
			case 5:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD6\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 146:
		case 147:
			/* VM protection fault: log and decode, then clear the
			 * fault address/status by writing VM_CONTEXT1_CNTL2 bit 0
			 */
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				addr);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				status);
			si_vm_decode_fault(rdev, status, addr);
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			break;
		case 176: /* RINGID0 CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 177: /* RINGID1 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			break;
		case 178: /* RINGID2 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			switch (ring_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				break;
			case 2:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 224: /* DMA trap event */
			DRM_DEBUG("IH: DMA trap\n");
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
			break;
		case 230: /* thermal low to high */
			DRM_DEBUG("IH: thermal low to high\n");
			rdev->pm.dpm.thermal.high_to_low = false;
			queue_thermal = true;
			break;
		case 231: /* thermal high to low */
			DRM_DEBUG("IH: thermal high to low\n");
			rdev->pm.dpm.thermal.high_to_low = true;
			queue_thermal = true;
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 244: /* DMA trap event */
			DRM_DEBUG("IH: DMA1 trap\n");
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
	}
	/* deferred work: hotplug/thermal run in process context */
	if (queue_hotplug)
		schedule_work(&rdev->hotplug_work);
	if (queue_thermal && rdev->pm.dpm_enabled)
		schedule_work(&rdev->pm.dpm.thermal.work);
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rdev->ih.rptr);
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = si_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
6304 
6305 /*
6306  * startup/shutdown callbacks
6307  */
/**
 * si_startup - bring the SI GPU into an operational state
 *
 * @rdev: radeon_device pointer
 *
 * Performs the full hardware bring-up sequence: PCIe link/ASPM setup,
 * MC programming, microcode load, GART enable, RLC/writeback/fence
 * setup, IRQ init, ring initialization for all CP/DMA (and optionally
 * UVD) rings, then IB pool, VM manager and audio init.  The ordering
 * of these steps follows hardware requirements; do not reorder.
 * Called from si_init() and si_resume().
 *
 * Returns 0 on success, negative error code on failure.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);

	/* load microcode if any image is missing (first start only) */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
	    !rdev->rlc_fw || !rdev->mc_fw) {
		r = si_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	r = si_mc_load_microcode(rdev);
	if (r) {
		DRM_ERROR("Failed to load MC firmware!\n");
		return r;
	}

	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->family == CHIP_VERDE) {
		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size =
			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
	}
	rdev->rlc.cs_data = si_cs_data;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start the fence drivers for every ring before ring init */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD failure is non-fatal: zero the ring size and carry on */
	if (rdev->has_uvd) {
		r = uvd_v2_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
		if (r)
			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_RB1_RPTR, CP_RB1_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_RB2_RPTR, CP_RB2_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* ring_size == 0 means UVD resume failed above; skip it */
	if (rdev->has_uvd) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		if (ring->ring_size) {
			r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
					     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
					     RADEON_CP_PACKET2);
			if (!r)
				r = uvd_v1_0_init(rdev);
			if (r)
				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
		}
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
6499 
6500 int si_resume(struct radeon_device *rdev)
6501 {
6502 	int r;
6503 
6504 	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6505 	 * posting will perform necessary task to bring back GPU into good
6506 	 * shape.
6507 	 */
6508 	/* post card */
6509 	atom_asic_init(rdev->mode_info.atom_context);
6510 
6511 	/* init golden registers */
6512 	si_init_golden_registers(rdev);
6513 
6514 	rdev->accel_working = true;
6515 	r = si_startup(rdev);
6516 	if (r) {
6517 		DRM_ERROR("si startup failed on resume\n");
6518 		rdev->accel_working = false;
6519 		return r;
6520 	}
6521 
6522 	return r;
6523 
6524 }
6525 
/**
 * si_suspend - quiesce the hardware for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Stops audio, the VM manager, CP and DMA engines, suspends UVD (if
 * present), tears down powergating/clockgating, quiesces interrupts,
 * disables writeback and finally disables the GART.  The teardown
 * order mirrors the reverse of si_startup(); do not reorder.
 *
 * Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
6543 
6544 /* Plan is to move initialization in that function and use
6545  * helper function so that radeon_device_init pretty much
6546  * do nothing more than calling asic specific function. This
6547  * should also allow to remove a bunch of callback function
6548  * like vram_info.
6549  */
/**
 * si_init - one-time driver/hardware initialization for SI ASICs
 *
 * @rdev: radeon_device pointer
 *
 * Reads the (ATOM) BIOS, posts the card if needed, initializes
 * clocks, memory controller, BO/fence infrastructure and the
 * software state for all rings, then calls si_startup().  A startup
 * failure is non-fatal here: acceleration is disabled and modesetting
 * continues.  Missing MC microcode, however, is fatal.
 *
 * Returns 0 on success, negative error code on fatal failure.
 */
int si_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		/* NOTE(review): message says "cayman" but this is si.c —
		 * looks like a copy/paste from the NI code; confirm before
		 * changing the user-visible string */
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	si_init_golden_registers(rdev);
	/* Initialize scratch registers */
	si_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = si_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* set up software state for the five CP/DMA rings */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	if (rdev->has_uvd) {
		r = radeon_uvd_init(rdev);
		if (!r) {
			ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
			ring->ring_obj = NULL;
			r600_ring_init(rdev, ring, 4096);
		}
	}

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	/* startup failure disables acceleration but is not fatal here */
	rdev->accel_working = true;
	r = si_startup(rdev);
	if (r) {
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		si_cp_fini(rdev);
		cayman_dma_fini(rdev);
		si_irq_fini(rdev);
		sumo_rlc_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		si_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not suffient for advanced operations.
	 */
	if (!rdev->mc_fw) {
		/* NOTE(review): "NI+" in this message also looks like a NI
		 * copy/paste; confirm before changing the string */
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
6664 
/**
 * si_fini - tear down the driver/hardware state
 *
 * @rdev: radeon_device pointer
 *
 * Final teardown on module unload/device removal: stops engines,
 * releases rings, IRQs, GART, VRAM scratch, GEM/fence/BO state and
 * atombios, then frees the cached BIOS image.  Order mirrors the
 * error path in si_init(); do not reorder.
 */
void si_fini(struct radeon_device *rdev)
{
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_fini(rdev);
	}
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
6690 
6691 /**
6692  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6693  *
6694  * @rdev: radeon_device pointer
6695  *
6696  * Fetches a GPU clock counter snapshot (SI).
6697  * Returns the 64 bit clock counter snapshot.
6698  */
6699 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6700 {
6701 	uint64_t clock;
6702 
6703 	mutex_lock(&rdev->gpu_clock_mutex);
6704 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6705 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6706 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6707 	mutex_unlock(&rdev->gpu_clock_mutex);
6708 	return clock;
6709 }
6710 
/**
 * si_set_uvd_clocks - program the UVD PLL (UPLL) for the requested clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock (0 leaves the PLL bypassed and asleep)
 * @dclk: requested UVD decode clock (0 leaves the PLL bypassed and asleep)
 *
 * Switches VCLK/DCLK onto the bypass clock, reprograms the UPLL
 * dividers for the requested rates, waits for the PLL to settle and
 * then switches VCLK/DCLK back onto the PLL outputs.  The register
 * write sequence and the delays between steps are order-critical.
 *
 * Returns 0 on success, negative error code on failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* compute the feedback and post dividers; the numeric arguments are
	 * presumably the SI UPLL VCO/divider constraints -- verify against
	 * radeon_uvd_calc_upll_dividers() before changing them.
	 */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* ISPARE9 depends on the feedback divider magnitude; 307200 is a
	 * hardware magic threshold -- NOTE(review): not derivable from this
	 * file, confirm against AMD documentation before changing.
	 */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* presumably lets the new clock selection settle before use */
	mdelay(100);

	return 0;
}
6801 
/**
 * si_pcie_gen3_enable - retrain the PCIE link at gen2/gen3 speed
 *
 * @rdev: radeon_device pointer
 *
 * If both the upstream bridge and the GPU advertise higher PCIE link
 * speeds, retrains the link at the fastest rate both sides support
 * (gen3 requires re-running equalization if not already active).
 * Disabled with the radeon.pcie_gen2=0 module parameter and skipped
 * on IGP and non-PCIE parts.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
	/* NOTE(review): bus->self is NULL for a device sitting directly on
	 * a root bus; pci_pcie_cap(root) below would dereference it.
	 * Presumably this cannot happen for a PCIE dGPU -- confirm.
	 */
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* mask of link speeds the platform (root port) supports */
	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* current data rate: 0 = gen1, 1 = gen2, 2 = gen3 (per the checks below) */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* offsets of the PCIE capability blocks in config space */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the current link control settings, then force
			 * HW autonomous width disable on both link partners
			 * for the duration of the retraining
			 */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* widen the link back to the maximum detected width first */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* equalization retry loop: quiesce, redo EQ, then restore
			 * the saved LNKCTL/LNKCTL2 fields on both sides
			 */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 -- (1 << 4) | (7 << 9) presumably covers the
				 * Enter Compliance and Compliance/Margin related LNKCTL2
				 * bits being restored; verify against the PCIe spec
				 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the Target Link Speed field (LNKCTL2[3:0]) on the GPU */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change and wait for the hw to ack it */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
6958 
/**
 * si_program_aspm - program PCIE ASPM (Active State Power Management)
 *
 * @rdev: radeon_device pointer
 *
 * Configures L0s/L1 link power states, PLL power-down while in L1,
 * and CLKREQ#-based reference clock switching.  Disabled with the
 * radeon.aspm=0 module parameter and skipped on non-PCIE parts.
 *
 * The disable_* locals are hard-coded false, so the function currently
 * always takes the "enable everything" paths; the else branch for
 * disable_l1 is unreachable as written.
 */
static void si_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the advertised N_FTS (fast training sequence count,
	 * PCIe L0s exit parameter) with 0x24
	 */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE(PCIE_P_CNTL, data);

	/* set up L0s/L1 inactivity timers; the write is deferred until we
	 * know whether L1 is enabled (the LC_PMI_TO_L1_DIS bit is cleared
	 * again on the L1 path below)
	 */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* force the PHY PLL power states used in OFF/TXS2 on
			 * both PIF blocks
			 */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

			/* clear the PLL ramp-up times on everything except
			 * Oland/Hainan (presumably a per-asic hw requirement)
			 */
			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
			}
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);

			/* CLKREQ# switching only when the upstream bridge
			 * advertises clock power management.
			 * NOTE(review): rdev->pdev->bus->self is NULL on a
			 * root bus; presumably not reachable here -- confirm.
			 */
			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32(THM_CLK_CNTL, data);

				orig = data = RREG32(MISC_CLK_CNTL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32(MISC_CLK_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32(MPLL_BYPASSCLK_SEL, data);

				orig = data = RREG32(SPLL_CNTL_MODE);
				data &= ~SPLL_REFCLK_SEL_MASK;
				if (orig != data)
					WREG32(SPLL_CNTL_MODE, data);
			}
		}
	} else {
		/* unreachable while disable_l1 is hard-coded false above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE(PCIE_CNTL2, data);

	/* with L0s enabled, drop the L0s inactivity timer again when the
	 * link is reversed in both directions and N_FTS reads back as all
	 * ones (hw workaround condition; exact rationale not visible here)
	 */
	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
7162